diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,110338 +1,27597 @@ { - "best_metric": 0.18828971683979034, - "best_model_checkpoint": "./results_t5_mixed_wiki_cv_arhiv/checkpoint-39395", - "epoch": 2.0, + "best_metric": 0.160739004611969, + "best_model_checkpoint": "./results_t5_mixed_wiki_cv_arhiv_large/checkpoint-19699", + "epoch": 1.0, "eval_steps": 500, - "global_step": 78790, + "global_step": 19699, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.00012691965985531158, - "grad_norm": 6761.2021484375, - "learning_rate": 1.99991538689343e-05, - "loss": 16.6735, + "epoch": 0.00025381999086248035, + "grad_norm": 28009.150390625, + "learning_rate": 1.9998307866727585e-05, + "loss": 16.9957, "step": 5 }, { - "epoch": 0.00025383931971062316, - "grad_norm": 4428.99755859375, - "learning_rate": 1.99983077378686e-05, - "loss": 15.8445, + "epoch": 0.0005076399817249607, + "grad_norm": 8431.59375, + "learning_rate": 1.999661573345517e-05, + "loss": 15.242, "step": 10 }, { - "epoch": 0.00038075897956593474, - "grad_norm": 9961.09375, - "learning_rate": 1.9997461606802893e-05, - "loss": 16.5581, + "epoch": 0.000761459972587441, + "grad_norm": 13960.4951171875, + "learning_rate": 1.9994923600182752e-05, + "loss": 14.3643, "step": 15 }, { - "epoch": 0.0005076786394212463, - "grad_norm": 5654.59765625, - "learning_rate": 1.9996615475737192e-05, - "loss": 15.3784, + "epoch": 0.0010152799634499214, + "grad_norm": 9187.435546875, + "learning_rate": 1.9993231466910336e-05, + "loss": 13.9964, "step": 20 }, { - "epoch": 0.000634598299276558, - "grad_norm": 10435.923828125, - "learning_rate": 1.999576934467149e-05, - "loss": 14.1682, + "epoch": 0.0012690999543124015, + "grad_norm": 11192.2958984375, + "learning_rate": 1.999153933363792e-05, + "loss": 13.0181, "step": 25 }, { - "epoch": 0.0007615179591318695, - "grad_norm": 7558.43115234375, - "learning_rate": 1.999492321360579e-05, - "loss": 14.5603, + "epoch": 0.001522919945174882, + "grad_norm": 10066.05859375, + "learning_rate": 1.9989847200365503e-05, + "loss": 12.9025, "step": 30 }, { - "epoch": 0.0008884376189871811, - "grad_norm": 3485.970703125, - "learning_rate": 1.9994077082540087e-05, - "loss": 12.8713, + "epoch": 0.0017767399360373623, + "grad_norm": 7277.57373046875, + "learning_rate": 1.9988155067093086e-05, + "loss": 12.5301, "step": 35 }, { - "epoch": 0.0010153572788424927, - "grad_norm": 14000.3935546875, - "learning_rate": 1.9993230951474385e-05, - "loss": 12.8411, + "epoch": 0.002030559926899843, + "grad_norm": 157497.796875, + "learning_rate": 1.998646293382067e-05, + "loss": 11.712, "step": 40 }, { - "epoch": 0.0011422769386978044, - "grad_norm": 14125.9423828125, - "learning_rate": 1.9992384820408684e-05, - "loss": 11.8284, + "epoch": 0.0022843799177623227, + "grad_norm": 2509.368408203125, + "learning_rate": 1.9984770800548254e-05, + "loss": 11.4352, "step": 45 }, { - "epoch": 0.001269196598553116, - "grad_norm": 11657.3115234375, - "learning_rate": 1.9991538689342982e-05, - "loss": 12.2312, + "epoch": 0.002538199908624803, + "grad_norm": 9022.3505859375, + "learning_rate": 1.9983078667275837e-05, + "loss": 10.8293, "step": 50 }, { - "epoch": 0.0013961162584084274, - "grad_norm": 4395.9443359375, - "learning_rate": 1.9990692558277277e-05, - "loss": 11.9069, + "epoch": 0.0027920198994872835, + "grad_norm": 2620.181640625, + "learning_rate": 1.998138653400342e-05, + "loss": 10.5957, "step": 55 }, { - "epoch": 0.001523035918263739, - "grad_norm": 6860.7373046875, - "learning_rate": 1.9989846427211576e-05, - "loss": 12.1422, + "epoch": 0.003045839890349764, + "grad_norm": 4354.4189453125, + "learning_rate": 1.9979694400731e-05, + "loss": 10.201, "step": 60 }, { - "epoch": 0.0016499555781190507, - "grad_norm": 1533.4132080078125, - "learning_rate": 1.9989000296145874e-05, - "loss": 11.2057, + "epoch": 0.003299659881212244, + "grad_norm": 2636.816162109375, + "learning_rate": 1.9978002267458588e-05, + "loss": 9.8294, "step": 65 }, { - "epoch": 0.0017768752379743622, - "grad_norm": 25685.158203125, - "learning_rate": 1.9988154165080172e-05, - "loss": 11.0934, + "epoch": 0.0035534798720747245, + "grad_norm": 2717.7568359375, + "learning_rate": 1.997631013418617e-05, + "loss": 9.7655, "step": 70 }, { - "epoch": 0.0019037948978296738, - "grad_norm": 1331.36279296875, - "learning_rate": 1.998730803401447e-05, - "loss": 10.5316, + "epoch": 0.003807299862937205, + "grad_norm": 1577.282958984375, + "learning_rate": 1.9974618000913755e-05, + "loss": 9.2661, "step": 75 }, { - "epoch": 0.0020307145576849853, - "grad_norm": 1879.588623046875, - "learning_rate": 1.998646190294877e-05, - "loss": 9.8452, + "epoch": 0.004061119853799686, + "grad_norm": 1305.2647705078125, + "learning_rate": 1.997292586764134e-05, + "loss": 8.5601, "step": 80 }, { - "epoch": 0.002157634217540297, - "grad_norm": 1030.24072265625, - "learning_rate": 1.9985615771883068e-05, - "loss": 9.2661, + "epoch": 0.004314939844662166, + "grad_norm": 1029.8795166015625, + "learning_rate": 1.997123373436892e-05, + "loss": 8.3479, "step": 85 }, { - "epoch": 0.002284553877395609, - "grad_norm": 1018.1094970703125, - "learning_rate": 1.9984769640817366e-05, - "loss": 9.2595, + "epoch": 0.0045687598355246455, + "grad_norm": 488.6922302246094, + "learning_rate": 1.9969541601096505e-05, + "loss": 7.9367, "step": 90 }, { - "epoch": 0.0024114735372509203, - "grad_norm": 1145.442138671875, - "learning_rate": 1.998392350975166e-05, - "loss": 8.8839, + "epoch": 0.004822579826387126, + "grad_norm": 1308.7169189453125, + "learning_rate": 1.996784946782409e-05, + "loss": 7.6314, "step": 95 }, { - "epoch": 0.002538393197106232, - "grad_norm": 887.9046630859375, - "learning_rate": 1.998307737868596e-05, - "loss": 8.1254, + "epoch": 0.005076399817249606, + "grad_norm": 1560.9044189453125, + "learning_rate": 1.996615733455167e-05, + "loss": 7.2683, "step": 100 }, { - "epoch": 0.0026653128569615434, - "grad_norm": 3415.627685546875, - "learning_rate": 1.9982231247620258e-05, - "loss": 8.0569, + "epoch": 0.0053302198081120866, + "grad_norm": 1124.25390625, + "learning_rate": 1.9964465201279256e-05, + "loss": 6.7153, "step": 105 }, { - "epoch": 0.002792232516816855, - "grad_norm": 5088.13525390625, - "learning_rate": 1.9981385116554556e-05, - "loss": 7.806, + "epoch": 0.005584039798974567, + "grad_norm": 1180.7884521484375, + "learning_rate": 1.9962773068006836e-05, + "loss": 6.4866, "step": 110 }, { - "epoch": 0.0029191521766721664, - "grad_norm": 596.0669555664062, - "learning_rate": 1.9980538985488855e-05, - "loss": 7.4547, + "epoch": 0.005837859789837047, + "grad_norm": 156.94796752929688, + "learning_rate": 1.996108093473442e-05, + "loss": 5.8496, "step": 115 }, { - "epoch": 0.003046071836527478, - "grad_norm": 259.159912109375, - "learning_rate": 1.9979692854423153e-05, - "loss": 7.2016, + "epoch": 0.006091679780699528, + "grad_norm": 256.8822326660156, + "learning_rate": 1.9959388801462007e-05, + "loss": 5.4725, "step": 120 }, { - "epoch": 0.0031729914963827895, - "grad_norm": 640.3233032226562, - "learning_rate": 1.997884672335745e-05, - "loss": 7.0556, + "epoch": 0.006345499771562008, + "grad_norm": 242.91868591308594, + "learning_rate": 1.9957696668189587e-05, + "loss": 5.0473, "step": 125 }, { - "epoch": 0.0032999111562381014, - "grad_norm": 302.8347473144531, - "learning_rate": 1.997800059229175e-05, - "loss": 7.1921, + "epoch": 0.006599319762424488, + "grad_norm": 140.21670532226562, + "learning_rate": 1.9956004534917174e-05, + "loss": 4.739, "step": 130 }, { - "epoch": 0.003426830816093413, - "grad_norm": 842.9828491210938, - "learning_rate": 1.9977154461226045e-05, - "loss": 6.3183, + "epoch": 0.006853139753286969, + "grad_norm": 2685.30712890625, + "learning_rate": 1.9954312401644754e-05, + "loss": 4.309, "step": 135 }, { - "epoch": 0.0035537504759487245, - "grad_norm": 401.562255859375, - "learning_rate": 1.9976308330160343e-05, - "loss": 5.9496, + "epoch": 0.007106959744149449, + "grad_norm": 51.844295501708984, + "learning_rate": 1.9952620268372337e-05, + "loss": 4.074, "step": 140 }, { - "epoch": 0.003680670135804036, - "grad_norm": 261.024658203125, - "learning_rate": 1.9975462199094642e-05, - "loss": 5.5896, + "epoch": 0.007360779735011929, + "grad_norm": 15.273852348327637, + "learning_rate": 1.9950928135099924e-05, + "loss": 3.4298, "step": 145 }, { - "epoch": 0.0038075897956593475, - "grad_norm": 97.8665542602539, - "learning_rate": 1.997461606802894e-05, - "loss": 5.5108, + "epoch": 0.00761459972587441, + "grad_norm": 17.046186447143555, + "learning_rate": 1.9949236001827505e-05, + "loss": 3.1516, "step": 150 }, { - "epoch": 0.003934509455514659, - "grad_norm": 291.0576477050781, - "learning_rate": 1.9973769936963235e-05, - "loss": 4.9655, + "epoch": 0.00786841971673689, + "grad_norm": 13.263907432556152, + "learning_rate": 1.9947543868555088e-05, + "loss": 2.6151, "step": 155 }, { - "epoch": 0.004061429115369971, - "grad_norm": 81.0230712890625, - "learning_rate": 1.9972923805897534e-05, - "loss": 4.4269, + "epoch": 0.008122239707599371, + "grad_norm": 17.467811584472656, + "learning_rate": 1.994585173528267e-05, + "loss": 2.4349, "step": 160 }, { - "epoch": 0.004188348775225282, - "grad_norm": 87.64791870117188, - "learning_rate": 1.9972077674831832e-05, - "loss": 4.1874, + "epoch": 0.00837605969846185, + "grad_norm": 26.78594398498535, + "learning_rate": 1.9944159602010255e-05, + "loss": 2.1512, "step": 165 }, { - "epoch": 0.004315268435080594, - "grad_norm": 85.92936706542969, - "learning_rate": 1.997123154376613e-05, - "loss": 3.7181, + "epoch": 0.008629879689324332, + "grad_norm": 7.074516296386719, + "learning_rate": 1.994246746873784e-05, + "loss": 1.9379, "step": 170 }, { - "epoch": 0.004442188094935905, - "grad_norm": 74.9645767211914, - "learning_rate": 1.997038541270043e-05, - "loss": 3.2789, + "epoch": 0.008883699680186811, + "grad_norm": 6.19895601272583, + "learning_rate": 1.9940775335465422e-05, + "loss": 1.7137, "step": 175 }, { - "epoch": 0.004569107754791218, - "grad_norm": 24.999765396118164, - "learning_rate": 1.9969539281634727e-05, - "loss": 2.9509, + "epoch": 0.009137519671049291, + "grad_norm": 7.024235725402832, + "learning_rate": 1.9939083202193006e-05, + "loss": 1.6583, "step": 180 }, { - "epoch": 0.004696027414646529, - "grad_norm": 15.212715148925781, - "learning_rate": 1.9968693150569026e-05, - "loss": 2.7421, + "epoch": 0.009391339661911772, + "grad_norm": 5.285205841064453, + "learning_rate": 1.993739106892059e-05, + "loss": 1.5457, "step": 185 }, { - "epoch": 0.004822947074501841, - "grad_norm": 13.61313247680664, - "learning_rate": 1.9967847019503324e-05, - "loss": 2.6134, + "epoch": 0.009645159652774252, + "grad_norm": 4.7672600746154785, + "learning_rate": 1.9935698935648173e-05, + "loss": 1.423, "step": 190 }, { - "epoch": 0.004949866734357152, - "grad_norm": 13.876510620117188, - "learning_rate": 1.996700088843762e-05, - "loss": 2.3264, + "epoch": 0.009898979643636733, + "grad_norm": 4.843871116638184, + "learning_rate": 1.9934006802375756e-05, + "loss": 1.4004, "step": 195 }, { - "epoch": 0.005076786394212464, - "grad_norm": 9.046316146850586, - "learning_rate": 1.9966154757371917e-05, - "loss": 2.0937, + "epoch": 0.010152799634499212, + "grad_norm": 3.7040324211120605, + "learning_rate": 1.993231466910334e-05, + "loss": 1.3773, "step": 200 }, { - "epoch": 0.005203706054067775, - "grad_norm": 12.224512100219727, - "learning_rate": 1.9965308626306216e-05, - "loss": 2.0293, + "epoch": 0.010406619625361694, + "grad_norm": 3.55208420753479, + "learning_rate": 1.9930622535830924e-05, + "loss": 1.287, "step": 205 }, { - "epoch": 0.005330625713923087, - "grad_norm": 10.402899742126465, - "learning_rate": 1.9964462495240514e-05, - "loss": 1.7867, + "epoch": 0.010660439616224173, + "grad_norm": 4.512004852294922, + "learning_rate": 1.9928930402558507e-05, + "loss": 1.215, "step": 210 }, { - "epoch": 0.005457545373778398, - "grad_norm": 9.543730735778809, - "learning_rate": 1.9963616364174813e-05, - "loss": 1.8078, + "epoch": 0.010914259607086654, + "grad_norm": 3.2358570098876953, + "learning_rate": 1.992723826928609e-05, + "loss": 1.1807, "step": 215 }, { - "epoch": 0.00558446503363371, - "grad_norm": 8.407549858093262, - "learning_rate": 1.996277023310911e-05, - "loss": 1.655, + "epoch": 0.011168079597949134, + "grad_norm": 2.9898245334625244, + "learning_rate": 1.9925546136013674e-05, + "loss": 1.1454, "step": 220 }, { - "epoch": 0.005711384693489021, - "grad_norm": 7.521920680999756, - "learning_rate": 1.996192410204341e-05, - "loss": 1.5714, + "epoch": 0.011421899588811615, + "grad_norm": 3.377270221710205, + "learning_rate": 1.9923854002741258e-05, + "loss": 1.1211, "step": 225 }, { - "epoch": 0.005838304353344333, - "grad_norm": 7.959629535675049, - "learning_rate": 1.9961077970977708e-05, - "loss": 1.5174, + "epoch": 0.011675719579674095, + "grad_norm": 2.5435738563537598, + "learning_rate": 1.992216186946884e-05, + "loss": 1.0875, "step": 230 }, { - "epoch": 0.005965224013199644, - "grad_norm": 8.119318008422852, - "learning_rate": 1.9960231839912003e-05, - "loss": 1.4121, + "epoch": 0.011929539570536576, + "grad_norm": 2.505054473876953, + "learning_rate": 1.9920469736196425e-05, + "loss": 1.0262, "step": 235 }, { - "epoch": 0.006092143673054956, - "grad_norm": 5.927316188812256, - "learning_rate": 1.99593857088463e-05, - "loss": 1.3684, + "epoch": 0.012183359561399055, + "grad_norm": 2.3626339435577393, + "learning_rate": 1.991877760292401e-05, + "loss": 0.971, "step": 240 }, { - "epoch": 0.006219063332910267, - "grad_norm": 6.33473014831543, - "learning_rate": 1.99585395777806e-05, - "loss": 1.333, + "epoch": 0.012437179552261536, + "grad_norm": 2.3419456481933594, + "learning_rate": 1.9917085469651592e-05, + "loss": 0.9605, "step": 245 }, { - "epoch": 0.006345982992765579, - "grad_norm": 10.345564842224121, - "learning_rate": 1.9957693446714898e-05, - "loss": 1.3117, + "epoch": 0.012690999543124016, + "grad_norm": 2.3525550365448, + "learning_rate": 1.9915393336379175e-05, + "loss": 0.9515, "step": 250 }, { - "epoch": 0.006472902652620891, - "grad_norm": 12.649621963500977, - "learning_rate": 1.9956847315649196e-05, - "loss": 1.3655, + "epoch": 0.012944819533986497, + "grad_norm": 2.0995123386383057, + "learning_rate": 1.991370120310676e-05, + "loss": 0.9169, "step": 255 }, { - "epoch": 0.006599822312476203, - "grad_norm": 7.488447189331055, - "learning_rate": 1.9956001184583495e-05, - "loss": 1.2725, + "epoch": 0.013198639524848977, + "grad_norm": 2.3146281242370605, + "learning_rate": 1.9912009069834343e-05, + "loss": 0.9073, "step": 260 }, { - "epoch": 0.006726741972331514, - "grad_norm": 33.15998840332031, - "learning_rate": 1.9955155053517793e-05, - "loss": 1.2092, + "epoch": 0.013452459515711458, + "grad_norm": 2.5654919147491455, + "learning_rate": 1.9910316936561923e-05, + "loss": 0.8649, "step": 265 }, { - "epoch": 0.006853661632186826, - "grad_norm": 5.366878509521484, - "learning_rate": 1.995430892245209e-05, - "loss": 1.3167, + "epoch": 0.013706279506573937, + "grad_norm": 2.566847562789917, + "learning_rate": 1.990862480328951e-05, + "loss": 0.8795, "step": 270 }, { - "epoch": 0.0069805812920421375, - "grad_norm": 8.073542594909668, - "learning_rate": 1.9953462791386387e-05, - "loss": 1.139, + "epoch": 0.013960099497436419, + "grad_norm": 2.0294430255889893, + "learning_rate": 1.9906932670017093e-05, + "loss": 0.8524, "step": 275 }, { - "epoch": 0.007107500951897449, - "grad_norm": 21.493202209472656, - "learning_rate": 1.9952616660320685e-05, - "loss": 1.1516, + "epoch": 0.014213919488298898, + "grad_norm": 1.80044424533844, + "learning_rate": 1.9905240536744677e-05, + "loss": 0.8287, "step": 280 }, { - "epoch": 0.0072344206117527605, - "grad_norm": 25.653160095214844, - "learning_rate": 1.9951770529254983e-05, - "loss": 1.1351, + "epoch": 0.01446773947916138, + "grad_norm": 3.0150132179260254, + "learning_rate": 1.990354840347226e-05, + "loss": 0.7852, "step": 285 }, { - "epoch": 0.007361340271608072, - "grad_norm": 3.9601004123687744, - "learning_rate": 1.9950924398189282e-05, - "loss": 1.0773, + "epoch": 0.014721559470023859, + "grad_norm": 1.9330862760543823, + "learning_rate": 1.990185627019984e-05, + "loss": 0.7896, "step": 290 }, { - "epoch": 0.007488259931463384, - "grad_norm": 6.897655487060547, - "learning_rate": 1.9950078267123577e-05, - "loss": 1.0343, + "epoch": 0.01497537946088634, + "grad_norm": 1.4856704473495483, + "learning_rate": 1.9900164136927427e-05, + "loss": 0.7634, "step": 295 }, { - "epoch": 0.007615179591318695, - "grad_norm": 4.075333595275879, - "learning_rate": 1.9949232136057875e-05, - "loss": 1.1424, + "epoch": 0.01522919945174882, + "grad_norm": 2.0876917839050293, + "learning_rate": 1.989847200365501e-05, + "loss": 0.8272, "step": 300 }, { - "epoch": 0.007742099251174007, - "grad_norm": 4.745382308959961, - "learning_rate": 1.9948386004992174e-05, - "loss": 1.1066, + "epoch": 0.0154830194426113, + "grad_norm": 1.916139006614685, + "learning_rate": 1.989677987038259e-05, + "loss": 0.7789, "step": 305 }, { - "epoch": 0.007869018911029318, - "grad_norm": 11.768449783325195, - "learning_rate": 1.9947539873926472e-05, - "loss": 1.096, + "epoch": 0.01573683943347378, + "grad_norm": 1.5130165815353394, + "learning_rate": 1.9895087737110178e-05, + "loss": 0.7559, "step": 310 }, { - "epoch": 0.00799593857088463, - "grad_norm": 4.359046936035156, - "learning_rate": 1.994669374286077e-05, - "loss": 1.0069, + "epoch": 0.01599065942433626, + "grad_norm": 1.9628571271896362, + "learning_rate": 1.9893395603837758e-05, + "loss": 0.7496, "step": 315 }, { - "epoch": 0.008122858230739941, - "grad_norm": 4.613051891326904, - "learning_rate": 1.994584761179507e-05, - "loss": 1.0464, + "epoch": 0.016244479415198743, + "grad_norm": 2.4967174530029297, + "learning_rate": 1.9891703470565345e-05, + "loss": 0.7925, "step": 320 }, { - "epoch": 0.008249777890595254, - "grad_norm": 3.9694900512695312, - "learning_rate": 1.9945001480729367e-05, - "loss": 1.1703, + "epoch": 0.01649829940606122, + "grad_norm": 1.711539626121521, + "learning_rate": 1.989001133729293e-05, + "loss": 0.7409, "step": 325 }, { - "epoch": 0.008376697550450564, - "grad_norm": 3.9402709007263184, - "learning_rate": 1.9944155349663666e-05, - "loss": 1.0488, + "epoch": 0.0167521193969237, + "grad_norm": 1.7477493286132812, + "learning_rate": 1.988831920402051e-05, + "loss": 0.7136, "step": 330 }, { - "epoch": 0.008503617210305877, - "grad_norm": 4.778536796569824, - "learning_rate": 1.994330921859796e-05, - "loss": 0.9748, + "epoch": 0.017005939387786183, + "grad_norm": 4.01808500289917, + "learning_rate": 1.9886627070748096e-05, + "loss": 0.7446, "step": 335 }, { - "epoch": 0.008630536870161187, - "grad_norm": 3.6239206790924072, - "learning_rate": 1.994246308753226e-05, - "loss": 0.9361, + "epoch": 0.017259759378648664, + "grad_norm": 2.3399136066436768, + "learning_rate": 1.9884934937475676e-05, + "loss": 0.8573, "step": 340 }, { - "epoch": 0.0087574565300165, - "grad_norm": 8.929752349853516, - "learning_rate": 1.9941616956466557e-05, - "loss": 0.9252, + "epoch": 0.017513579369511142, + "grad_norm": 1.5367169380187988, + "learning_rate": 1.988324280420326e-05, + "loss": 0.6873, "step": 345 }, { - "epoch": 0.00888437618987181, - "grad_norm": 55.02832794189453, - "learning_rate": 1.9940770825400856e-05, - "loss": 0.9501, + "epoch": 0.017767399360373623, + "grad_norm": 3.4695355892181396, + "learning_rate": 1.9881550670930846e-05, + "loss": 0.7075, "step": 350 }, { - "epoch": 0.009011295849727123, - "grad_norm": 9.907842636108398, - "learning_rate": 1.9939924694335154e-05, - "loss": 1.0011, + "epoch": 0.018021219351236104, + "grad_norm": 1.5102581977844238, + "learning_rate": 1.9879858537658427e-05, + "loss": 0.6935, "step": 355 }, { - "epoch": 0.009138215509582435, - "grad_norm": 22.419662475585938, - "learning_rate": 1.9939078563269453e-05, - "loss": 0.993, + "epoch": 0.018275039342098582, + "grad_norm": 1.7649434804916382, + "learning_rate": 1.987816640438601e-05, + "loss": 0.6854, "step": 360 }, { - "epoch": 0.009265135169437746, - "grad_norm": 3.89047908782959, - "learning_rate": 1.993823243220375e-05, - "loss": 0.9965, + "epoch": 0.018528859332961063, + "grad_norm": 1.312583088874817, + "learning_rate": 1.9876474271113594e-05, + "loss": 0.796, "step": 365 }, { - "epoch": 0.009392054829293058, - "grad_norm": 7.672394275665283, - "learning_rate": 1.993738630113805e-05, - "loss": 0.9103, + "epoch": 0.018782679323823544, + "grad_norm": 1.6248362064361572, + "learning_rate": 1.9874782137841177e-05, + "loss": 0.6442, "step": 370 }, { - "epoch": 0.009518974489148369, - "grad_norm": 13.775672912597656, - "learning_rate": 1.9936540170072345e-05, - "loss": 0.9426, + "epoch": 0.019036499314686026, + "grad_norm": 1.6744344234466553, + "learning_rate": 1.9873090004568764e-05, + "loss": 0.6433, "step": 375 }, { - "epoch": 0.009645894149003681, - "grad_norm": 3.829921245574951, - "learning_rate": 1.9935694039006643e-05, - "loss": 0.9738, + "epoch": 0.019290319305548503, + "grad_norm": 1.5160279273986816, + "learning_rate": 1.9871397871296344e-05, + "loss": 0.6437, "step": 380 }, { - "epoch": 0.009772813808858992, - "grad_norm": 3.3131747245788574, - "learning_rate": 1.993484790794094e-05, - "loss": 0.8473, + "epoch": 0.019544139296410985, + "grad_norm": 1.3933593034744263, + "learning_rate": 1.9869705738023928e-05, + "loss": 0.6469, "step": 385 }, { - "epoch": 0.009899733468714304, - "grad_norm": 4.341257095336914, - "learning_rate": 1.993400177687524e-05, - "loss": 0.9637, + "epoch": 0.019797959287273466, + "grad_norm": 1.2953556776046753, + "learning_rate": 1.986801360475151e-05, + "loss": 0.6602, "step": 390 }, { - "epoch": 0.010026653128569615, - "grad_norm": 5.468863487243652, - "learning_rate": 1.9933155645809538e-05, - "loss": 0.8491, + "epoch": 0.020051779278135947, + "grad_norm": 1.387939214706421, + "learning_rate": 1.9866321471479095e-05, + "loss": 0.6201, "step": 395 }, { - "epoch": 0.010153572788424927, - "grad_norm": 12.207942962646484, - "learning_rate": 1.9932309514743837e-05, - "loss": 0.9425, + "epoch": 0.020305599268998425, + "grad_norm": 1.3470954895019531, + "learning_rate": 1.986462933820668e-05, + "loss": 0.6238, "step": 400 }, { - "epoch": 0.010280492448280238, - "grad_norm": 3.5878093242645264, - "learning_rate": 1.9931463383678135e-05, - "loss": 0.9482, + "epoch": 0.020559419259860906, + "grad_norm": 1.760582447052002, + "learning_rate": 1.9862937204934262e-05, + "loss": 0.6314, "step": 405 }, { - "epoch": 0.01040741210813555, - "grad_norm": 8.035053253173828, - "learning_rate": 1.9930617252612433e-05, - "loss": 0.8992, + "epoch": 0.020813239250723387, + "grad_norm": 1.6875195503234863, + "learning_rate": 1.9861245071661846e-05, + "loss": 0.6141, "step": 410 }, { - "epoch": 0.010534331767990861, - "grad_norm": 3.6767702102661133, - "learning_rate": 1.992977112154673e-05, - "loss": 0.8624, + "epoch": 0.02106705924158587, + "grad_norm": 1.4659909009933472, + "learning_rate": 1.985955293838943e-05, + "loss": 0.6416, "step": 415 }, { - "epoch": 0.010661251427846173, - "grad_norm": 11.812692642211914, - "learning_rate": 1.9928924990481027e-05, - "loss": 0.8126, + "epoch": 0.021320879232448346, + "grad_norm": 1.3127461671829224, + "learning_rate": 1.9857860805117013e-05, + "loss": 0.5959, "step": 420 }, { - "epoch": 0.010788171087701484, - "grad_norm": 3.734149694442749, - "learning_rate": 1.9928078859415325e-05, - "loss": 0.8651, + "epoch": 0.021574699223310827, + "grad_norm": 1.2059125900268555, + "learning_rate": 1.9856168671844596e-05, + "loss": 0.6423, "step": 425 }, { - "epoch": 0.010915090747556797, - "grad_norm": 2.6177423000335693, - "learning_rate": 1.9927232728349624e-05, - "loss": 0.794, + "epoch": 0.02182851921417331, + "grad_norm": 1.4507384300231934, + "learning_rate": 1.985447653857218e-05, + "loss": 0.619, "step": 430 }, { - "epoch": 0.011042010407412109, - "grad_norm": 8.637187004089355, - "learning_rate": 1.992638659728392e-05, - "loss": 0.8399, + "epoch": 0.02208233920503579, + "grad_norm": 1.2196464538574219, + "learning_rate": 1.9852784405299763e-05, + "loss": 0.6224, "step": 435 }, { - "epoch": 0.01116893006726742, - "grad_norm": 2.3038134574890137, - "learning_rate": 1.9925540466218217e-05, - "loss": 0.8192, + "epoch": 0.022336159195898268, + "grad_norm": 1.8387898206710815, + "learning_rate": 1.9851092272027347e-05, + "loss": 0.5911, "step": 440 }, { - "epoch": 0.011295849727122732, - "grad_norm": 5.3193888664245605, - "learning_rate": 1.9924694335152515e-05, - "loss": 0.825, + "epoch": 0.02258997918676075, + "grad_norm": 0.9832326769828796, + "learning_rate": 1.984940013875493e-05, + "loss": 0.5794, "step": 445 }, { - "epoch": 0.011422769386978043, - "grad_norm": 3.0405609607696533, - "learning_rate": 1.9923848204086814e-05, - "loss": 0.7812, + "epoch": 0.02284379917762323, + "grad_norm": 1.2416331768035889, + "learning_rate": 1.9847708005482514e-05, + "loss": 0.5693, "step": 450 }, { - "epoch": 0.011549689046833355, - "grad_norm": 4.860930919647217, - "learning_rate": 1.9923002073021112e-05, - "loss": 0.7578, + "epoch": 0.02309761916848571, + "grad_norm": 1.0847058296203613, + "learning_rate": 1.9846015872210097e-05, + "loss": 0.5366, "step": 455 }, { - "epoch": 0.011676608706688666, - "grad_norm": 2.7513668537139893, - "learning_rate": 1.992215594195541e-05, - "loss": 0.7241, + "epoch": 0.02335143915934819, + "grad_norm": 1.1567232608795166, + "learning_rate": 1.984432373893768e-05, + "loss": 0.5673, "step": 460 }, { - "epoch": 0.011803528366543978, - "grad_norm": 16.845439910888672, - "learning_rate": 1.992130981088971e-05, - "loss": 0.7192, + "epoch": 0.02360525915021067, + "grad_norm": 1.3223860263824463, + "learning_rate": 1.9842631605665265e-05, + "loss": 0.5835, "step": 465 }, { - "epoch": 0.011930448026399289, - "grad_norm": 3.0264551639556885, - "learning_rate": 1.9920463679824007e-05, - "loss": 0.7735, + "epoch": 0.02385907914107315, + "grad_norm": 1.5882163047790527, + "learning_rate": 1.9840939472392848e-05, + "loss": 0.5227, "step": 470 }, { - "epoch": 0.012057367686254601, - "grad_norm": 2.466160535812378, - "learning_rate": 1.9919617548758302e-05, - "loss": 0.7501, + "epoch": 0.024112899131935633, + "grad_norm": 2.358612537384033, + "learning_rate": 1.983924733912043e-05, + "loss": 0.5583, "step": 475 }, { - "epoch": 0.012184287346109912, - "grad_norm": 1.97993004322052, - "learning_rate": 1.99187714176926e-05, - "loss": 0.7444, + "epoch": 0.02436671912279811, + "grad_norm": 1.134081244468689, + "learning_rate": 1.9837555205848015e-05, + "loss": 0.5728, "step": 480 }, { - "epoch": 0.012311207005965224, - "grad_norm": 2.8091845512390137, - "learning_rate": 1.99179252866269e-05, - "loss": 0.6785, + "epoch": 0.02462053911366059, + "grad_norm": 1.3474904298782349, + "learning_rate": 1.98358630725756e-05, + "loss": 0.5207, "step": 485 }, { - "epoch": 0.012438126665820535, - "grad_norm": 1.9675675630569458, - "learning_rate": 1.9917079155561198e-05, - "loss": 0.7171, + "epoch": 0.024874359104523073, + "grad_norm": 0.914055585861206, + "learning_rate": 1.9834170939303182e-05, + "loss": 0.5529, "step": 490 }, { - "epoch": 0.012565046325675847, - "grad_norm": 3.2653939723968506, - "learning_rate": 1.9916233024495496e-05, - "loss": 0.782, + "epoch": 0.025128179095385554, + "grad_norm": 1.1716861724853516, + "learning_rate": 1.9832478806030762e-05, + "loss": 0.5516, "step": 495 }, { - "epoch": 0.012691965985531158, - "grad_norm": 2.545034646987915, - "learning_rate": 1.9915386893429794e-05, - "loss": 0.709, + "epoch": 0.025381999086248032, + "grad_norm": 1.6925829648971558, + "learning_rate": 1.983078667275835e-05, + "loss": 0.537, "step": 500 }, { - "epoch": 0.01281888564538647, - "grad_norm": 2.072016716003418, - "learning_rate": 1.9914540762364093e-05, - "loss": 0.7457, + "epoch": 0.025635819077110513, + "grad_norm": 4.091062545776367, + "learning_rate": 1.9829094539485933e-05, + "loss": 0.6366, "step": 505 }, { - "epoch": 0.012945805305241783, - "grad_norm": 2.087191104888916, - "learning_rate": 1.991369463129839e-05, - "loss": 0.6719, + "epoch": 0.025889639067972994, + "grad_norm": 1.6175552606582642, + "learning_rate": 1.9827402406213513e-05, + "loss": 0.503, "step": 510 }, { - "epoch": 0.013072724965097093, - "grad_norm": 2.222727060317993, - "learning_rate": 1.9912848500232686e-05, - "loss": 0.7209, + "epoch": 0.026143459058835476, + "grad_norm": 1.2992746829986572, + "learning_rate": 1.98257102729411e-05, + "loss": 0.5253, "step": 515 }, { - "epoch": 0.013199644624952406, - "grad_norm": 1.569743037223816, - "learning_rate": 1.9912002369166985e-05, - "loss": 0.69, + "epoch": 0.026397279049697953, + "grad_norm": 0.9036175012588501, + "learning_rate": 1.982401813966868e-05, + "loss": 0.532, "step": 520 }, { - "epoch": 0.013326564284807716, - "grad_norm": 3.3362247943878174, - "learning_rate": 1.9911156238101283e-05, - "loss": 0.6717, + "epoch": 0.026651099040560434, + "grad_norm": 1.241174340248108, + "learning_rate": 1.9822326006396267e-05, + "loss": 0.5049, "step": 525 }, { - "epoch": 0.013453483944663029, - "grad_norm": 1.7651731967926025, - "learning_rate": 1.991031010703558e-05, - "loss": 0.7131, + "epoch": 0.026904919031422916, + "grad_norm": 1.9662011861801147, + "learning_rate": 1.982063387312385e-05, + "loss": 0.4869, "step": 530 }, { - "epoch": 0.01358040360451834, - "grad_norm": 2.0284948348999023, - "learning_rate": 1.990946397596988e-05, - "loss": 0.7314, + "epoch": 0.027158739022285393, + "grad_norm": 1.3847944736480713, + "learning_rate": 1.981894173985143e-05, + "loss": 0.5257, "step": 535 }, { - "epoch": 0.013707323264373652, - "grad_norm": 1.9415416717529297, - "learning_rate": 1.9908617844904178e-05, - "loss": 0.691, + "epoch": 0.027412559013147875, + "grad_norm": 1.0714912414550781, + "learning_rate": 1.9817249606579018e-05, + "loss": 0.4935, "step": 540 }, { - "epoch": 0.013834242924228963, - "grad_norm": 2.5840821266174316, - "learning_rate": 1.9907771713838477e-05, - "loss": 0.6989, + "epoch": 0.027666379004010356, + "grad_norm": 1.3018203973770142, + "learning_rate": 1.9815557473306598e-05, + "loss": 0.4876, "step": 545 }, { - "epoch": 0.013961162584084275, - "grad_norm": 2.91890811920166, - "learning_rate": 1.9906925582772775e-05, - "loss": 0.6813, + "epoch": 0.027920198994872837, + "grad_norm": 1.309104084968567, + "learning_rate": 1.981386534003418e-05, + "loss": 0.5586, "step": 550 }, { - "epoch": 0.014088082243939586, - "grad_norm": 2.2091805934906006, - "learning_rate": 1.9906079451707073e-05, - "loss": 0.7072, + "epoch": 0.028174018985735315, + "grad_norm": 1.4573897123336792, + "learning_rate": 1.9812173206761768e-05, + "loss": 0.4816, "step": 555 }, { - "epoch": 0.014215001903794898, - "grad_norm": 2.8915011882781982, - "learning_rate": 1.990523332064137e-05, - "loss": 0.6928, + "epoch": 0.028427838976597796, + "grad_norm": 1.175330638885498, + "learning_rate": 1.981048107348935e-05, + "loss": 0.5306, "step": 560 }, { - "epoch": 0.014341921563650209, - "grad_norm": 2.807535171508789, - "learning_rate": 1.9904387189575667e-05, - "loss": 0.6777, + "epoch": 0.028681658967460277, + "grad_norm": 3.1696276664733887, + "learning_rate": 1.9808788940216935e-05, + "loss": 0.6168, "step": 565 }, { - "epoch": 0.014468841223505521, - "grad_norm": 1.944183349609375, - "learning_rate": 1.9903541058509965e-05, - "loss": 0.6442, + "epoch": 0.02893547895832276, + "grad_norm": 1.5825937986373901, + "learning_rate": 1.9807096806944516e-05, + "loss": 0.5071, "step": 570 }, { - "epoch": 0.014595760883360833, - "grad_norm": 2.8239967823028564, - "learning_rate": 1.9902694927444264e-05, - "loss": 0.6956, + "epoch": 0.029189298949185236, + "grad_norm": 2.6860151290893555, + "learning_rate": 1.98054046736721e-05, + "loss": 0.5171, "step": 575 }, { - "epoch": 0.014722680543216144, - "grad_norm": 2.3459413051605225, - "learning_rate": 1.990184879637856e-05, - "loss": 0.6745, + "epoch": 0.029443118940047718, + "grad_norm": 1.351448655128479, + "learning_rate": 1.9803712540399686e-05, + "loss": 0.4982, "step": 580 }, { - "epoch": 0.014849600203071456, - "grad_norm": 2.7598185539245605, - "learning_rate": 1.9901002665312857e-05, - "loss": 0.6775, + "epoch": 0.0296969389309102, + "grad_norm": 21.1049747467041, + "learning_rate": 1.9802020407127266e-05, + "loss": 0.5326, "step": 585 }, { - "epoch": 0.014976519862926767, - "grad_norm": 1.6821421384811401, - "learning_rate": 1.9900156534247155e-05, - "loss": 0.679, + "epoch": 0.02995075892177268, + "grad_norm": 93.52015686035156, + "learning_rate": 1.980032827385485e-05, + "loss": 0.4622, "step": 590 }, { - "epoch": 0.01510343952278208, - "grad_norm": 7.508544921875, - "learning_rate": 1.9899310403181454e-05, - "loss": 0.6209, + "epoch": 0.030204578912635158, + "grad_norm": 1.095617651939392, + "learning_rate": 1.9798636140582433e-05, + "loss": 0.457, "step": 595 }, { - "epoch": 0.01523035918263739, - "grad_norm": 1.7016167640686035, - "learning_rate": 1.9898464272115752e-05, - "loss": 0.6186, + "epoch": 0.03045839890349764, + "grad_norm": 1.1381208896636963, + "learning_rate": 1.9796944007310017e-05, + "loss": 0.4665, "step": 600 }, { - "epoch": 0.015357278842492703, - "grad_norm": 1.6929012537002563, - "learning_rate": 1.989761814105005e-05, - "loss": 0.6791, + "epoch": 0.03071221889436012, + "grad_norm": 146.4381866455078, + "learning_rate": 1.97952518740376e-05, + "loss": 0.4723, "step": 605 }, { - "epoch": 0.015484198502348013, - "grad_norm": 2.500966787338257, - "learning_rate": 1.989677200998435e-05, - "loss": 0.5933, + "epoch": 0.0309660388852226, + "grad_norm": 1.255440592765808, + "learning_rate": 1.9793559740765184e-05, + "loss": 0.4205, "step": 610 }, { - "epoch": 0.015611118162203326, - "grad_norm": 1.9297876358032227, - "learning_rate": 1.9895925878918647e-05, - "loss": 0.8199, + "epoch": 0.03121985887608508, + "grad_norm": 1.259681224822998, + "learning_rate": 1.9791867607492767e-05, + "loss": 0.4506, "step": 615 }, { - "epoch": 0.015738037822058636, - "grad_norm": 1.8862497806549072, - "learning_rate": 1.9895079747852942e-05, - "loss": 0.6254, + "epoch": 0.03147367886694756, + "grad_norm": 1.818813681602478, + "learning_rate": 1.979017547422035e-05, + "loss": 0.4028, "step": 620 }, { - "epoch": 0.01586495748191395, - "grad_norm": 1.6419658660888672, - "learning_rate": 1.989423361678724e-05, - "loss": 0.5598, + "epoch": 0.03172749885781004, + "grad_norm": 1.2193713188171387, + "learning_rate": 1.9788483340947935e-05, + "loss": 0.4232, "step": 625 }, { - "epoch": 0.01599187714176926, - "grad_norm": 2.011530876159668, - "learning_rate": 1.989338748572154e-05, - "loss": 0.6226, + "epoch": 0.03198131884867252, + "grad_norm": 1.1357531547546387, + "learning_rate": 1.9786791207675518e-05, + "loss": 0.4146, "step": 630 }, { - "epoch": 0.01611879680162457, - "grad_norm": 2.407090187072754, - "learning_rate": 1.9892541354655838e-05, - "loss": 0.6002, + "epoch": 0.032235138839535, + "grad_norm": 37.55704879760742, + "learning_rate": 1.97850990744031e-05, + "loss": 0.4468, "step": 635 }, { - "epoch": 0.016245716461479882, - "grad_norm": 1.8481730222702026, - "learning_rate": 1.9891695223590136e-05, - "loss": 0.5483, + "epoch": 0.032488958830397485, + "grad_norm": 1.28550386428833, + "learning_rate": 1.9783406941130685e-05, + "loss": 0.4205, "step": 640 }, { - "epoch": 0.016372636121335195, - "grad_norm": 1.8709776401519775, - "learning_rate": 1.9890849092524435e-05, - "loss": 0.6081, + "epoch": 0.03274277882125996, + "grad_norm": 1.4312454462051392, + "learning_rate": 1.978171480785827e-05, + "loss": 0.4807, "step": 645 }, { - "epoch": 0.016499555781190507, - "grad_norm": 1.3984227180480957, - "learning_rate": 1.9890002961458733e-05, - "loss": 0.5821, + "epoch": 0.03299659881212244, + "grad_norm": 397.8225402832031, + "learning_rate": 1.9780022674585852e-05, + "loss": 0.423, "step": 650 }, { - "epoch": 0.01662647544104582, - "grad_norm": 1.504011869430542, - "learning_rate": 1.988915683039303e-05, - "loss": 0.5864, + "epoch": 0.033250418802984925, + "grad_norm": 1.1653971672058105, + "learning_rate": 1.9778330541313436e-05, + "loss": 0.4284, "step": 655 }, { - "epoch": 0.01675339510090113, - "grad_norm": 1.7107971906661987, - "learning_rate": 1.9888310699327326e-05, - "loss": 0.6051, + "epoch": 0.0335042387938474, + "grad_norm": 1.3684107065200806, + "learning_rate": 1.977663840804102e-05, + "loss": 0.4189, "step": 660 }, { - "epoch": 0.01688031476075644, - "grad_norm": 1.7949597835540771, - "learning_rate": 1.9887464568261625e-05, - "loss": 0.5969, + "epoch": 0.03375805878470988, + "grad_norm": 1.4786688089370728, + "learning_rate": 1.9774946274768603e-05, + "loss": 0.3919, "step": 665 }, { - "epoch": 0.017007234420611753, - "grad_norm": 127.48331451416016, - "learning_rate": 1.9886618437195923e-05, - "loss": 0.5663, + "epoch": 0.034011878775572366, + "grad_norm": 1.0354373455047607, + "learning_rate": 1.9773254141496186e-05, + "loss": 0.4386, "step": 670 }, { - "epoch": 0.017134154080467066, - "grad_norm": 2.171943187713623, - "learning_rate": 1.988577230613022e-05, - "loss": 0.5335, + "epoch": 0.03426569876643484, + "grad_norm": 2.865295171737671, + "learning_rate": 1.977156200822377e-05, + "loss": 0.4476, "step": 675 }, { - "epoch": 0.017261073740322375, - "grad_norm": 9.2117338180542, - "learning_rate": 1.988492617506452e-05, - "loss": 0.6349, + "epoch": 0.03451951875729733, + "grad_norm": 0.9796468615531921, + "learning_rate": 1.9769869874951354e-05, + "loss": 0.3822, "step": 680 }, { - "epoch": 0.017387993400177687, - "grad_norm": 2.184303045272827, - "learning_rate": 1.988408004399882e-05, - "loss": 0.599, + "epoch": 0.034773338748159806, + "grad_norm": 1.1535001993179321, + "learning_rate": 1.9768177741678937e-05, + "loss": 0.3935, "step": 685 }, { - "epoch": 0.017514913060033, - "grad_norm": 2.2831785678863525, - "learning_rate": 1.9883233912933117e-05, - "loss": 0.5951, + "epoch": 0.035027158739022284, + "grad_norm": 1.33408522605896, + "learning_rate": 1.976648560840652e-05, + "loss": 0.4267, "step": 690 }, { - "epoch": 0.017641832719888312, - "grad_norm": 6.112076759338379, - "learning_rate": 1.9882387781867415e-05, - "loss": 0.4657, + "epoch": 0.03528097872988477, + "grad_norm": 1.2834749221801758, + "learning_rate": 1.9764793475134104e-05, + "loss": 0.3954, "step": 695 }, { - "epoch": 0.01776875237974362, - "grad_norm": 1.83601975440979, - "learning_rate": 1.988154165080171e-05, - "loss": 0.5884, + "epoch": 0.035534798720747246, + "grad_norm": 0.8446087837219238, + "learning_rate": 1.9763101341861684e-05, + "loss": 0.3865, "step": 700 }, { - "epoch": 0.017895672039598933, - "grad_norm": 3.089984893798828, - "learning_rate": 1.988069551973601e-05, - "loss": 0.5587, + "epoch": 0.035788618711609724, + "grad_norm": 1.3027830123901367, + "learning_rate": 1.976140920858927e-05, + "loss": 0.4294, "step": 705 }, { - "epoch": 0.018022591699454246, - "grad_norm": 1.8256577253341675, - "learning_rate": 1.9879849388670307e-05, - "loss": 0.5146, + "epoch": 0.03604243870247221, + "grad_norm": 0.8204338550567627, + "learning_rate": 1.9759717075316855e-05, + "loss": 0.4074, "step": 710 }, { - "epoch": 0.018149511359309558, - "grad_norm": 1.2864331007003784, - "learning_rate": 1.9879003257604605e-05, - "loss": 0.5248, + "epoch": 0.036296258693334686, + "grad_norm": 1.232735276222229, + "learning_rate": 1.975802494204444e-05, + "loss": 0.413, "step": 715 }, { - "epoch": 0.01827643101916487, - "grad_norm": 2.7897603511810303, - "learning_rate": 1.98781571265389e-05, - "loss": 0.537, + "epoch": 0.036550078684197164, + "grad_norm": 1.436716079711914, + "learning_rate": 1.9756332808772022e-05, + "loss": 0.4133, "step": 720 }, { - "epoch": 0.01840335067902018, - "grad_norm": 1.6626474857330322, - "learning_rate": 1.98773109954732e-05, - "loss": 0.4944, + "epoch": 0.03680389867505965, + "grad_norm": 0.9789804816246033, + "learning_rate": 1.9754640675499602e-05, + "loss": 0.4078, "step": 725 }, { - "epoch": 0.01853027033887549, - "grad_norm": 1.8181853294372559, - "learning_rate": 1.9876464864407497e-05, - "loss": 0.5577, + "epoch": 0.037057718665922126, + "grad_norm": 1.6822584867477417, + "learning_rate": 1.975294854222719e-05, + "loss": 0.4372, "step": 730 }, { - "epoch": 0.018657189998730804, - "grad_norm": 484.7244873046875, - "learning_rate": 1.9875618733341796e-05, - "loss": 0.5389, + "epoch": 0.03731153865678461, + "grad_norm": 1.0740268230438232, + "learning_rate": 1.9751256408954773e-05, + "loss": 0.4162, "step": 735 }, { - "epoch": 0.018784109658586116, - "grad_norm": 405.4925537109375, - "learning_rate": 1.9874772602276094e-05, - "loss": 0.6821, + "epoch": 0.03756535864764709, + "grad_norm": 1.06478750705719, + "learning_rate": 1.9749564275682353e-05, + "loss": 0.4091, "step": 740 }, { - "epoch": 0.018911029318441425, - "grad_norm": 1.7263588905334473, - "learning_rate": 1.9873926471210392e-05, - "loss": 0.5014, + "epoch": 0.03781917863850957, + "grad_norm": 0.8764176368713379, + "learning_rate": 1.974787214240994e-05, + "loss": 0.4438, "step": 745 }, { - "epoch": 0.019037948978296738, - "grad_norm": 1.3942608833312988, - "learning_rate": 1.987308034014469e-05, - "loss": 0.5171, + "epoch": 0.03807299862937205, + "grad_norm": 0.9054062366485596, + "learning_rate": 1.974618000913752e-05, + "loss": 0.4423, "step": 750 }, { - "epoch": 0.01916486863815205, - "grad_norm": 1.584958791732788, - "learning_rate": 1.987223420907899e-05, - "loss": 0.5321, + "epoch": 0.03832681862023453, + "grad_norm": 1.0033007860183716, + "learning_rate": 1.9744487875865103e-05, + "loss": 0.4017, "step": 755 }, { - "epoch": 0.019291788298007363, - "grad_norm": 2.930830717086792, - "learning_rate": 1.9871388078013284e-05, - "loss": 0.6084, + "epoch": 0.03858063861109701, + "grad_norm": 1.1299196481704712, + "learning_rate": 1.974279574259269e-05, + "loss": 0.3955, "step": 760 }, { - "epoch": 0.01941870795786267, - "grad_norm": 2.345466375350952, - "learning_rate": 1.9870541946947583e-05, - "loss": 0.438, + "epoch": 0.03883445860195949, + "grad_norm": 1.2052655220031738, + "learning_rate": 1.974110360932027e-05, + "loss": 0.3745, "step": 765 }, { - "epoch": 0.019545627617717984, - "grad_norm": 1.4316664934158325, - "learning_rate": 1.986969581588188e-05, - "loss": 0.5083, + "epoch": 0.03908827859282197, + "grad_norm": 3.022794246673584, + "learning_rate": 1.9739411476047857e-05, + "loss": 0.3888, "step": 770 }, { - "epoch": 0.019672547277573296, - "grad_norm": 2.0762693881988525, - "learning_rate": 1.986884968481618e-05, - "loss": 0.5177, + "epoch": 0.039342098583684454, + "grad_norm": 5.622093200683594, + "learning_rate": 1.9737719342775437e-05, + "loss": 0.379, "step": 775 }, { - "epoch": 0.01979946693742861, - "grad_norm": 2.124021291732788, - "learning_rate": 1.9868003553750478e-05, - "loss": 0.4703, + "epoch": 0.03959591857454693, + "grad_norm": 1.1566879749298096, + "learning_rate": 1.973602720950302e-05, + "loss": 0.3739, "step": 780 }, { - "epoch": 0.019926386597283918, - "grad_norm": 1.331770658493042, - "learning_rate": 1.9867157422684776e-05, - "loss": 0.46, + "epoch": 0.03984973856540941, + "grad_norm": 2.6651649475097656, + "learning_rate": 1.9734335076230608e-05, + "loss": 0.3838, "step": 785 }, { - "epoch": 0.02005330625713923, - "grad_norm": 1.936895728111267, - "learning_rate": 1.9866311291619075e-05, - "loss": 0.4678, + "epoch": 0.040103558556271894, + "grad_norm": 1.4303189516067505, + "learning_rate": 1.9732642942958188e-05, + "loss": 0.4171, "step": 790 }, { - "epoch": 0.020180225916994542, - "grad_norm": 2.015153408050537, - "learning_rate": 1.9865465160553373e-05, - "loss": 0.4699, + "epoch": 0.04035737854713437, + "grad_norm": 0.97078937292099, + "learning_rate": 1.973095080968577e-05, + "loss": 0.3513, "step": 795 }, { - "epoch": 0.020307145576849855, - "grad_norm": 2.166449785232544, - "learning_rate": 1.9864619029487668e-05, - "loss": 0.4344, + "epoch": 0.04061119853799685, + "grad_norm": 0.9570413827896118, + "learning_rate": 1.9729258676413355e-05, + "loss": 0.3702, "step": 800 }, { - "epoch": 0.020434065236705167, - "grad_norm": 1.978447675704956, - "learning_rate": 1.9863772898421966e-05, - "loss": 0.4843, + "epoch": 0.040865018528859334, + "grad_norm": 1.7016774415969849, + "learning_rate": 1.972756654314094e-05, + "loss": 0.3608, "step": 805 }, { - "epoch": 0.020560984896560476, - "grad_norm": 1.6693263053894043, - "learning_rate": 1.9862926767356265e-05, - "loss": 0.4884, + "epoch": 0.04111883851972181, + "grad_norm": 1.1286097764968872, + "learning_rate": 1.9725874409868522e-05, + "loss": 0.3915, "step": 810 }, { - "epoch": 0.02068790455641579, - "grad_norm": 1.773596167564392, - "learning_rate": 1.9862080636290563e-05, - "loss": 0.4804, + "epoch": 0.0413726585105843, + "grad_norm": 1.0915040969848633, + "learning_rate": 1.9724182276596106e-05, + "loss": 0.3501, "step": 815 }, { - "epoch": 0.0208148242162711, - "grad_norm": 1.8305485248565674, - "learning_rate": 1.986123450522486e-05, - "loss": 0.5037, + "epoch": 0.041626478501446774, + "grad_norm": 3.019122362136841, + "learning_rate": 1.972249014332369e-05, + "loss": 0.3413, "step": 820 }, { - "epoch": 0.020941743876126413, - "grad_norm": 1.3299435377120972, - "learning_rate": 1.986038837415916e-05, - "loss": 0.4808, + "epoch": 0.04188029849230925, + "grad_norm": 1.6340800523757935, + "learning_rate": 1.9720798010051273e-05, + "loss": 0.3902, "step": 825 }, { - "epoch": 0.021068663535981722, - "grad_norm": 1.980573296546936, - "learning_rate": 1.985954224309346e-05, - "loss": 0.4423, + "epoch": 0.04213411848317174, + "grad_norm": 4.086325645446777, + "learning_rate": 1.9719105876778856e-05, + "loss": 0.4549, "step": 830 }, { - "epoch": 0.021195583195837035, - "grad_norm": 1.5860133171081543, - "learning_rate": 1.9858696112027757e-05, - "loss": 0.4424, + "epoch": 0.042387938474034215, + "grad_norm": 1.0035637617111206, + "learning_rate": 1.971741374350644e-05, + "loss": 0.3406, "step": 835 }, { - "epoch": 0.021322502855692347, - "grad_norm": 2.5545129776000977, - "learning_rate": 1.9857849980962052e-05, - "loss": 0.4822, + "epoch": 0.04264175846489669, + "grad_norm": 0.8659387230873108, + "learning_rate": 1.9715721610234024e-05, + "loss": 0.3907, "step": 840 }, { - "epoch": 0.02144942251554766, - "grad_norm": 1.492748737335205, - "learning_rate": 1.985700384989635e-05, - "loss": 0.4854, + "epoch": 0.04289557845575918, + "grad_norm": 1.208949327468872, + "learning_rate": 1.9714029476961607e-05, + "loss": 0.3448, "step": 845 }, { - "epoch": 0.02157634217540297, - "grad_norm": 2.3680524826049805, - "learning_rate": 1.985615771883065e-05, - "loss": 0.588, + "epoch": 0.043149398446621655, + "grad_norm": 1.0771589279174805, + "learning_rate": 1.971233734368919e-05, + "loss": 0.3617, "step": 850 }, { - "epoch": 0.02170326183525828, - "grad_norm": 2.0658044815063477, - "learning_rate": 1.9855311587764947e-05, - "loss": 0.5187, + "epoch": 0.04340321843748414, + "grad_norm": 1.216821312904358, + "learning_rate": 1.9710645210416774e-05, + "loss": 0.3516, "step": 855 }, { - "epoch": 0.021830181495113593, - "grad_norm": 1.6600284576416016, - "learning_rate": 1.9854465456699242e-05, - "loss": 0.535, + "epoch": 0.04365703842834662, + "grad_norm": 0.8121851086616516, + "learning_rate": 1.9708953077144358e-05, + "loss": 0.3514, "step": 860 }, { - "epoch": 0.021957101154968905, - "grad_norm": 10.994009971618652, - "learning_rate": 1.985361932563354e-05, - "loss": 0.4939, + "epoch": 0.043910858419209095, + "grad_norm": 0.9845564961433411, + "learning_rate": 1.970726094387194e-05, + "loss": 0.3626, "step": 865 }, { - "epoch": 0.022084020814824218, - "grad_norm": 4.777219295501709, - "learning_rate": 1.985277319456784e-05, - "loss": 0.5653, + "epoch": 0.04416467841007158, + "grad_norm": 276.0869140625, + "learning_rate": 1.9705568810599525e-05, + "loss": 0.3701, "step": 870 }, { - "epoch": 0.022210940474679527, - "grad_norm": 1.8281011581420898, - "learning_rate": 1.9851927063502137e-05, - "loss": 0.456, + "epoch": 0.04441849840093406, + "grad_norm": 0.9632354974746704, + "learning_rate": 1.970387667732711e-05, + "loss": 0.3553, "step": 875 }, { - "epoch": 0.02233786013453484, - "grad_norm": 1.5164823532104492, - "learning_rate": 1.9851080932436436e-05, - "loss": 0.4411, + "epoch": 0.044672318391796535, + "grad_norm": 1.2075852155685425, + "learning_rate": 1.9702184544054692e-05, + "loss": 0.3355, "step": 880 }, { - "epoch": 0.02246477979439015, - "grad_norm": 1.4120644330978394, - "learning_rate": 1.9850234801370734e-05, - "loss": 0.4323, + "epoch": 0.04492613838265902, + "grad_norm": 1.1684023141860962, + "learning_rate": 1.9700492410782275e-05, + "loss": 0.3593, "step": 885 }, { - "epoch": 0.022591699454245464, - "grad_norm": 1.5695284605026245, - "learning_rate": 1.9849388670305032e-05, - "loss": 0.4206, + "epoch": 0.0451799583735215, + "grad_norm": 1.093794345855713, + "learning_rate": 1.969880027750986e-05, + "loss": 0.3217, "step": 890 }, { - "epoch": 0.022718619114100773, - "grad_norm": 1.3185738325119019, - "learning_rate": 1.984854253923933e-05, - "loss": 0.404, + "epoch": 0.045433778364383975, + "grad_norm": 0.8607542514801025, + "learning_rate": 1.9697108144237443e-05, + "loss": 0.3595, "step": 895 }, { - "epoch": 0.022845538773956085, - "grad_norm": 1.3959448337554932, - "learning_rate": 1.9847696408173626e-05, - "loss": 0.4648, + "epoch": 0.04568759835524646, + "grad_norm": 1.0586198568344116, + "learning_rate": 1.9695416010965026e-05, + "loss": 0.3402, "step": 900 }, { - "epoch": 0.022972458433811398, - "grad_norm": 2.3599069118499756, - "learning_rate": 1.9846850277107924e-05, - "loss": 0.4892, + "epoch": 0.04594141834610894, + "grad_norm": 1.3861478567123413, + "learning_rate": 1.9693723877692606e-05, + "loss": 0.3389, "step": 905 }, { - "epoch": 0.02309937809366671, - "grad_norm": 1.9921172857284546, - "learning_rate": 1.9846004146042223e-05, - "loss": 0.7107, + "epoch": 0.04619523833697142, + "grad_norm": 1.1892589330673218, + "learning_rate": 1.9692031744420193e-05, + "loss": 0.3371, "step": 910 }, { - "epoch": 0.02322629775352202, - "grad_norm": 1.4697048664093018, - "learning_rate": 1.984515801497652e-05, - "loss": 0.4679, + "epoch": 0.0464490583278339, + "grad_norm": 1.0808658599853516, + "learning_rate": 1.9690339611147777e-05, + "loss": 0.3632, "step": 915 }, { - "epoch": 0.02335321741337733, - "grad_norm": 1.7421170473098755, - "learning_rate": 1.984431188391082e-05, - "loss": 0.4107, + "epoch": 0.04670287831869638, + "grad_norm": 6.600335121154785, + "learning_rate": 1.968864747787536e-05, + "loss": 0.3464, "step": 920 }, { - "epoch": 0.023480137073232644, - "grad_norm": 2.310112237930298, - "learning_rate": 1.9843465752845118e-05, - "loss": 0.4482, + "epoch": 0.04695669830955886, + "grad_norm": 0.9508718252182007, + "learning_rate": 1.9686955344602944e-05, + "loss": 0.3804, "step": 925 }, { - "epoch": 0.023607056733087956, - "grad_norm": 1.5403507947921753, - "learning_rate": 1.9842619621779416e-05, - "loss": 0.4698, + "epoch": 0.04721051830042134, + "grad_norm": 0.8976243734359741, + "learning_rate": 1.9685263211330524e-05, + "loss": 0.3379, "step": 930 }, { - "epoch": 0.02373397639294327, - "grad_norm": 1.6988937854766846, - "learning_rate": 1.9841773490713715e-05, - "loss": 0.4607, + "epoch": 0.04746433829128382, + "grad_norm": 0.816540539264679, + "learning_rate": 1.968357107805811e-05, + "loss": 0.3479, "step": 935 }, { - "epoch": 0.023860896052798578, - "grad_norm": 2.5309653282165527, - "learning_rate": 1.984092735964801e-05, - "loss": 0.4811, + "epoch": 0.0477181582821463, + "grad_norm": 1.652158498764038, + "learning_rate": 1.9681878944785694e-05, + "loss": 0.3458, "step": 940 }, { - "epoch": 0.02398781571265389, - "grad_norm": 1.3638319969177246, - "learning_rate": 1.9840081228582308e-05, - "loss": 0.464, + "epoch": 0.04797197827300878, + "grad_norm": 0.913906991481781, + "learning_rate": 1.9680186811513275e-05, + "loss": 0.3456, "step": 945 }, { - "epoch": 0.024114735372509202, - "grad_norm": 2.858698844909668, - "learning_rate": 1.9839235097516607e-05, - "loss": 0.512, + "epoch": 0.048225798263871265, + "grad_norm": 1.0308032035827637, + "learning_rate": 1.967849467824086e-05, + "loss": 0.3384, "step": 950 }, { - "epoch": 0.024241655032364515, - "grad_norm": 1.94855535030365, - "learning_rate": 1.9838388966450905e-05, - "loss": 0.4298, + "epoch": 0.04847961825473374, + "grad_norm": 1.0393309593200684, + "learning_rate": 1.9676802544968442e-05, + "loss": 0.3399, "step": 955 }, { - "epoch": 0.024368574692219824, - "grad_norm": 1.269916296005249, - "learning_rate": 1.9837542835385203e-05, - "loss": 0.413, + "epoch": 0.04873343824559622, + "grad_norm": 0.9788406491279602, + "learning_rate": 1.967511041169603e-05, + "loss": 0.3387, "step": 960 }, { - "epoch": 0.024495494352075136, - "grad_norm": 1.1188828945159912, - "learning_rate": 1.9836696704319502e-05, - "loss": 0.3874, + "epoch": 0.048987258236458706, + "grad_norm": 1.0427477359771729, + "learning_rate": 1.9673418278423612e-05, + "loss": 0.3398, "step": 965 }, { - "epoch": 0.02462241401193045, - "grad_norm": 1.2943288087844849, - "learning_rate": 1.98358505732538e-05, - "loss": 0.4212, + "epoch": 0.04924107822732118, + "grad_norm": 0.898420512676239, + "learning_rate": 1.9671726145151192e-05, + "loss": 0.3273, "step": 970 }, { - "epoch": 0.02474933367178576, - "grad_norm": 1.1663885116577148, - "learning_rate": 1.98350044421881e-05, - "loss": 0.3977, + "epoch": 0.04949489821818366, + "grad_norm": 1.0824823379516602, + "learning_rate": 1.967003401187878e-05, + "loss": 0.3541, "step": 975 }, { - "epoch": 0.02487625333164107, - "grad_norm": 1.3165886402130127, - "learning_rate": 1.9834158311122394e-05, - "loss": 0.3907, + "epoch": 0.049748718209046146, + "grad_norm": 0.8971902132034302, + "learning_rate": 1.966834187860636e-05, + "loss": 0.304, "step": 980 }, { - "epoch": 0.025003172991496382, - "grad_norm": 1.942629098892212, - "learning_rate": 1.9833312180056692e-05, - "loss": 0.4042, + "epoch": 0.050002538199908624, + "grad_norm": 2.5218119621276855, + "learning_rate": 1.9666649745333943e-05, + "loss": 0.3209, "step": 985 }, { - "epoch": 0.025130092651351695, - "grad_norm": 1.6610792875289917, - "learning_rate": 1.983246604899099e-05, - "loss": 0.4235, + "epoch": 0.05025635819077111, + "grad_norm": 0.8553778529167175, + "learning_rate": 1.9664957612061527e-05, + "loss": 0.3208, "step": 990 }, { - "epoch": 0.025257012311207007, - "grad_norm": 1.3497353792190552, - "learning_rate": 1.983161991792529e-05, - "loss": 0.4039, + "epoch": 0.050510178181633586, + "grad_norm": 1.212137222290039, + "learning_rate": 1.966326547878911e-05, + "loss": 0.3412, "step": 995 }, { - "epoch": 0.025383931971062316, - "grad_norm": 3.511706590652466, - "learning_rate": 1.9830773786859584e-05, - "loss": 0.4186, + "epoch": 0.050763998172496064, + "grad_norm": 0.7518376708030701, + "learning_rate": 1.9661573345516694e-05, + "loss": 0.4053, "step": 1000 }, { - "epoch": 0.025510851630917628, - "grad_norm": 2.033642292022705, - "learning_rate": 1.9829927655793882e-05, - "loss": 0.4059, + "epoch": 0.05101781816335855, + "grad_norm": 0.9260375499725342, + "learning_rate": 1.9659881212244277e-05, + "loss": 0.3254, "step": 1005 }, { - "epoch": 0.02563777129077294, - "grad_norm": 14.805602073669434, - "learning_rate": 1.982908152472818e-05, - "loss": 0.3637, + "epoch": 0.051271638154221026, + "grad_norm": 1.103661060333252, + "learning_rate": 1.965818907897186e-05, + "loss": 0.3211, "step": 1010 }, { - "epoch": 0.025764690950628253, - "grad_norm": 1.458915114402771, - "learning_rate": 1.982823539366248e-05, - "loss": 0.3988, + "epoch": 0.051525458145083504, + "grad_norm": 1.1954058408737183, + "learning_rate": 1.9656496945699444e-05, + "loss": 0.3486, "step": 1015 }, { - "epoch": 0.025891610610483565, - "grad_norm": 1.4925204515457153, - "learning_rate": 1.9827389262596777e-05, - "loss": 0.4159, + "epoch": 0.05177927813594599, + "grad_norm": 0.992374837398529, + "learning_rate": 1.9654804812427028e-05, + "loss": 0.3211, "step": 1020 }, { - "epoch": 0.026018530270338874, - "grad_norm": 1.2992656230926514, - "learning_rate": 1.9826543131531076e-05, - "loss": 0.3746, + "epoch": 0.052033098126808466, + "grad_norm": 4.1818413734436035, + "learning_rate": 1.965311267915461e-05, + "loss": 0.3095, "step": 1025 }, { - "epoch": 0.026145449930194187, - "grad_norm": 1.628023386001587, - "learning_rate": 1.9825697000465374e-05, - "loss": 0.4647, + "epoch": 0.05228691811767095, + "grad_norm": 1.0134354829788208, + "learning_rate": 1.9651420545882195e-05, + "loss": 0.3281, "step": 1030 }, { - "epoch": 0.0262723695900495, - "grad_norm": 1.6135765314102173, - "learning_rate": 1.9824850869399673e-05, - "loss": 0.3511, + "epoch": 0.05254073810853343, + "grad_norm": 0.9592474102973938, + "learning_rate": 1.964972841260978e-05, + "loss": 0.3289, "step": 1035 }, { - "epoch": 0.02639928924990481, - "grad_norm": 1.2254350185394287, - "learning_rate": 1.982400473833397e-05, - "loss": 0.373, + "epoch": 0.05279455809939591, + "grad_norm": 0.8658552765846252, + "learning_rate": 1.9648036279337362e-05, + "loss": 0.3318, "step": 1040 }, { - "epoch": 0.02652620890976012, - "grad_norm": 1.593988060951233, - "learning_rate": 1.9823158607268266e-05, - "loss": 0.3986, + "epoch": 0.05304837809025839, + "grad_norm": 0.9752548933029175, + "learning_rate": 1.9646344146064946e-05, + "loss": 0.3342, "step": 1045 }, { - "epoch": 0.026653128569615433, - "grad_norm": 2.180321455001831, - "learning_rate": 1.9822312476202564e-05, - "loss": 0.4129, + "epoch": 0.05330219808112087, + "grad_norm": 1.1210874319076538, + "learning_rate": 1.964465201279253e-05, + "loss": 0.2928, "step": 1050 }, { - "epoch": 0.026780048229470745, - "grad_norm": 1.5157248973846436, - "learning_rate": 1.9821466345136863e-05, - "loss": 0.3613, + "epoch": 0.05355601807198335, + "grad_norm": 0.7496079206466675, + "learning_rate": 1.9642959879520113e-05, + "loss": 0.3433, "step": 1055 }, { - "epoch": 0.026906967889326058, - "grad_norm": 1.452151894569397, - "learning_rate": 1.982062021407116e-05, - "loss": 0.4105, + "epoch": 0.05380983806284583, + "grad_norm": 1.119456171989441, + "learning_rate": 1.9641267746247696e-05, + "loss": 0.3038, "step": 1060 }, { - "epoch": 0.027033887549181367, - "grad_norm": 2.0679821968078613, - "learning_rate": 1.981977408300546e-05, - "loss": 0.3643, + "epoch": 0.05406365805370831, + "grad_norm": 0.9271594882011414, + "learning_rate": 1.963957561297528e-05, + "loss": 0.3138, "step": 1065 }, { - "epoch": 0.02716080720903668, - "grad_norm": 1.616434931755066, - "learning_rate": 1.9818927951939758e-05, - "loss": 0.4255, + "epoch": 0.05431747804457079, + "grad_norm": 1.0885213613510132, + "learning_rate": 1.9637883479702863e-05, + "loss": 0.3176, "step": 1070 }, { - "epoch": 0.02728772686889199, - "grad_norm": 1.3513835668563843, - "learning_rate": 1.9818081820874056e-05, - "loss": 0.3642, + "epoch": 0.05457129803543327, + "grad_norm": 1.075253963470459, + "learning_rate": 1.9636191346430447e-05, + "loss": 0.2996, "step": 1075 }, { - "epoch": 0.027414646528747304, - "grad_norm": 2.348637342453003, - "learning_rate": 1.9817235689808355e-05, - "loss": 0.352, + "epoch": 0.05482511802629575, + "grad_norm": 0.9437046647071838, + "learning_rate": 1.963449921315803e-05, + "loss": 0.3203, "step": 1080 }, { - "epoch": 0.027541566188602616, - "grad_norm": 1.506955862045288, - "learning_rate": 1.981638955874265e-05, - "loss": 0.4583, + "epoch": 0.055078938017158234, + "grad_norm": 0.8787136077880859, + "learning_rate": 1.9632807079885614e-05, + "loss": 0.3017, "step": 1085 }, { - "epoch": 0.027668485848457925, - "grad_norm": 1.4618726968765259, - "learning_rate": 1.9815543427676948e-05, - "loss": 0.4205, + "epoch": 0.05533275800802071, + "grad_norm": 0.9308958649635315, + "learning_rate": 1.9631114946613197e-05, + "loss": 0.3023, "step": 1090 }, { - "epoch": 0.027795405508313237, - "grad_norm": 1.2343381643295288, - "learning_rate": 1.9814697296611247e-05, - "loss": 0.422, + "epoch": 0.05558657799888319, + "grad_norm": 0.8047986626625061, + "learning_rate": 1.962942281334078e-05, + "loss": 0.3042, "step": 1095 }, { - "epoch": 0.02792232516816855, - "grad_norm": 1.90855073928833, - "learning_rate": 1.9813851165545545e-05, - "loss": 0.3427, + "epoch": 0.055840397989745674, + "grad_norm": 0.6756587028503418, + "learning_rate": 1.9627730680068364e-05, + "loss": 0.2961, "step": 1100 }, { - "epoch": 0.028049244828023862, - "grad_norm": 7.27864408493042, - "learning_rate": 1.9813005034479843e-05, - "loss": 0.4075, + "epoch": 0.05609421798060815, + "grad_norm": 1.1307016611099243, + "learning_rate": 1.9626038546795948e-05, + "loss": 0.3447, "step": 1105 }, { - "epoch": 0.02817616448787917, - "grad_norm": 1.404361605644226, - "learning_rate": 1.9812158903414142e-05, - "loss": 0.3633, + "epoch": 0.05634803797147063, + "grad_norm": 0.9675906896591187, + "learning_rate": 1.962434641352353e-05, + "loss": 0.3325, "step": 1110 }, { - "epoch": 0.028303084147734484, - "grad_norm": 1.609268307685852, - "learning_rate": 1.981131277234844e-05, - "loss": 0.4052, + "epoch": 0.056601857962333114, + "grad_norm": 0.9537900686264038, + "learning_rate": 1.9622654280251115e-05, + "loss": 0.3241, "step": 1115 }, { - "epoch": 0.028430003807589796, - "grad_norm": 0.9394638538360596, - "learning_rate": 1.981046664128274e-05, - "loss": 0.3848, + "epoch": 0.05685567795319559, + "grad_norm": 0.9516997337341309, + "learning_rate": 1.96209621469787e-05, + "loss": 0.3301, "step": 1120 }, { - "epoch": 0.02855692346744511, - "grad_norm": 1.1962140798568726, - "learning_rate": 1.9809620510217034e-05, - "loss": 0.403, + "epoch": 0.05710949794405808, + "grad_norm": 1.157469391822815, + "learning_rate": 1.9619270013706282e-05, + "loss": 0.306, "step": 1125 }, { - "epoch": 0.028683843127300417, - "grad_norm": 1.2600388526916504, - "learning_rate": 1.9808774379151332e-05, - "loss": 0.3649, + "epoch": 0.057363317934920555, + "grad_norm": 0.7865618467330933, + "learning_rate": 1.9617577880433866e-05, + "loss": 0.3097, "step": 1130 }, { - "epoch": 0.02881076278715573, - "grad_norm": 1.1457219123840332, - "learning_rate": 1.980792824808563e-05, - "loss": 0.4158, + "epoch": 0.05761713792578303, + "grad_norm": 0.8256521821022034, + "learning_rate": 1.9615885747161446e-05, + "loss": 0.3012, "step": 1135 }, { - "epoch": 0.028937682447011042, - "grad_norm": 2.2418243885040283, - "learning_rate": 1.980708211701993e-05, - "loss": 0.4511, + "epoch": 0.05787095791664552, + "grad_norm": 0.7554883360862732, + "learning_rate": 1.9614193613889033e-05, + "loss": 0.3226, "step": 1140 }, { - "epoch": 0.029064602106866354, - "grad_norm": 1.5815892219543457, - "learning_rate": 1.9806235985954224e-05, - "loss": 0.4032, + "epoch": 0.058124777907507995, + "grad_norm": 0.8436192870140076, + "learning_rate": 1.9612501480616616e-05, + "loss": 0.3068, "step": 1145 }, { - "epoch": 0.029191521766721667, - "grad_norm": 1.406893014907837, - "learning_rate": 1.9805389854888522e-05, - "loss": 0.3762, + "epoch": 0.05837859789837047, + "grad_norm": 1.5259509086608887, + "learning_rate": 1.9610809347344197e-05, + "loss": 0.3158, "step": 1150 }, { - "epoch": 0.029318441426576976, - "grad_norm": 1.2209657430648804, - "learning_rate": 1.980454372382282e-05, - "loss": 0.3734, + "epoch": 0.05863241788923296, + "grad_norm": 1.0376449823379517, + "learning_rate": 1.9609117214071783e-05, + "loss": 0.2944, "step": 1155 }, { - "epoch": 0.029445361086432288, - "grad_norm": 1.1528421640396118, - "learning_rate": 1.980369759275712e-05, - "loss": 0.3831, + "epoch": 0.058886237880095435, + "grad_norm": 0.9757311344146729, + "learning_rate": 1.9607425080799364e-05, + "loss": 0.3029, "step": 1160 }, { - "epoch": 0.0295722807462876, - "grad_norm": 1.61820650100708, - "learning_rate": 1.9802851461691417e-05, - "loss": 0.426, + "epoch": 0.05914005787095792, + "grad_norm": 1.0501054525375366, + "learning_rate": 1.960573294752695e-05, + "loss": 0.2927, "step": 1165 }, { - "epoch": 0.029699200406142913, - "grad_norm": 1.339820384979248, - "learning_rate": 1.9802005330625716e-05, - "loss": 0.3861, + "epoch": 0.0593938778618204, + "grad_norm": 0.7940589189529419, + "learning_rate": 1.9604040814254534e-05, + "loss": 0.3092, "step": 1170 }, { - "epoch": 0.029826120065998222, - "grad_norm": 1.622822880744934, - "learning_rate": 1.9801159199560014e-05, - "loss": 0.358, + "epoch": 0.059647697852682875, + "grad_norm": 1.0338493585586548, + "learning_rate": 1.9602348680982114e-05, + "loss": 0.3086, "step": 1175 }, { - "epoch": 0.029953039725853534, - "grad_norm": 1.3343102931976318, - "learning_rate": 1.9800313068494313e-05, - "loss": 0.3947, + "epoch": 0.05990151784354536, + "grad_norm": 0.8323164582252502, + "learning_rate": 1.96006565477097e-05, + "loss": 0.2797, "step": 1180 }, { - "epoch": 0.030079959385708847, - "grad_norm": 6.087907314300537, - "learning_rate": 1.9799466937428608e-05, - "loss": 0.4344, + "epoch": 0.06015533783440784, + "grad_norm": 1.0480012893676758, + "learning_rate": 1.959896441443728e-05, + "loss": 0.2908, "step": 1185 }, { - "epoch": 0.03020687904556416, - "grad_norm": 1.775268316268921, - "learning_rate": 1.9798620806362906e-05, - "loss": 0.3727, + "epoch": 0.060409157825270315, + "grad_norm": 0.8771175742149353, + "learning_rate": 1.9597272281164865e-05, + "loss": 0.2945, "step": 1190 }, { - "epoch": 0.030333798705419468, - "grad_norm": 1.5560256242752075, - "learning_rate": 1.9797774675297205e-05, - "loss": 0.3808, + "epoch": 0.0606629778161328, + "grad_norm": 0.968286395072937, + "learning_rate": 1.959558014789245e-05, + "loss": 0.2989, "step": 1195 }, { - "epoch": 0.03046071836527478, - "grad_norm": 1.6000893115997314, - "learning_rate": 1.9796928544231503e-05, - "loss": 0.338, + "epoch": 0.06091679780699528, + "grad_norm": 1.787429690361023, + "learning_rate": 1.9593888014620032e-05, + "loss": 0.3255, "step": 1200 }, { - "epoch": 0.030587638025130093, - "grad_norm": 1.5408012866973877, - "learning_rate": 1.97960824131658e-05, - "loss": 0.3491, + "epoch": 0.06117061779785776, + "grad_norm": 1.2332099676132202, + "learning_rate": 1.959219588134762e-05, + "loss": 0.2937, "step": 1205 }, { - "epoch": 0.030714557684985405, - "grad_norm": 1.471522331237793, - "learning_rate": 1.97952362821001e-05, - "loss": 0.3931, + "epoch": 0.06142443778872024, + "grad_norm": 0.9430225491523743, + "learning_rate": 1.95905037480752e-05, + "loss": 0.309, "step": 1210 }, { - "epoch": 0.030841477344840714, - "grad_norm": 1.413741946220398, - "learning_rate": 1.9794390151034398e-05, - "loss": 0.3406, + "epoch": 0.06167825777958272, + "grad_norm": 0.9462845921516418, + "learning_rate": 1.9588811614802783e-05, + "loss": 0.3156, "step": 1215 }, { - "epoch": 0.030968397004696026, - "grad_norm": 1.3870900869369507, - "learning_rate": 1.9793544019968697e-05, - "loss": 0.3648, + "epoch": 0.0619320777704452, + "grad_norm": 0.9460931420326233, + "learning_rate": 1.9587119481530366e-05, + "loss": 0.2851, "step": 1220 }, { - "epoch": 0.03109531666455134, - "grad_norm": 1.108593463897705, - "learning_rate": 1.979269788890299e-05, - "loss": 0.3098, + "epoch": 0.06218589776130768, + "grad_norm": 0.8023846745491028, + "learning_rate": 1.958542734825795e-05, + "loss": 0.304, "step": 1225 }, { - "epoch": 0.03122223632440665, - "grad_norm": 1.4947552680969238, - "learning_rate": 1.979185175783729e-05, - "loss": 0.3931, + "epoch": 0.06243971775217016, + "grad_norm": 1.055830955505371, + "learning_rate": 1.9583735214985533e-05, + "loss": 0.3123, "step": 1230 }, { - "epoch": 0.03134915598426196, - "grad_norm": 1.2890698909759521, - "learning_rate": 1.979100562677159e-05, - "loss": 0.4294, + "epoch": 0.06269353774303264, + "grad_norm": 0.7749910354614258, + "learning_rate": 1.9582043081713117e-05, + "loss": 0.3202, "step": 1235 }, { - "epoch": 0.03147607564411727, - "grad_norm": 1.260332465171814, - "learning_rate": 1.9790159495705887e-05, - "loss": 0.3838, + "epoch": 0.06294735773389512, + "grad_norm": 1.0144844055175781, + "learning_rate": 1.95803509484407e-05, + "loss": 0.2781, "step": 1240 }, { - "epoch": 0.031602995303972585, - "grad_norm": 1.4241175651550293, - "learning_rate": 1.9789313364640185e-05, - "loss": 0.3785, + "epoch": 0.0632011777247576, + "grad_norm": 0.9046682715415955, + "learning_rate": 1.9578658815168284e-05, + "loss": 0.3233, "step": 1245 }, { - "epoch": 0.0317299149638279, - "grad_norm": 1.7724463939666748, - "learning_rate": 1.9788467233574484e-05, - "loss": 0.3519, + "epoch": 0.06345499771562008, + "grad_norm": 71.55082702636719, + "learning_rate": 1.9576966681895867e-05, + "loss": 0.2724, "step": 1250 }, { - "epoch": 0.03185683462368321, - "grad_norm": 1.4607324600219727, - "learning_rate": 1.9787621102508782e-05, - "loss": 0.3889, + "epoch": 0.06370881770648257, + "grad_norm": 0.8242619037628174, + "learning_rate": 1.957527454862345e-05, + "loss": 0.2722, "step": 1255 }, { - "epoch": 0.03198375428353852, - "grad_norm": 1.1926449537277222, - "learning_rate": 1.978677497144308e-05, - "loss": 0.3189, + "epoch": 0.06396263769734505, + "grad_norm": 1.0677359104156494, + "learning_rate": 1.9573582415351035e-05, + "loss": 0.3172, "step": 1260 }, { - "epoch": 0.032110673943393835, - "grad_norm": 1.3880276679992676, - "learning_rate": 1.9785928840377375e-05, - "loss": 0.3607, + "epoch": 0.06421645768820752, + "grad_norm": 0.8488295078277588, + "learning_rate": 1.9571890282078618e-05, + "loss": 0.2966, "step": 1265 }, { - "epoch": 0.03223759360324914, - "grad_norm": 1.5964508056640625, - "learning_rate": 1.9785082709311674e-05, - "loss": 0.385, + "epoch": 0.06447027767907, + "grad_norm": 0.703192949295044, + "learning_rate": 1.95701981488062e-05, + "loss": 0.3108, "step": 1270 }, { - "epoch": 0.03236451326310445, - "grad_norm": 1.4477037191390991, - "learning_rate": 1.9784236578245972e-05, - "loss": 0.3363, + "epoch": 0.06472409766993248, + "grad_norm": 0.8458622694015503, + "learning_rate": 1.9568506015533785e-05, + "loss": 0.2884, "step": 1275 }, { - "epoch": 0.032491432922959765, - "grad_norm": 1.211918830871582, - "learning_rate": 1.978339044718027e-05, - "loss": 0.2967, + "epoch": 0.06497791766079497, + "grad_norm": 0.9391268491744995, + "learning_rate": 1.956681388226137e-05, + "loss": 0.293, "step": 1280 }, { - "epoch": 0.03261835258281508, - "grad_norm": 1.1527576446533203, - "learning_rate": 1.9782544316114566e-05, - "loss": 0.3566, + "epoch": 0.06523173765165745, + "grad_norm": 1.0053985118865967, + "learning_rate": 1.9565121748988952e-05, + "loss": 0.3244, "step": 1285 }, { - "epoch": 0.03274527224267039, - "grad_norm": 1.657171607017517, - "learning_rate": 1.9781698185048864e-05, - "loss": 0.4423, + "epoch": 0.06548555764251993, + "grad_norm": 0.8404228687286377, + "learning_rate": 1.9563429615716536e-05, + "loss": 0.2932, "step": 1290 }, { - "epoch": 0.0328721919025257, - "grad_norm": 1.5477948188781738, - "learning_rate": 1.9780852053983162e-05, - "loss": 0.3908, + "epoch": 0.0657393776333824, + "grad_norm": 1.3393089771270752, + "learning_rate": 1.956173748244412e-05, + "loss": 0.3064, "step": 1295 }, { - "epoch": 0.032999111562381014, - "grad_norm": 1.2508469820022583, - "learning_rate": 1.978000592291746e-05, - "loss": 0.362, + "epoch": 0.06599319762424488, + "grad_norm": 0.6775000691413879, + "learning_rate": 1.9560045349171703e-05, + "loss": 0.2931, "step": 1300 }, { - "epoch": 0.03312603122223633, - "grad_norm": 1.5207682847976685, - "learning_rate": 1.977915979185176e-05, - "loss": 0.3902, + "epoch": 0.06624701761510736, + "grad_norm": 0.9884696006774902, + "learning_rate": 1.9558353215899286e-05, + "loss": 0.285, "step": 1305 }, { - "epoch": 0.03325295088209164, - "grad_norm": 13.752100944519043, - "learning_rate": 1.9778313660786058e-05, - "loss": 0.4188, + "epoch": 0.06650083760596985, + "grad_norm": 1.0637091398239136, + "learning_rate": 1.955666108262687e-05, + "loss": 0.2468, "step": 1310 }, { - "epoch": 0.033379870541946945, - "grad_norm": 1.5323046445846558, - "learning_rate": 1.9777467529720356e-05, - "loss": 0.3376, + "epoch": 0.06675465759683233, + "grad_norm": 0.8244103193283081, + "learning_rate": 1.9554968949354454e-05, + "loss": 0.2582, "step": 1315 }, { - "epoch": 0.03350679020180226, - "grad_norm": 1.3431785106658936, - "learning_rate": 1.9776621398654654e-05, - "loss": 0.3223, + "epoch": 0.0670084775876948, + "grad_norm": 0.6568355560302734, + "learning_rate": 1.9553276816082037e-05, + "loss": 0.2774, "step": 1320 }, { - "epoch": 0.03363370986165757, - "grad_norm": 1.7274723052978516, - "learning_rate": 1.977577526758895e-05, - "loss": 0.4135, + "epoch": 0.06726229757855728, + "grad_norm": 1.0442551374435425, + "learning_rate": 1.955158468280962e-05, + "loss": 0.277, "step": 1325 }, { - "epoch": 0.03376062952151288, - "grad_norm": 1.412017583847046, - "learning_rate": 1.9774929136523248e-05, - "loss": 0.3483, + "epoch": 0.06751611756941976, + "grad_norm": 0.797654390335083, + "learning_rate": 1.9549892549537204e-05, + "loss": 0.2658, "step": 1330 }, { - "epoch": 0.033887549181368194, - "grad_norm": 1.5886249542236328, - "learning_rate": 1.9774083005457546e-05, - "loss": 0.366, + "epoch": 0.06776993756028225, + "grad_norm": 0.8469611406326294, + "learning_rate": 1.9548200416264788e-05, + "loss": 0.319, "step": 1335 }, { - "epoch": 0.03401446884122351, - "grad_norm": 1.5286506414413452, - "learning_rate": 1.9773236874391845e-05, - "loss": 0.3464, + "epoch": 0.06802375755114473, + "grad_norm": 38.28089141845703, + "learning_rate": 1.9546508282992368e-05, + "loss": 0.268, "step": 1340 }, { - "epoch": 0.03414138850107882, - "grad_norm": 1.26659095287323, - "learning_rate": 1.9772390743326143e-05, - "loss": 0.3861, + "epoch": 0.06827757754200721, + "grad_norm": 0.961740255355835, + "learning_rate": 1.9544816149719955e-05, + "loss": 0.3031, "step": 1345 }, { - "epoch": 0.03426830816093413, - "grad_norm": 3.2529611587524414, - "learning_rate": 1.977154461226044e-05, - "loss": 0.3615, + "epoch": 0.06853139753286969, + "grad_norm": 0.9473285675048828, + "learning_rate": 1.954312401644754e-05, + "loss": 0.2707, "step": 1350 }, { - "epoch": 0.03439522782078944, - "grad_norm": 1.4894890785217285, - "learning_rate": 1.977069848119474e-05, - "loss": 0.3171, + "epoch": 0.06878521752373216, + "grad_norm": 1.0564128160476685, + "learning_rate": 1.9541431883175122e-05, + "loss": 0.2747, "step": 1355 }, { - "epoch": 0.03452214748064475, - "grad_norm": 1.324841022491455, - "learning_rate": 1.9769852350129038e-05, - "loss": 0.3797, + "epoch": 0.06903903751459466, + "grad_norm": 1.3247959613800049, + "learning_rate": 1.9539739749902705e-05, + "loss": 0.2801, "step": 1360 }, { - "epoch": 0.03464906714050006, - "grad_norm": 1.3321356773376465, - "learning_rate": 1.9769006219063333e-05, - "loss": 0.4088, + "epoch": 0.06929285750545713, + "grad_norm": 1.2612837553024292, + "learning_rate": 1.9538047616630286e-05, + "loss": 0.314, "step": 1365 }, { - "epoch": 0.034775986800355374, - "grad_norm": 1.2543240785598755, - "learning_rate": 1.976816008799763e-05, - "loss": 0.3247, + "epoch": 0.06954667749631961, + "grad_norm": 0.8873483538627625, + "learning_rate": 1.9536355483357873e-05, + "loss": 0.2818, "step": 1370 }, { - "epoch": 0.034902906460210686, - "grad_norm": 1.238745093345642, - "learning_rate": 1.976731395693193e-05, - "loss": 0.353, + "epoch": 0.06980049748718209, + "grad_norm": 1.1142306327819824, + "learning_rate": 1.9534663350085453e-05, + "loss": 0.2964, "step": 1375 }, { - "epoch": 0.035029826120066, - "grad_norm": 1.1235698461532593, - "learning_rate": 1.976646782586623e-05, - "loss": 0.3486, + "epoch": 0.07005431747804457, + "grad_norm": 0.7834559679031372, + "learning_rate": 1.9532971216813036e-05, + "loss": 0.3129, "step": 1380 }, { - "epoch": 0.03515674577992131, - "grad_norm": 0.9554315209388733, - "learning_rate": 1.9765621694800527e-05, - "loss": 0.3787, + "epoch": 0.07030813746890704, + "grad_norm": 0.6153595447540283, + "learning_rate": 1.9531279083540623e-05, + "loss": 0.2775, "step": 1385 }, { - "epoch": 0.035283665439776624, - "grad_norm": 2.0769875049591064, - "learning_rate": 1.9764775563734825e-05, - "loss": 0.3446, + "epoch": 0.07056195745976954, + "grad_norm": 0.9495353102684021, + "learning_rate": 1.9529586950268203e-05, + "loss": 0.2902, "step": 1390 }, { - "epoch": 0.035410585099631936, - "grad_norm": 1.243664026260376, - "learning_rate": 1.9763929432669124e-05, - "loss": 0.3551, + "epoch": 0.07081577745063201, + "grad_norm": 1.0067836046218872, + "learning_rate": 1.9527894816995787e-05, + "loss": 0.2918, "step": 1395 }, { - "epoch": 0.03553750475948724, - "grad_norm": 1.6385518312454224, - "learning_rate": 1.9763083301603422e-05, - "loss": 0.3594, + "epoch": 0.07106959744149449, + "grad_norm": 1.068027377128601, + "learning_rate": 1.952620268372337e-05, + "loss": 0.2845, "step": 1400 }, { - "epoch": 0.035664424419342554, - "grad_norm": 1.4778023958206177, - "learning_rate": 1.9762237170537717e-05, - "loss": 0.3259, + "epoch": 0.07132341743235697, + "grad_norm": 1.1914137601852417, + "learning_rate": 1.9524510550450954e-05, + "loss": 0.2867, "step": 1405 }, { - "epoch": 0.035791344079197866, - "grad_norm": 1.35469651222229, - "learning_rate": 1.9761391039472015e-05, - "loss": 0.3567, + "epoch": 0.07157723742321945, + "grad_norm": 0.9969173669815063, + "learning_rate": 1.952281841717854e-05, + "loss": 0.2888, "step": 1410 }, { - "epoch": 0.03591826373905318, - "grad_norm": 1.3091235160827637, - "learning_rate": 1.9760544908406314e-05, - "loss": 0.3609, + "epoch": 0.07183105741408194, + "grad_norm": 0.8332482576370239, + "learning_rate": 1.952112628390612e-05, + "loss": 0.2833, "step": 1415 }, { - "epoch": 0.03604518339890849, - "grad_norm": 1.6333122253417969, - "learning_rate": 1.9759698777340612e-05, - "loss": 0.348, + "epoch": 0.07208487740494442, + "grad_norm": 1.3746370077133179, + "learning_rate": 1.9519434150633705e-05, + "loss": 0.2867, "step": 1420 }, { - "epoch": 0.0361721030587638, - "grad_norm": 1.461905598640442, - "learning_rate": 1.9758852646274907e-05, - "loss": 0.3241, + "epoch": 0.0723386973958069, + "grad_norm": 0.6186951398849487, + "learning_rate": 1.9517742017361288e-05, + "loss": 0.2754, "step": 1425 }, { - "epoch": 0.036299022718619116, - "grad_norm": 1.0004135370254517, - "learning_rate": 1.9758006515209206e-05, - "loss": 0.3411, + "epoch": 0.07259251738666937, + "grad_norm": 0.668218195438385, + "learning_rate": 1.951604988408887e-05, + "loss": 0.2538, "step": 1430 }, { - "epoch": 0.03642594237847443, - "grad_norm": 1.89811372756958, - "learning_rate": 1.9757160384143504e-05, - "loss": 0.3787, + "epoch": 0.07284633737753185, + "grad_norm": 0.900421679019928, + "learning_rate": 1.9514357750816455e-05, + "loss": 0.2657, "step": 1435 }, { - "epoch": 0.03655286203832974, - "grad_norm": 1.4608021974563599, - "learning_rate": 1.9756314253077803e-05, - "loss": 0.4002, + "epoch": 0.07310015736839433, + "grad_norm": 0.7691309452056885, + "learning_rate": 1.951266561754404e-05, + "loss": 0.3226, "step": 1440 }, { - "epoch": 0.036679781698185046, - "grad_norm": 1.1376680135726929, - "learning_rate": 1.97554681220121e-05, - "loss": 0.3393, + "epoch": 0.07335397735925682, + "grad_norm": 0.9059398770332336, + "learning_rate": 1.9510973484271622e-05, + "loss": 0.3005, "step": 1445 }, { - "epoch": 0.03680670135804036, - "grad_norm": 3.8639371395111084, - "learning_rate": 1.97546219909464e-05, - "loss": 0.3973, + "epoch": 0.0736077973501193, + "grad_norm": 1.2089287042617798, + "learning_rate": 1.9509281350999206e-05, + "loss": 0.2816, "step": 1450 }, { - "epoch": 0.03693362101789567, - "grad_norm": 1.3336995840072632, - "learning_rate": 1.9753775859880698e-05, - "loss": 0.3528, + "epoch": 0.07386161734098178, + "grad_norm": 0.8258754014968872, + "learning_rate": 1.950758921772679e-05, + "loss": 0.2874, "step": 1455 }, { - "epoch": 0.03706054067775098, - "grad_norm": 1.1879972219467163, - "learning_rate": 1.9752929728814996e-05, - "loss": 0.4305, + "epoch": 0.07411543733184425, + "grad_norm": 0.7340827584266663, + "learning_rate": 1.9505897084454373e-05, + "loss": 0.2884, "step": 1460 }, { - "epoch": 0.037187460337606296, - "grad_norm": 1.1704986095428467, - "learning_rate": 1.975208359774929e-05, - "loss": 0.3458, + "epoch": 0.07436925732270673, + "grad_norm": 0.741913914680481, + "learning_rate": 1.9504204951181956e-05, + "loss": 0.2929, "step": 1465 }, { - "epoch": 0.03731437999746161, - "grad_norm": 1.4859542846679688, - "learning_rate": 1.975123746668359e-05, - "loss": 0.3956, + "epoch": 0.07462307731356922, + "grad_norm": 0.7973793745040894, + "learning_rate": 1.950251281790954e-05, + "loss": 0.2963, "step": 1470 }, { - "epoch": 0.03744129965731692, - "grad_norm": 1.3937151432037354, - "learning_rate": 1.9750391335617888e-05, - "loss": 0.3329, + "epoch": 0.0748768973044317, + "grad_norm": 0.9016576409339905, + "learning_rate": 1.9500820684637124e-05, + "loss": 0.265, "step": 1475 }, { - "epoch": 0.03756821931717223, - "grad_norm": 0.9847153425216675, - "learning_rate": 1.9749545204552186e-05, - "loss": 0.3637, + "epoch": 0.07513071729529418, + "grad_norm": 1.0395702123641968, + "learning_rate": 1.9499128551364707e-05, + "loss": 0.3539, "step": 1480 }, { - "epoch": 0.03769513897702754, - "grad_norm": 1.2575620412826538, - "learning_rate": 1.9748699073486485e-05, - "loss": 0.3421, + "epoch": 0.07538453728615666, + "grad_norm": 0.7906758189201355, + "learning_rate": 1.949743641809229e-05, + "loss": 0.2769, "step": 1485 }, { - "epoch": 0.03782205863688285, - "grad_norm": 1.5573993921279907, - "learning_rate": 1.9747852942420783e-05, - "loss": 0.3611, + "epoch": 0.07563835727701913, + "grad_norm": 0.7581782341003418, + "learning_rate": 1.9495744284819874e-05, + "loss": 0.2911, "step": 1490 }, { - "epoch": 0.03794897829673816, - "grad_norm": 1.7347296476364136, - "learning_rate": 1.974700681135508e-05, - "loss": 0.3239, + "epoch": 0.07589217726788162, + "grad_norm": 0.9939795732498169, + "learning_rate": 1.9494052151547458e-05, + "loss": 0.2759, "step": 1495 }, { - "epoch": 0.038075897956593475, - "grad_norm": 1.744893193244934, - "learning_rate": 1.974616068028938e-05, - "loss": 0.3249, + "epoch": 0.0761459972587441, + "grad_norm": 0.7973515391349792, + "learning_rate": 1.949236001827504e-05, + "loss": 0.2673, "step": 1500 }, { - "epoch": 0.03820281761644879, - "grad_norm": 1.2477973699569702, - "learning_rate": 1.9745314549223675e-05, - "loss": 0.3158, + "epoch": 0.07639981724960658, + "grad_norm": 0.793901264667511, + "learning_rate": 1.9490667885002625e-05, + "loss": 0.2643, "step": 1505 }, { - "epoch": 0.0383297372763041, - "grad_norm": 1.36749267578125, - "learning_rate": 1.9744468418157973e-05, - "loss": 0.3497, + "epoch": 0.07665363724046906, + "grad_norm": 0.9730905294418335, + "learning_rate": 1.948897575173021e-05, + "loss": 0.2505, "step": 1510 }, { - "epoch": 0.03845665693615941, - "grad_norm": 1.0900284051895142, - "learning_rate": 1.9743622287092272e-05, - "loss": 0.3555, + "epoch": 0.07690745723133154, + "grad_norm": 0.7291544079780579, + "learning_rate": 1.9487283618457792e-05, + "loss": 0.2337, "step": 1515 }, { - "epoch": 0.038583576596014725, - "grad_norm": 1.0046569108963013, - "learning_rate": 1.974277615602657e-05, - "loss": 0.3552, + "epoch": 0.07716127722219401, + "grad_norm": 0.836552083492279, + "learning_rate": 1.9485591485185375e-05, + "loss": 0.274, "step": 1520 }, { - "epoch": 0.03871049625587004, - "grad_norm": 1.339502215385437, - "learning_rate": 1.974193002496087e-05, - "loss": 0.3492, + "epoch": 0.0774150972130565, + "grad_norm": 0.7705031037330627, + "learning_rate": 1.948389935191296e-05, + "loss": 0.2706, "step": 1525 }, { - "epoch": 0.03883741591572534, - "grad_norm": 0.951333224773407, - "learning_rate": 1.9741083893895167e-05, - "loss": 0.3138, + "epoch": 0.07766891720391898, + "grad_norm": 0.6896925568580627, + "learning_rate": 1.9482207218640543e-05, + "loss": 0.2807, "step": 1530 }, { - "epoch": 0.038964335575580655, - "grad_norm": 1.309401035308838, - "learning_rate": 1.9740237762829465e-05, - "loss": 0.3389, + "epoch": 0.07792273719478146, + "grad_norm": 1.161415696144104, + "learning_rate": 1.9480515085368126e-05, + "loss": 0.2574, "step": 1535 }, { - "epoch": 0.03909125523543597, - "grad_norm": 2.1443169116973877, - "learning_rate": 1.9739391631763764e-05, - "loss": 0.3007, + "epoch": 0.07817655718564394, + "grad_norm": 1.0795843601226807, + "learning_rate": 1.947882295209571e-05, + "loss": 0.2574, "step": 1540 }, { - "epoch": 0.03921817489529128, - "grad_norm": 1.173959732055664, - "learning_rate": 1.973854550069806e-05, - "loss": 0.3503, + "epoch": 0.07843037717650642, + "grad_norm": 0.6460023522377014, + "learning_rate": 1.947713081882329e-05, + "loss": 0.2794, "step": 1545 }, { - "epoch": 0.03934509455514659, - "grad_norm": 1.5687839984893799, - "learning_rate": 1.9737699369632357e-05, - "loss": 0.3157, + "epoch": 0.07868419716736891, + "grad_norm": 0.9282433390617371, + "learning_rate": 1.9475438685550877e-05, + "loss": 0.2564, "step": 1550 }, { - "epoch": 0.039472014215001905, - "grad_norm": 1.1727139949798584, - "learning_rate": 1.9736853238566656e-05, - "loss": 0.3635, + "epoch": 0.07893801715823139, + "grad_norm": 0.9009420871734619, + "learning_rate": 1.9473746552278457e-05, + "loss": 0.2607, "step": 1555 }, { - "epoch": 0.03959893387485722, - "grad_norm": 1.3723936080932617, - "learning_rate": 1.9736007107500954e-05, - "loss": 0.3087, + "epoch": 0.07919183714909386, + "grad_norm": 0.8371971845626831, + "learning_rate": 1.9472054419006044e-05, + "loss": 0.2732, "step": 1560 }, { - "epoch": 0.03972585353471253, - "grad_norm": 1.3399128913879395, - "learning_rate": 1.9735160976435252e-05, - "loss": 0.3557, + "epoch": 0.07944565713995634, + "grad_norm": 0.9188066124916077, + "learning_rate": 1.9470362285733627e-05, + "loss": 0.2457, "step": 1565 }, { - "epoch": 0.039852773194567835, - "grad_norm": 0.9384336471557617, - "learning_rate": 1.9734314845369547e-05, - "loss": 0.3377, + "epoch": 0.07969947713081882, + "grad_norm": 0.7665749788284302, + "learning_rate": 1.9468670152461208e-05, + "loss": 0.2373, "step": 1570 }, { - "epoch": 0.03997969285442315, - "grad_norm": 2.1580910682678223, - "learning_rate": 1.9733468714303846e-05, - "loss": 0.339, + "epoch": 0.07995329712168131, + "grad_norm": 1.9648408889770508, + "learning_rate": 1.9466978019188794e-05, + "loss": 0.2852, "step": 1575 }, { - "epoch": 0.04010661251427846, - "grad_norm": 1.0932481288909912, - "learning_rate": 1.9732622583238144e-05, - "loss": 0.3098, + "epoch": 0.08020711711254379, + "grad_norm": 0.8872603178024292, + "learning_rate": 1.9465285885916375e-05, + "loss": 0.3144, "step": 1580 }, { - "epoch": 0.04023353217413377, - "grad_norm": 14.296411514282227, - "learning_rate": 1.9731776452172443e-05, - "loss": 0.3827, + "epoch": 0.08046093710340627, + "grad_norm": 1.1950451135635376, + "learning_rate": 1.9463593752643958e-05, + "loss": 0.2723, "step": 1585 }, { - "epoch": 0.040360451833989085, - "grad_norm": 1.6708861589431763, - "learning_rate": 1.973093032110674e-05, - "loss": 0.3468, + "epoch": 0.08071475709426874, + "grad_norm": 0.8541845083236694, + "learning_rate": 1.9461901619371545e-05, + "loss": 0.2556, "step": 1590 }, { - "epoch": 0.0404873714938444, - "grad_norm": 1.262216329574585, - "learning_rate": 1.973008419004104e-05, - "loss": 0.2943, + "epoch": 0.08096857708513122, + "grad_norm": 0.8095240592956543, + "learning_rate": 1.9460209486099125e-05, + "loss": 0.2707, "step": 1595 }, { - "epoch": 0.04061429115369971, - "grad_norm": 1.486764669418335, - "learning_rate": 1.9729238058975338e-05, - "loss": 0.3268, + "epoch": 0.0812223970759937, + "grad_norm": 0.8487701416015625, + "learning_rate": 1.9458517352826712e-05, + "loss": 0.2712, "step": 1600 }, { - "epoch": 0.04074121081355502, - "grad_norm": 1.14958655834198, - "learning_rate": 1.9728391927909636e-05, - "loss": 0.353, + "epoch": 0.08147621706685619, + "grad_norm": 1.3312042951583862, + "learning_rate": 1.9456825219554292e-05, + "loss": 0.2592, "step": 1605 }, { - "epoch": 0.040868130473410334, - "grad_norm": 1.1040925979614258, - "learning_rate": 1.972754579684393e-05, - "loss": 0.3188, + "epoch": 0.08173003705771867, + "grad_norm": 0.7110800743103027, + "learning_rate": 1.9455133086281876e-05, + "loss": 0.2652, "step": 1610 }, { - "epoch": 0.04099505013326564, - "grad_norm": 0.9603783488273621, - "learning_rate": 1.972669966577823e-05, - "loss": 0.3346, + "epoch": 0.08198385704858115, + "grad_norm": 0.933272659778595, + "learning_rate": 1.9453440953009463e-05, + "loss": 0.3137, "step": 1615 }, { - "epoch": 0.04112196979312095, - "grad_norm": 1.0495407581329346, - "learning_rate": 1.9725853534712528e-05, - "loss": 0.3129, + "epoch": 0.08223767703944362, + "grad_norm": 1.1123526096343994, + "learning_rate": 1.9451748819737043e-05, + "loss": 0.2681, "step": 1620 }, { - "epoch": 0.041248889452976265, - "grad_norm": 1.4841678142547607, - "learning_rate": 1.9725007403646826e-05, - "loss": 0.3034, + "epoch": 0.0824914970303061, + "grad_norm": 1.181645393371582, + "learning_rate": 1.9450056686464627e-05, + "loss": 0.2715, "step": 1625 }, { - "epoch": 0.04137580911283158, - "grad_norm": 1.5129817724227905, - "learning_rate": 1.9724161272581125e-05, - "loss": 0.3674, + "epoch": 0.0827453170211686, + "grad_norm": 1.169384241104126, + "learning_rate": 1.944836455319221e-05, + "loss": 0.2534, "step": 1630 }, { - "epoch": 0.04150272877268689, - "grad_norm": 1.240013837814331, - "learning_rate": 1.9723315141515423e-05, - "loss": 0.3015, + "epoch": 0.08299913701203107, + "grad_norm": 0.8438488245010376, + "learning_rate": 1.9446672419919794e-05, + "loss": 0.2797, "step": 1635 }, { - "epoch": 0.0416296484325422, - "grad_norm": 1.219146966934204, - "learning_rate": 1.972246901044972e-05, - "loss": 0.351, + "epoch": 0.08325295700289355, + "grad_norm": 0.6939857602119446, + "learning_rate": 1.9444980286647377e-05, + "loss": 0.2649, "step": 1640 }, { - "epoch": 0.041756568092397514, - "grad_norm": 1.1922099590301514, - "learning_rate": 1.972162287938402e-05, - "loss": 0.3082, + "epoch": 0.08350677699375603, + "grad_norm": 0.803497314453125, + "learning_rate": 1.944328815337496e-05, + "loss": 0.2699, "step": 1645 }, { - "epoch": 0.041883487752252826, - "grad_norm": 1.133436918258667, - "learning_rate": 1.9720776748318315e-05, - "loss": 0.323, + "epoch": 0.0837605969846185, + "grad_norm": 0.6957250237464905, + "learning_rate": 1.9441596020102544e-05, + "loss": 0.2411, "step": 1650 }, { - "epoch": 0.04201040741210814, - "grad_norm": 1.3422448635101318, - "learning_rate": 1.9719930617252613e-05, - "loss": 0.3546, + "epoch": 0.08401441697548098, + "grad_norm": 0.8274784684181213, + "learning_rate": 1.9439903886830128e-05, + "loss": 0.2345, "step": 1655 }, { - "epoch": 0.042137327071963444, - "grad_norm": 1.011436104774475, - "learning_rate": 1.9719084486186912e-05, - "loss": 0.3107, + "epoch": 0.08426823696634347, + "grad_norm": 0.9105510711669922, + "learning_rate": 1.943821175355771e-05, + "loss": 0.2779, "step": 1660 }, { - "epoch": 0.04226424673181876, - "grad_norm": 1.313360571861267, - "learning_rate": 1.971823835512121e-05, - "loss": 0.3315, + "epoch": 0.08452205695720595, + "grad_norm": 0.8826274275779724, + "learning_rate": 1.9436519620285295e-05, + "loss": 0.2501, "step": 1665 }, { - "epoch": 0.04239116639167407, - "grad_norm": 1.1215654611587524, - "learning_rate": 1.971739222405551e-05, - "loss": 0.2874, + "epoch": 0.08477587694806843, + "grad_norm": 1.227468729019165, + "learning_rate": 1.943482748701288e-05, + "loss": 0.2628, "step": 1670 }, { - "epoch": 0.04251808605152938, - "grad_norm": 1.0646857023239136, - "learning_rate": 1.9716546092989807e-05, - "loss": 0.3404, + "epoch": 0.08502969693893091, + "grad_norm": 0.8807783722877502, + "learning_rate": 1.9433135353740462e-05, + "loss": 0.319, "step": 1675 }, { - "epoch": 0.042645005711384694, - "grad_norm": 1.059161901473999, - "learning_rate": 1.9715699961924105e-05, - "loss": 0.3538, + "epoch": 0.08528351692979338, + "grad_norm": 0.829677164554596, + "learning_rate": 1.9431443220468045e-05, + "loss": 0.2426, "step": 1680 }, { - "epoch": 0.042771925371240006, - "grad_norm": 0.8854466676712036, - "learning_rate": 1.9714853830858404e-05, - "loss": 0.308, + "epoch": 0.08553733692065588, + "grad_norm": 0.7359742522239685, + "learning_rate": 1.942975108719563e-05, + "loss": 0.2444, "step": 1685 }, { - "epoch": 0.04289884503109532, - "grad_norm": 1.3497017621994019, - "learning_rate": 1.97140076997927e-05, - "loss": 0.3324, + "epoch": 0.08579115691151835, + "grad_norm": 0.8583529591560364, + "learning_rate": 1.9428058953923213e-05, + "loss": 0.2667, "step": 1690 }, { - "epoch": 0.04302576469095063, - "grad_norm": 1.5567781925201416, - "learning_rate": 1.9713161568726997e-05, - "loss": 0.3595, + "epoch": 0.08604497690238083, + "grad_norm": 0.8555963039398193, + "learning_rate": 1.9426366820650796e-05, + "loss": 0.2694, "step": 1695 }, { - "epoch": 0.04315268435080594, - "grad_norm": 1.0552408695220947, - "learning_rate": 1.9712315437661296e-05, - "loss": 0.2996, + "epoch": 0.08629879689324331, + "grad_norm": 0.7486214637756348, + "learning_rate": 1.942467468737838e-05, + "loss": 0.2464, "step": 1700 }, { - "epoch": 0.04327960401066125, - "grad_norm": 2.9463236331939697, - "learning_rate": 1.9711469306595594e-05, - "loss": 0.3113, + "epoch": 0.08655261688410579, + "grad_norm": 0.9317395687103271, + "learning_rate": 1.9422982554105963e-05, + "loss": 0.2421, "step": 1705 }, { - "epoch": 0.04340652367051656, - "grad_norm": 0.8234306573867798, - "learning_rate": 1.971062317552989e-05, - "loss": 0.2931, + "epoch": 0.08680643687496828, + "grad_norm": 1.1423553228378296, + "learning_rate": 1.9421290420833547e-05, + "loss": 0.2773, "step": 1710 }, { - "epoch": 0.043533443330371874, - "grad_norm": 1.280196189880371, - "learning_rate": 1.9709777044464188e-05, - "loss": 0.276, + "epoch": 0.08706025686583076, + "grad_norm": 0.7504492402076721, + "learning_rate": 1.941959828756113e-05, + "loss": 0.2531, "step": 1715 }, { - "epoch": 0.043660362990227186, - "grad_norm": 1.3435417413711548, - "learning_rate": 1.9708930913398486e-05, - "loss": 0.2913, + "epoch": 0.08731407685669323, + "grad_norm": 0.7574618458747864, + "learning_rate": 1.9417906154288714e-05, + "loss": 0.2537, "step": 1720 }, { - "epoch": 0.0437872826500825, - "grad_norm": 1.3524742126464844, - "learning_rate": 1.9708084782332784e-05, - "loss": 0.3268, + "epoch": 0.08756789684755571, + "grad_norm": 0.7793534994125366, + "learning_rate": 1.9416214021016297e-05, + "loss": 0.2856, "step": 1725 }, { - "epoch": 0.04391420230993781, - "grad_norm": 1.0745275020599365, - "learning_rate": 1.9707238651267083e-05, - "loss": 0.3093, + "epoch": 0.08782171683841819, + "grad_norm": 0.7327775955200195, + "learning_rate": 1.941452188774388e-05, + "loss": 0.2499, "step": 1730 }, { - "epoch": 0.04404112196979312, - "grad_norm": 1.1804392337799072, - "learning_rate": 1.970639252020138e-05, - "loss": 0.309, + "epoch": 0.08807553682928067, + "grad_norm": 0.7954163551330566, + "learning_rate": 1.941282975447146e-05, + "loss": 0.2599, "step": 1735 }, { - "epoch": 0.044168041629648436, - "grad_norm": 1.388710379600525, - "learning_rate": 1.970554638913568e-05, - "loss": 0.3484, + "epoch": 0.08832935682014316, + "grad_norm": 0.7551532983779907, + "learning_rate": 1.9411137621199048e-05, + "loss": 0.2548, "step": 1740 }, { - "epoch": 0.04429496128950374, - "grad_norm": 1.2118122577667236, - "learning_rate": 1.9704700258069978e-05, - "loss": 0.3624, + "epoch": 0.08858317681100564, + "grad_norm": 0.8406553864479065, + "learning_rate": 1.940944548792663e-05, + "loss": 0.2291, "step": 1745 }, { - "epoch": 0.044421880949359054, - "grad_norm": 1.5213100910186768, - "learning_rate": 1.9703854127004273e-05, - "loss": 0.3031, + "epoch": 0.08883699680186811, + "grad_norm": 0.6450394988059998, + "learning_rate": 1.9407753354654215e-05, + "loss": 0.2654, "step": 1750 }, { - "epoch": 0.044548800609214366, - "grad_norm": 1.3896737098693848, - "learning_rate": 1.970300799593857e-05, - "loss": 0.3351, + "epoch": 0.08909081679273059, + "grad_norm": 0.6348497271537781, + "learning_rate": 1.94060612213818e-05, + "loss": 0.2302, "step": 1755 }, { - "epoch": 0.04467572026906968, - "grad_norm": 1.1387258768081665, - "learning_rate": 1.970216186487287e-05, - "loss": 0.2727, + "epoch": 0.08934463678359307, + "grad_norm": 0.589336097240448, + "learning_rate": 1.940436908810938e-05, + "loss": 0.242, "step": 1760 }, { - "epoch": 0.04480263992892499, - "grad_norm": 1.3927512168884277, - "learning_rate": 1.9701315733807168e-05, - "loss": 0.2996, + "epoch": 0.08959845677445556, + "grad_norm": 1.3192259073257446, + "learning_rate": 1.9402676954836966e-05, + "loss": 0.2606, "step": 1765 }, { - "epoch": 0.0449295595887803, - "grad_norm": 1.0781632661819458, - "learning_rate": 1.9700469602741467e-05, - "loss": 0.2957, + "epoch": 0.08985227676531804, + "grad_norm": 1.3367799520492554, + "learning_rate": 1.940098482156455e-05, + "loss": 0.2755, "step": 1770 }, { - "epoch": 0.045056479248635616, - "grad_norm": 0.910588800907135, - "learning_rate": 1.9699623471675765e-05, - "loss": 0.2849, + "epoch": 0.09010609675618052, + "grad_norm": 0.8470580577850342, + "learning_rate": 1.939929268829213e-05, + "loss": 0.2596, "step": 1775 }, { - "epoch": 0.04518339890849093, - "grad_norm": 2.040825366973877, - "learning_rate": 1.9698777340610063e-05, - "loss": 0.2944, + "epoch": 0.090359916747043, + "grad_norm": 0.7992636561393738, + "learning_rate": 1.9397600555019716e-05, + "loss": 0.259, "step": 1780 }, { - "epoch": 0.04531031856834623, - "grad_norm": 1.7719142436981201, - "learning_rate": 1.9697931209544362e-05, - "loss": 0.2915, + "epoch": 0.09061373673790547, + "grad_norm": 0.6407599449157715, + "learning_rate": 1.9395908421747297e-05, + "loss": 0.2247, "step": 1785 }, { - "epoch": 0.045437238228201546, - "grad_norm": 1.4313111305236816, - "learning_rate": 1.9697085078478657e-05, - "loss": 0.2928, + "epoch": 0.09086755672876795, + "grad_norm": 0.9677866101264954, + "learning_rate": 1.939421628847488e-05, + "loss": 0.2329, "step": 1790 }, { - "epoch": 0.04556415788805686, - "grad_norm": 1.1658649444580078, - "learning_rate": 1.9696238947412955e-05, - "loss": 0.3358, + "epoch": 0.09112137671963044, + "grad_norm": 0.7822664976119995, + "learning_rate": 1.9392524155202467e-05, + "loss": 0.2402, "step": 1795 }, { - "epoch": 0.04569107754791217, - "grad_norm": 1.22987699508667, - "learning_rate": 1.9695392816347254e-05, - "loss": 0.3415, + "epoch": 0.09137519671049292, + "grad_norm": 0.7724019289016724, + "learning_rate": 1.9390832021930047e-05, + "loss": 0.2504, "step": 1800 }, { - "epoch": 0.04581799720776748, - "grad_norm": 1.1061509847640991, - "learning_rate": 1.9694546685281552e-05, - "loss": 0.2857, + "epoch": 0.0916290167013554, + "grad_norm": 0.7643800973892212, + "learning_rate": 1.9389139888657634e-05, + "loss": 0.2409, "step": 1805 }, { - "epoch": 0.045944916867622795, - "grad_norm": 1.2974392175674438, - "learning_rate": 1.969370055421585e-05, - "loss": 0.2969, + "epoch": 0.09188283669221788, + "grad_norm": 0.84905606508255, + "learning_rate": 1.9387447755385214e-05, + "loss": 0.2391, "step": 1810 }, { - "epoch": 0.04607183652747811, - "grad_norm": 0.9391538500785828, - "learning_rate": 1.969285442315015e-05, - "loss": 0.326, + "epoch": 0.09213665668308035, + "grad_norm": 3.136202812194824, + "learning_rate": 1.9385755622112798e-05, + "loss": 0.3056, "step": 1815 }, { - "epoch": 0.04619875618733342, - "grad_norm": 1.2237040996551514, - "learning_rate": 1.9692008292084447e-05, - "loss": 0.3225, + "epoch": 0.09239047667394285, + "grad_norm": 1.0175585746765137, + "learning_rate": 1.9384063488840385e-05, + "loss": 0.2591, "step": 1820 }, { - "epoch": 0.04632567584718873, - "grad_norm": 1.7678003311157227, - "learning_rate": 1.9691162161018746e-05, - "loss": 0.3228, + "epoch": 0.09264429666480532, + "grad_norm": 1.1569483280181885, + "learning_rate": 1.9382371355567965e-05, + "loss": 0.2738, "step": 1825 }, { - "epoch": 0.04645259550704404, - "grad_norm": 0.9727216958999634, - "learning_rate": 1.969031602995304e-05, - "loss": 0.3169, + "epoch": 0.0928981166556678, + "grad_norm": 0.648151159286499, + "learning_rate": 1.938067922229555e-05, + "loss": 0.2288, "step": 1830 }, { - "epoch": 0.04657951516689935, - "grad_norm": 1.6418404579162598, - "learning_rate": 1.968946989888734e-05, - "loss": 0.427, + "epoch": 0.09315193664653028, + "grad_norm": 0.5930745005607605, + "learning_rate": 1.9378987089023132e-05, + "loss": 0.2324, "step": 1835 }, { - "epoch": 0.04670643482675466, - "grad_norm": 17.158559799194336, - "learning_rate": 1.9688623767821637e-05, - "loss": 0.3755, + "epoch": 0.09340575663739276, + "grad_norm": 0.7623791098594666, + "learning_rate": 1.9377294955750716e-05, + "loss": 0.2287, "step": 1840 }, { - "epoch": 0.046833354486609975, - "grad_norm": 1.1064811944961548, - "learning_rate": 1.9687777636755936e-05, - "loss": 0.2791, + "epoch": 0.09365957662825525, + "grad_norm": 1.552193284034729, + "learning_rate": 1.9375602822478302e-05, + "loss": 0.2388, "step": 1845 }, { - "epoch": 0.04696027414646529, - "grad_norm": 1.051291584968567, - "learning_rate": 1.968693150569023e-05, - "loss": 0.3142, + "epoch": 0.09391339661911773, + "grad_norm": 0.6569783687591553, + "learning_rate": 1.9373910689205883e-05, + "loss": 0.2415, "step": 1850 }, { - "epoch": 0.0470871938063206, - "grad_norm": 1.1256203651428223, - "learning_rate": 1.968608537462453e-05, - "loss": 0.303, + "epoch": 0.0941672166099802, + "grad_norm": 0.8682032823562622, + "learning_rate": 1.9372218555933466e-05, + "loss": 0.2376, "step": 1855 }, { - "epoch": 0.04721411346617591, - "grad_norm": 1.1579105854034424, - "learning_rate": 1.9685239243558828e-05, - "loss": 0.2887, + "epoch": 0.09442103660084268, + "grad_norm": 0.7503842115402222, + "learning_rate": 1.937052642266105e-05, + "loss": 0.2479, "step": 1860 }, { - "epoch": 0.047341033126031225, - "grad_norm": 1.1546697616577148, - "learning_rate": 1.9684393112493126e-05, - "loss": 0.2829, + "epoch": 0.09467485659170516, + "grad_norm": 1.0479599237442017, + "learning_rate": 1.9368834289388633e-05, + "loss": 0.2598, "step": 1865 }, { - "epoch": 0.04746795278588654, - "grad_norm": 1.2457284927368164, - "learning_rate": 1.9683546981427424e-05, - "loss": 0.2969, + "epoch": 0.09492867658256764, + "grad_norm": 3.8259048461914062, + "learning_rate": 1.9367142156116217e-05, + "loss": 0.2883, "step": 1870 }, { - "epoch": 0.04759487244574184, - "grad_norm": 1.3598731756210327, - "learning_rate": 1.9682700850361723e-05, - "loss": 0.3386, + "epoch": 0.09518249657343013, + "grad_norm": 0.7968519330024719, + "learning_rate": 1.93654500228438e-05, + "loss": 0.2404, "step": 1875 }, { - "epoch": 0.047721792105597155, - "grad_norm": 0.9349241852760315, - "learning_rate": 1.968185471929602e-05, - "loss": 0.2716, + "epoch": 0.0954363165642926, + "grad_norm": 0.9822812080383301, + "learning_rate": 1.9363757889571384e-05, + "loss": 0.2585, "step": 1880 }, { - "epoch": 0.04784871176545247, - "grad_norm": 2.084773540496826, - "learning_rate": 1.968100858823032e-05, - "loss": 0.2917, + "epoch": 0.09569013655515508, + "grad_norm": 1.0278716087341309, + "learning_rate": 1.9362065756298967e-05, + "loss": 0.2263, "step": 1885 }, { - "epoch": 0.04797563142530778, - "grad_norm": 1.3005043268203735, - "learning_rate": 1.9680162457164615e-05, - "loss": 0.3182, + "epoch": 0.09594395654601756, + "grad_norm": 0.7152266502380371, + "learning_rate": 1.936037362302655e-05, + "loss": 0.2329, "step": 1890 }, { - "epoch": 0.04810255108516309, - "grad_norm": 0.9799903631210327, - "learning_rate": 1.9679316326098913e-05, - "loss": 0.2995, + "epoch": 0.09619777653688004, + "grad_norm": 0.9978631138801575, + "learning_rate": 1.9358681489754135e-05, + "loss": 0.2461, "step": 1895 }, { - "epoch": 0.048229470745018405, - "grad_norm": 1.527892827987671, - "learning_rate": 1.967847019503321e-05, - "loss": 0.2954, + "epoch": 0.09645159652774253, + "grad_norm": 0.8091859221458435, + "learning_rate": 1.9356989356481718e-05, + "loss": 0.248, "step": 1900 }, { - "epoch": 0.04835639040487372, - "grad_norm": 2.011814594268799, - "learning_rate": 1.967762406396751e-05, - "loss": 0.3003, + "epoch": 0.09670541651860501, + "grad_norm": 0.714453935623169, + "learning_rate": 1.93552972232093e-05, + "loss": 0.2521, "step": 1905 }, { - "epoch": 0.04848331006472903, - "grad_norm": 1.2556824684143066, - "learning_rate": 1.9676777932901808e-05, - "loss": 0.3068, + "epoch": 0.09695923650946749, + "grad_norm": 0.9081348180770874, + "learning_rate": 1.9353605089936885e-05, + "loss": 0.2538, "step": 1910 }, { - "epoch": 0.048610229724584335, - "grad_norm": 1.090436339378357, - "learning_rate": 1.9675931801836107e-05, - "loss": 0.3111, + "epoch": 0.09721305650032996, + "grad_norm": 1.0938876867294312, + "learning_rate": 1.935191295666447e-05, + "loss": 0.2338, "step": 1915 }, { - "epoch": 0.04873714938443965, - "grad_norm": 1.9537619352340698, - "learning_rate": 1.9675085670770405e-05, - "loss": 0.3054, + "epoch": 0.09746687649119244, + "grad_norm": 0.6565501093864441, + "learning_rate": 1.9350220823392052e-05, + "loss": 0.2505, "step": 1920 }, { - "epoch": 0.04886406904429496, - "grad_norm": 0.8241440653800964, - "learning_rate": 1.9674239539704703e-05, - "loss": 0.2786, + "epoch": 0.09772069648205493, + "grad_norm": 0.615381121635437, + "learning_rate": 1.9348528690119636e-05, + "loss": 0.228, "step": 1925 }, { - "epoch": 0.04899098870415027, - "grad_norm": 1.7155784368515015, - "learning_rate": 1.9673393408639e-05, - "loss": 0.2903, + "epoch": 0.09797451647291741, + "grad_norm": 1.128936529159546, + "learning_rate": 1.934683655684722e-05, + "loss": 0.2206, "step": 1930 }, { - "epoch": 0.049117908364005584, - "grad_norm": 1.9285296201705933, - "learning_rate": 1.9672547277573297e-05, - "loss": 0.3009, + "epoch": 0.09822833646377989, + "grad_norm": 0.9524216651916504, + "learning_rate": 1.9345144423574803e-05, + "loss": 0.2608, "step": 1935 }, { - "epoch": 0.0492448280238609, - "grad_norm": 1.094623327255249, - "learning_rate": 1.9671701146507595e-05, - "loss": 0.2639, + "epoch": 0.09848215645464237, + "grad_norm": 0.7258911728858948, + "learning_rate": 1.9343452290302383e-05, + "loss": 0.247, "step": 1940 }, { - "epoch": 0.04937174768371621, - "grad_norm": 1.1016596555709839, - "learning_rate": 1.9670855015441894e-05, - "loss": 0.2895, + "epoch": 0.09873597644550484, + "grad_norm": 0.7854008674621582, + "learning_rate": 1.934176015702997e-05, + "loss": 0.2611, "step": 1945 }, { - "epoch": 0.04949866734357152, - "grad_norm": 1.205510139465332, - "learning_rate": 1.9670008884376192e-05, - "loss": 0.2817, + "epoch": 0.09898979643636732, + "grad_norm": 0.9118169546127319, + "learning_rate": 1.9340068023757554e-05, + "loss": 0.2428, "step": 1950 }, { - "epoch": 0.049625587003426834, - "grad_norm": 1.3268259763717651, - "learning_rate": 1.966916275331049e-05, - "loss": 0.3326, + "epoch": 0.09924361642722981, + "grad_norm": 0.9793441891670227, + "learning_rate": 1.9338375890485137e-05, + "loss": 0.2296, "step": 1955 }, { - "epoch": 0.04975250666328214, - "grad_norm": 1.222472071647644, - "learning_rate": 1.966831662224479e-05, - "loss": 0.3113, + "epoch": 0.09949743641809229, + "grad_norm": 0.601921558380127, + "learning_rate": 1.933668375721272e-05, + "loss": 0.2492, "step": 1960 }, { - "epoch": 0.04987942632313745, - "grad_norm": 1.153444528579712, - "learning_rate": 1.9667470491179087e-05, - "loss": 0.293, + "epoch": 0.09975125640895477, + "grad_norm": 0.6406486630439758, + "learning_rate": 1.93349916239403e-05, + "loss": 0.2456, "step": 1965 }, { - "epoch": 0.050006345982992764, - "grad_norm": 0.8176862001419067, - "learning_rate": 1.9666624360113382e-05, - "loss": 0.2676, + "epoch": 0.10000507639981725, + "grad_norm": 0.935122013092041, + "learning_rate": 1.9333299490667888e-05, + "loss": 0.2373, "step": 1970 }, { - "epoch": 0.05013326564284808, - "grad_norm": 0.9448211193084717, - "learning_rate": 1.966577822904768e-05, - "loss": 0.313, + "epoch": 0.10025889639067972, + "grad_norm": 0.6741816997528076, + "learning_rate": 1.933160735739547e-05, + "loss": 0.2215, "step": 1975 }, { - "epoch": 0.05026018530270339, - "grad_norm": 0.9713761806488037, - "learning_rate": 1.966493209798198e-05, - "loss": 0.3404, + "epoch": 0.10051271638154222, + "grad_norm": 1.050551414489746, + "learning_rate": 1.932991522412305e-05, + "loss": 0.2436, "step": 1980 }, { - "epoch": 0.0503871049625587, - "grad_norm": 1.2133755683898926, - "learning_rate": 1.9664085966916277e-05, - "loss": 0.299, + "epoch": 0.1007665363724047, + "grad_norm": 0.9753983020782471, + "learning_rate": 1.932822309085064e-05, + "loss": 0.2506, "step": 1985 }, { - "epoch": 0.050514024622414014, - "grad_norm": 1.1480767726898193, - "learning_rate": 1.9663239835850573e-05, - "loss": 0.3161, + "epoch": 0.10102035636326717, + "grad_norm": 0.5514481067657471, + "learning_rate": 1.932653095757822e-05, + "loss": 0.2287, "step": 1990 }, { - "epoch": 0.050640944282269326, - "grad_norm": 1.5364185571670532, - "learning_rate": 1.966239370478487e-05, - "loss": 0.328, + "epoch": 0.10127417635412965, + "grad_norm": 0.6138185262680054, + "learning_rate": 1.9324838824305805e-05, + "loss": 0.2546, "step": 1995 }, { - "epoch": 0.05076786394212463, - "grad_norm": 1.8201708793640137, - "learning_rate": 1.966154757371917e-05, - "loss": 0.3211, + "epoch": 0.10152799634499213, + "grad_norm": 0.7834172248840332, + "learning_rate": 1.932314669103339e-05, + "loss": 0.2398, "step": 2000 }, { - "epoch": 0.050894783601979944, - "grad_norm": 0.9378014206886292, - "learning_rate": 1.9660701442653468e-05, - "loss": 0.3339, + "epoch": 0.1017818163358546, + "grad_norm": 0.7345184683799744, + "learning_rate": 1.932145455776097e-05, + "loss": 0.2264, "step": 2005 }, { - "epoch": 0.051021703261835256, - "grad_norm": 0.999104917049408, - "learning_rate": 1.9659855311587766e-05, - "loss": 0.2828, + "epoch": 0.1020356363267171, + "grad_norm": 0.7603473663330078, + "learning_rate": 1.9319762424488556e-05, + "loss": 0.2579, "step": 2010 }, { - "epoch": 0.05114862292169057, - "grad_norm": 1.147027850151062, - "learning_rate": 1.9659009180522065e-05, - "loss": 0.2769, + "epoch": 0.10228945631757957, + "grad_norm": 0.5276412963867188, + "learning_rate": 1.9318070291216136e-05, + "loss": 0.2575, "step": 2015 }, { - "epoch": 0.05127554258154588, - "grad_norm": 0.960622251033783, - "learning_rate": 1.9658163049456363e-05, - "loss": 0.2699, + "epoch": 0.10254327630844205, + "grad_norm": 0.5985664129257202, + "learning_rate": 1.931637815794372e-05, + "loss": 0.2357, "step": 2020 }, { - "epoch": 0.051402462241401194, - "grad_norm": 0.8650773167610168, - "learning_rate": 1.965731691839066e-05, - "loss": 0.2743, + "epoch": 0.10279709629930453, + "grad_norm": 1.0004132986068726, + "learning_rate": 1.9314686024671307e-05, + "loss": 0.2415, "step": 2025 }, { - "epoch": 0.051529381901256506, - "grad_norm": 0.9522210359573364, - "learning_rate": 1.9656470787324956e-05, - "loss": 0.2966, + "epoch": 0.10305091629016701, + "grad_norm": 0.9058099389076233, + "learning_rate": 1.9312993891398887e-05, + "loss": 0.2489, "step": 2030 }, { - "epoch": 0.05165630156111182, - "grad_norm": 1.297050952911377, - "learning_rate": 1.9655624656259255e-05, - "loss": 0.3059, + "epoch": 0.1033047362810295, + "grad_norm": 0.6580535769462585, + "learning_rate": 1.931130175812647e-05, + "loss": 0.2282, "step": 2035 }, { - "epoch": 0.05178322122096713, - "grad_norm": 1.6889631748199463, - "learning_rate": 1.9654778525193553e-05, - "loss": 0.3033, + "epoch": 0.10355855627189198, + "grad_norm": 0.8212989568710327, + "learning_rate": 1.9309609624854054e-05, + "loss": 0.2486, "step": 2040 }, { - "epoch": 0.051910140880822436, - "grad_norm": 1.104992389678955, - "learning_rate": 1.965393239412785e-05, - "loss": 0.2874, + "epoch": 0.10381237626275445, + "grad_norm": 0.6100918650627136, + "learning_rate": 1.9307917491581637e-05, + "loss": 0.2817, "step": 2045 }, { - "epoch": 0.05203706054067775, - "grad_norm": 0.9527480006217957, - "learning_rate": 1.965308626306215e-05, - "loss": 0.3022, + "epoch": 0.10406619625361693, + "grad_norm": 0.9934706687927246, + "learning_rate": 1.9306225358309224e-05, + "loss": 0.2319, "step": 2050 }, { - "epoch": 0.05216398020053306, - "grad_norm": 1.2669683694839478, - "learning_rate": 1.965224013199645e-05, - "loss": 0.2926, + "epoch": 0.10432001624447941, + "grad_norm": 0.7338758111000061, + "learning_rate": 1.9304533225036805e-05, + "loss": 0.2284, "step": 2055 }, { - "epoch": 0.05229089986038837, - "grad_norm": 0.9837329983711243, - "learning_rate": 1.9651394000930747e-05, - "loss": 0.2597, + "epoch": 0.1045738362353419, + "grad_norm": 0.7655614614486694, + "learning_rate": 1.9302841091764388e-05, + "loss": 0.2106, "step": 2060 }, { - "epoch": 0.052417819520243686, - "grad_norm": 1.30946946144104, - "learning_rate": 1.9650547869865045e-05, - "loss": 0.2887, + "epoch": 0.10482765622620438, + "grad_norm": 0.8056983351707458, + "learning_rate": 1.930114895849197e-05, + "loss": 0.2301, "step": 2065 }, { - "epoch": 0.052544739180099, - "grad_norm": 1.149692177772522, - "learning_rate": 1.9649701738799344e-05, - "loss": 0.2809, + "epoch": 0.10508147621706686, + "grad_norm": 0.6935755610466003, + "learning_rate": 1.9299456825219555e-05, + "loss": 0.1977, "step": 2070 }, { - "epoch": 0.05267165883995431, - "grad_norm": 1.1292020082473755, - "learning_rate": 1.964885560773364e-05, - "loss": 0.2962, + "epoch": 0.10533529620792934, + "grad_norm": 0.7659327387809753, + "learning_rate": 1.929776469194714e-05, + "loss": 0.2319, "step": 2075 }, { - "epoch": 0.05279857849980962, - "grad_norm": 0.9312173128128052, - "learning_rate": 1.9648009476667937e-05, - "loss": 0.3036, + "epoch": 0.10558911619879181, + "grad_norm": 0.8755415081977844, + "learning_rate": 1.9296072558674722e-05, + "loss": 0.2531, "step": 2080 }, { - "epoch": 0.052925498159664935, - "grad_norm": 1.0098931789398193, - "learning_rate": 1.9647163345602235e-05, - "loss": 0.2564, + "epoch": 0.10584293618965429, + "grad_norm": 0.7641196846961975, + "learning_rate": 1.9294380425402306e-05, + "loss": 0.2262, "step": 2085 }, { - "epoch": 0.05305241781952024, - "grad_norm": 1.227110743522644, - "learning_rate": 1.9646317214536534e-05, - "loss": 0.2926, + "epoch": 0.10609675618051678, + "grad_norm": 0.5032942295074463, + "learning_rate": 1.929268829212989e-05, + "loss": 0.2371, "step": 2090 }, { - "epoch": 0.05317933747937555, - "grad_norm": 1.3902233839035034, - "learning_rate": 1.9645471083470832e-05, - "loss": 0.2958, + "epoch": 0.10635057617137926, + "grad_norm": 0.8890644311904907, + "learning_rate": 1.9290996158857473e-05, + "loss": 0.2262, "step": 2095 }, { - "epoch": 0.053306257139230866, - "grad_norm": 1.4526315927505493, - "learning_rate": 1.964462495240513e-05, - "loss": 0.2775, + "epoch": 0.10660439616224174, + "grad_norm": 0.6854649186134338, + "learning_rate": 1.9289304025585056e-05, + "loss": 0.2407, "step": 2100 }, { - "epoch": 0.05343317679908618, - "grad_norm": 1.8315290212631226, - "learning_rate": 1.964377882133943e-05, - "loss": 0.3036, + "epoch": 0.10685821615310422, + "grad_norm": 0.7222452163696289, + "learning_rate": 1.928761189231264e-05, + "loss": 0.2246, "step": 2105 }, { - "epoch": 0.05356009645894149, - "grad_norm": 1.089212417602539, - "learning_rate": 1.9642932690273727e-05, - "loss": 0.3, + "epoch": 0.1071120361439667, + "grad_norm": 0.5661618709564209, + "learning_rate": 1.9285919759040224e-05, + "loss": 0.236, "step": 2110 }, { - "epoch": 0.0536870161187968, - "grad_norm": 1.0979262590408325, - "learning_rate": 1.9642086559208022e-05, - "loss": 0.3529, + "epoch": 0.10736585613482919, + "grad_norm": 0.6868450045585632, + "learning_rate": 1.9284227625767807e-05, + "loss": 0.2206, "step": 2115 }, { - "epoch": 0.053813935778652115, - "grad_norm": 0.9019028544425964, - "learning_rate": 1.964124042814232e-05, - "loss": 0.2765, + "epoch": 0.10761967612569166, + "grad_norm": 0.6375486254692078, + "learning_rate": 1.928253549249539e-05, + "loss": 0.2075, "step": 2120 }, { - "epoch": 0.05394085543850743, - "grad_norm": 1.3091398477554321, - "learning_rate": 1.964039429707662e-05, - "loss": 0.3204, + "epoch": 0.10787349611655414, + "grad_norm": 0.7467653751373291, + "learning_rate": 1.9280843359222974e-05, + "loss": 0.2168, "step": 2125 }, { - "epoch": 0.05406777509836273, - "grad_norm": 1.1603845357894897, - "learning_rate": 1.9639548166010918e-05, - "loss": 0.2636, + "epoch": 0.10812731610741662, + "grad_norm": 1.1524518728256226, + "learning_rate": 1.9279151225950558e-05, + "loss": 0.2622, "step": 2130 }, { - "epoch": 0.054194694758218045, - "grad_norm": 0.9875229597091675, - "learning_rate": 1.9638702034945213e-05, - "loss": 0.3235, + "epoch": 0.1083811360982791, + "grad_norm": 0.8847719430923462, + "learning_rate": 1.927745909267814e-05, + "loss": 0.2297, "step": 2135 }, { - "epoch": 0.05432161441807336, - "grad_norm": 1.4558963775634766, - "learning_rate": 1.963785590387951e-05, - "loss": 0.2695, + "epoch": 0.10863495608914157, + "grad_norm": 1.002732515335083, + "learning_rate": 1.9275766959405725e-05, + "loss": 0.2322, "step": 2140 }, { - "epoch": 0.05444853407792867, - "grad_norm": 4.678642749786377, - "learning_rate": 1.963700977281381e-05, - "loss": 0.3094, + "epoch": 0.10888877608000407, + "grad_norm": 1.2165062427520752, + "learning_rate": 1.927407482613331e-05, + "loss": 0.2213, "step": 2145 }, { - "epoch": 0.05457545373778398, - "grad_norm": 0.9270572066307068, - "learning_rate": 1.9636163641748108e-05, - "loss": 0.3071, + "epoch": 0.10914259607086654, + "grad_norm": 0.7660351395606995, + "learning_rate": 1.9272382692860892e-05, + "loss": 0.2369, "step": 2150 }, { - "epoch": 0.054702373397639295, - "grad_norm": 1.018460988998413, - "learning_rate": 1.9635317510682406e-05, - "loss": 0.276, + "epoch": 0.10939641606172902, + "grad_norm": 0.6399320960044861, + "learning_rate": 1.9270690559588475e-05, + "loss": 0.2226, "step": 2155 }, { - "epoch": 0.05482929305749461, - "grad_norm": 0.7545543313026428, - "learning_rate": 1.9634471379616705e-05, - "loss": 0.3167, + "epoch": 0.1096502360525915, + "grad_norm": 0.929460883140564, + "learning_rate": 1.926899842631606e-05, + "loss": 0.2345, "step": 2160 }, { - "epoch": 0.05495621271734992, - "grad_norm": 0.7807610034942627, - "learning_rate": 1.9633625248551003e-05, - "loss": 0.2567, + "epoch": 0.10990405604345398, + "grad_norm": 0.7910303473472595, + "learning_rate": 1.9267306293043643e-05, + "loss": 0.1851, "step": 2165 }, { - "epoch": 0.05508313237720523, - "grad_norm": 0.6823940873146057, - "learning_rate": 1.96327791174853e-05, - "loss": 0.2578, + "epoch": 0.11015787603431647, + "grad_norm": 0.6536149978637695, + "learning_rate": 1.9265614159771223e-05, + "loss": 0.2259, "step": 2170 }, { - "epoch": 0.05521005203706054, - "grad_norm": 1.1852221488952637, - "learning_rate": 1.9631932986419596e-05, - "loss": 0.2908, + "epoch": 0.11041169602517895, + "grad_norm": 0.6428697109222412, + "learning_rate": 1.926392202649881e-05, + "loss": 0.2169, "step": 2175 }, { - "epoch": 0.05533697169691585, - "grad_norm": 1.0429812669754028, - "learning_rate": 1.9631086855353895e-05, - "loss": 0.2702, + "epoch": 0.11066551601604142, + "grad_norm": 0.6982537508010864, + "learning_rate": 1.9262229893226393e-05, + "loss": 0.2084, "step": 2180 }, { - "epoch": 0.05546389135677116, - "grad_norm": 1.0177878141403198, - "learning_rate": 1.9630240724288193e-05, - "loss": 0.3147, + "epoch": 0.1109193360069039, + "grad_norm": 0.7507615089416504, + "learning_rate": 1.9260537759953973e-05, + "loss": 0.2433, "step": 2185 }, { - "epoch": 0.055590811016626475, - "grad_norm": 1.021012544631958, - "learning_rate": 1.962939459322249e-05, - "loss": 0.2789, + "epoch": 0.11117315599776638, + "grad_norm": 0.8022400140762329, + "learning_rate": 1.925884562668156e-05, + "loss": 0.2215, "step": 2190 }, { - "epoch": 0.05571773067648179, - "grad_norm": 0.9845540523529053, - "learning_rate": 1.962854846215679e-05, - "loss": 0.2713, + "epoch": 0.11142697598862887, + "grad_norm": 0.5533608794212341, + "learning_rate": 1.925715349340914e-05, + "loss": 0.2013, "step": 2195 }, { - "epoch": 0.0558446503363371, - "grad_norm": 1.1643091440200806, - "learning_rate": 1.962770233109109e-05, - "loss": 0.2663, + "epoch": 0.11168079597949135, + "grad_norm": 0.6407077312469482, + "learning_rate": 1.9255461360136727e-05, + "loss": 0.2131, "step": 2200 }, { - "epoch": 0.05597156999619241, - "grad_norm": 1.1126625537872314, - "learning_rate": 1.9626856200025387e-05, + "epoch": 0.11193461597035383, + "grad_norm": 1.1785430908203125, + "learning_rate": 1.925376922686431e-05, "loss": 0.2623, "step": 2205 }, { - "epoch": 0.056098489656047724, - "grad_norm": 0.7765973210334778, - "learning_rate": 1.9626010068959685e-05, - "loss": 0.2924, + "epoch": 0.1121884359612163, + "grad_norm": 0.6303373575210571, + "learning_rate": 1.925207709359189e-05, + "loss": 0.2143, "step": 2210 }, { - "epoch": 0.05622540931590303, - "grad_norm": 1.976304054260254, - "learning_rate": 1.962516393789398e-05, - "loss": 0.2651, + "epoch": 0.11244225595207878, + "grad_norm": 0.8575406074523926, + "learning_rate": 1.9250384960319478e-05, + "loss": 0.2237, "step": 2215 }, { - "epoch": 0.05635232897575834, - "grad_norm": 1.0603781938552856, - "learning_rate": 1.962431780682828e-05, - "loss": 0.2348, + "epoch": 0.11269607594294126, + "grad_norm": 0.9760498404502869, + "learning_rate": 1.9248692827047058e-05, + "loss": 0.214, "step": 2220 }, { - "epoch": 0.056479248635613655, - "grad_norm": 1.6487213373184204, - "learning_rate": 1.9623471675762577e-05, - "loss": 0.277, + "epoch": 0.11294989593380375, + "grad_norm": 1.455056071281433, + "learning_rate": 1.924700069377464e-05, + "loss": 0.2518, "step": 2225 }, { - "epoch": 0.05660616829546897, - "grad_norm": 1.235588788986206, - "learning_rate": 1.9622625544696875e-05, - "loss": 0.314, + "epoch": 0.11320371592466623, + "grad_norm": 0.8172942996025085, + "learning_rate": 1.924530856050223e-05, + "loss": 0.2165, "step": 2230 }, { - "epoch": 0.05673308795532428, - "grad_norm": 1.288875937461853, - "learning_rate": 1.9621779413631174e-05, - "loss": 0.2964, + "epoch": 0.1134575359155287, + "grad_norm": 0.9945189952850342, + "learning_rate": 1.924361642722981e-05, + "loss": 0.2459, "step": 2235 }, { - "epoch": 0.05686000761517959, - "grad_norm": 1.0221779346466064, - "learning_rate": 1.9620933282565472e-05, - "loss": 0.2885, + "epoch": 0.11371135590639118, + "grad_norm": 1.0252959728240967, + "learning_rate": 1.9241924293957396e-05, + "loss": 0.2323, "step": 2240 }, { - "epoch": 0.056986927275034904, - "grad_norm": 1.9039572477340698, - "learning_rate": 1.962008715149977e-05, - "loss": 0.2713, + "epoch": 0.11396517589725366, + "grad_norm": 0.5688400268554688, + "learning_rate": 1.9240232160684976e-05, + "loss": 0.1913, "step": 2245 }, { - "epoch": 0.05711384693489022, - "grad_norm": 1.164128303527832, - "learning_rate": 1.961924102043407e-05, - "loss": 0.2977, + "epoch": 0.11421899588811615, + "grad_norm": 0.6286960244178772, + "learning_rate": 1.923854002741256e-05, + "loss": 0.2405, "step": 2250 }, { - "epoch": 0.05724076659474553, - "grad_norm": 1.2583063840866089, - "learning_rate": 1.9618394889368364e-05, - "loss": 0.2693, + "epoch": 0.11447281587897863, + "grad_norm": 1.2339733839035034, + "learning_rate": 1.9236847894140146e-05, + "loss": 0.2043, "step": 2255 }, { - "epoch": 0.057367686254600835, - "grad_norm": 0.9793537855148315, - "learning_rate": 1.9617548758302663e-05, - "loss": 0.2667, + "epoch": 0.11472663586984111, + "grad_norm": 0.5691444873809814, + "learning_rate": 1.9235155760867726e-05, + "loss": 0.221, "step": 2260 }, { - "epoch": 0.05749460591445615, - "grad_norm": 1.2149391174316406, - "learning_rate": 1.961670262723696e-05, - "loss": 0.2955, + "epoch": 0.11498045586070359, + "grad_norm": 1.1626230478286743, + "learning_rate": 1.923346362759531e-05, + "loss": 0.225, "step": 2265 }, { - "epoch": 0.05762152557431146, - "grad_norm": 0.8665825128555298, - "learning_rate": 1.961585649617126e-05, - "loss": 0.3185, + "epoch": 0.11523427585156606, + "grad_norm": 0.7354618906974792, + "learning_rate": 1.9231771494322894e-05, + "loss": 0.2281, "step": 2270 }, { - "epoch": 0.05774844523416677, - "grad_norm": 1.0212774276733398, - "learning_rate": 1.9615010365105554e-05, - "loss": 0.3584, + "epoch": 0.11548809584242856, + "grad_norm": 0.5635653734207153, + "learning_rate": 1.9230079361050477e-05, + "loss": 0.2105, "step": 2275 }, { - "epoch": 0.057875364894022084, - "grad_norm": 1.1131477355957031, - "learning_rate": 1.9614164234039853e-05, - "loss": 0.2761, + "epoch": 0.11574191583329103, + "grad_norm": 1.019686222076416, + "learning_rate": 1.922838722777806e-05, + "loss": 0.2232, "step": 2280 }, { - "epoch": 0.058002284553877397, - "grad_norm": 1.061976671218872, - "learning_rate": 1.961331810297415e-05, - "loss": 0.2741, + "epoch": 0.11599573582415351, + "grad_norm": 0.5987036228179932, + "learning_rate": 1.9226695094505644e-05, + "loss": 0.2232, "step": 2285 }, { - "epoch": 0.05812920421373271, - "grad_norm": 1.3814352750778198, - "learning_rate": 1.961247197190845e-05, - "loss": 0.3003, + "epoch": 0.11624955581501599, + "grad_norm": 0.8209441304206848, + "learning_rate": 1.9225002961233228e-05, + "loss": 0.2016, "step": 2290 }, { - "epoch": 0.05825612387358802, - "grad_norm": 0.6653125286102295, - "learning_rate": 1.9611625840842748e-05, - "loss": 0.2743, + "epoch": 0.11650337580587847, + "grad_norm": 0.576745331287384, + "learning_rate": 1.922331082796081e-05, + "loss": 0.239, "step": 2295 }, { - "epoch": 0.058383043533443334, - "grad_norm": 0.8912113308906555, - "learning_rate": 1.9610779709777046e-05, - "loss": 0.2566, + "epoch": 0.11675719579674095, + "grad_norm": 1.0648269653320312, + "learning_rate": 1.9221618694688395e-05, + "loss": 0.1987, "step": 2300 }, { - "epoch": 0.05850996319329864, - "grad_norm": 1.0567492246627808, - "learning_rate": 1.9609933578711345e-05, - "loss": 0.2833, + "epoch": 0.11701101578760344, + "grad_norm": 0.5774346590042114, + "learning_rate": 1.921992656141598e-05, + "loss": 0.225, "step": 2305 }, { - "epoch": 0.05863688285315395, - "grad_norm": 1.0399706363677979, - "learning_rate": 1.9609087447645643e-05, - "loss": 0.2561, + "epoch": 0.11726483577846591, + "grad_norm": 0.6183851957321167, + "learning_rate": 1.9218234428143562e-05, + "loss": 0.2028, "step": 2310 }, { - "epoch": 0.058763802513009264, - "grad_norm": 1.3249002695083618, - "learning_rate": 1.9608241316579938e-05, - "loss": 0.2916, + "epoch": 0.11751865576932839, + "grad_norm": 0.8308761715888977, + "learning_rate": 1.9216542294871145e-05, + "loss": 0.2405, "step": 2315 }, { - "epoch": 0.058890722172864576, - "grad_norm": 1.0697038173675537, - "learning_rate": 1.9607395185514237e-05, - "loss": 0.2751, + "epoch": 0.11777247576019087, + "grad_norm": 0.8884351253509521, + "learning_rate": 1.921485016159873e-05, + "loss": 0.2338, "step": 2320 }, { - "epoch": 0.05901764183271989, - "grad_norm": 1.169650912284851, - "learning_rate": 1.9606549054448535e-05, - "loss": 0.2364, + "epoch": 0.11802629575105335, + "grad_norm": 0.8659582138061523, + "learning_rate": 1.9213158028326313e-05, + "loss": 0.2082, "step": 2325 }, { - "epoch": 0.0591445614925752, - "grad_norm": 1.0063879489898682, - "learning_rate": 1.9605702923382833e-05, - "loss": 0.2811, + "epoch": 0.11828011574191584, + "grad_norm": 0.7193389534950256, + "learning_rate": 1.9211465895053896e-05, + "loss": 0.211, "step": 2330 }, { - "epoch": 0.059271481152430514, - "grad_norm": 1.1024349927902222, - "learning_rate": 1.9604856792317132e-05, - "loss": 0.2704, + "epoch": 0.11853393573277832, + "grad_norm": 0.6831737756729126, + "learning_rate": 1.920977376178148e-05, + "loss": 0.2214, "step": 2335 }, { - "epoch": 0.059398400812285826, - "grad_norm": 0.8975645899772644, - "learning_rate": 1.960401066125143e-05, - "loss": 0.2718, + "epoch": 0.1187877557236408, + "grad_norm": 0.7798734903335571, + "learning_rate": 1.9208081628509063e-05, + "loss": 0.2062, "step": 2340 }, { - "epoch": 0.05952532047214113, - "grad_norm": 0.9476246237754822, - "learning_rate": 1.960316453018573e-05, - "loss": 0.2511, + "epoch": 0.11904157571450327, + "grad_norm": 0.7005655169487, + "learning_rate": 1.9206389495236647e-05, + "loss": 0.2081, "step": 2345 }, { - "epoch": 0.059652240131996444, - "grad_norm": 1.0240213871002197, - "learning_rate": 1.9602318399120027e-05, - "loss": 0.2795, + "epoch": 0.11929539570536575, + "grad_norm": 0.694050669670105, + "learning_rate": 1.920469736196423e-05, + "loss": 0.2348, "step": 2350 }, { - "epoch": 0.059779159791851756, - "grad_norm": 0.8008368015289307, - "learning_rate": 1.9601472268054322e-05, - "loss": 0.3225, + "epoch": 0.11954921569622823, + "grad_norm": 0.5242777466773987, + "learning_rate": 1.9203005228691814e-05, + "loss": 0.2103, "step": 2355 }, { - "epoch": 0.05990607945170707, - "grad_norm": 1.1486769914627075, - "learning_rate": 1.960062613698862e-05, - "loss": 0.2913, + "epoch": 0.11980303568709072, + "grad_norm": 0.9790678024291992, + "learning_rate": 1.9201313095419397e-05, + "loss": 0.2186, "step": 2360 }, { - "epoch": 0.06003299911156238, - "grad_norm": 1.0168883800506592, - "learning_rate": 1.959978000592292e-05, - "loss": 0.2697, + "epoch": 0.1200568556779532, + "grad_norm": 0.6226420998573303, + "learning_rate": 1.919962096214698e-05, + "loss": 0.2132, "step": 2365 }, { - "epoch": 0.06015991877141769, - "grad_norm": 1.6374428272247314, - "learning_rate": 1.9598933874857217e-05, - "loss": 0.2979, + "epoch": 0.12031067566881568, + "grad_norm": 0.6048428416252136, + "learning_rate": 1.9197928828874564e-05, + "loss": 0.2111, "step": 2370 }, { - "epoch": 0.060286838431273006, - "grad_norm": 1.9557150602340698, - "learning_rate": 1.9598087743791516e-05, - "loss": 0.2665, + "epoch": 0.12056449565967815, + "grad_norm": 0.4867452383041382, + "learning_rate": 1.9196236695602145e-05, + "loss": 0.1954, "step": 2375 }, { - "epoch": 0.06041375809112832, - "grad_norm": 1.2505282163619995, - "learning_rate": 1.9597241612725814e-05, - "loss": 0.293, + "epoch": 0.12081831565054063, + "grad_norm": 0.8069561123847961, + "learning_rate": 1.919454456232973e-05, + "loss": 0.2194, "step": 2380 }, { - "epoch": 0.06054067775098363, - "grad_norm": 1.265020489692688, - "learning_rate": 1.9596395481660112e-05, - "loss": 0.2967, + "epoch": 0.12107213564140312, + "grad_norm": 0.9302831888198853, + "learning_rate": 1.9192852429057315e-05, + "loss": 0.2132, "step": 2385 }, { - "epoch": 0.060667597410838936, - "grad_norm": 1.0600754022598267, - "learning_rate": 1.959554935059441e-05, - "loss": 0.2514, + "epoch": 0.1213259556322656, + "grad_norm": 0.6558440327644348, + "learning_rate": 1.91911602957849e-05, + "loss": 0.2138, "step": 2390 }, { - "epoch": 0.06079451707069425, - "grad_norm": 1.7423964738845825, - "learning_rate": 1.9594703219528706e-05, - "loss": 0.2865, + "epoch": 0.12157977562312808, + "grad_norm": 0.9373264908790588, + "learning_rate": 1.9189468162512482e-05, + "loss": 0.2006, "step": 2395 }, { - "epoch": 0.06092143673054956, - "grad_norm": 0.9203000068664551, - "learning_rate": 1.9593857088463004e-05, - "loss": 0.2361, + "epoch": 0.12183359561399056, + "grad_norm": 1.0025991201400757, + "learning_rate": 1.9187776029240062e-05, + "loss": 0.2473, "step": 2400 }, { - "epoch": 0.06104835639040487, - "grad_norm": 2.6963071823120117, - "learning_rate": 1.9593010957397303e-05, - "loss": 0.2778, + "epoch": 0.12208741560485303, + "grad_norm": 0.6057084202766418, + "learning_rate": 1.918608389596765e-05, + "loss": 0.2271, "step": 2405 }, { - "epoch": 0.061175276050260186, - "grad_norm": 1.0200892686843872, - "learning_rate": 1.95921648263316e-05, - "loss": 0.2802, + "epoch": 0.12234123559571553, + "grad_norm": 0.7220245599746704, + "learning_rate": 1.9184391762695233e-05, + "loss": 0.2219, "step": 2410 }, { - "epoch": 0.0613021957101155, - "grad_norm": 0.8529621362686157, - "learning_rate": 1.9591318695265896e-05, - "loss": 0.2903, + "epoch": 0.122595055586578, + "grad_norm": 0.7604368329048157, + "learning_rate": 1.9182699629422813e-05, + "loss": 0.2019, "step": 2415 }, { - "epoch": 0.06142911536997081, - "grad_norm": 0.9501874446868896, - "learning_rate": 1.9590472564200194e-05, - "loss": 0.2485, + "epoch": 0.12284887557744048, + "grad_norm": 0.5124613046646118, + "learning_rate": 1.91810074961504e-05, + "loss": 0.2054, "step": 2420 }, { - "epoch": 0.06155603502982612, - "grad_norm": 1.0502595901489258, - "learning_rate": 1.9589626433134493e-05, - "loss": 0.2714, + "epoch": 0.12310269556830296, + "grad_norm": 0.6220692992210388, + "learning_rate": 1.917931536287798e-05, + "loss": 0.2201, "step": 2425 }, { - "epoch": 0.06168295468968143, - "grad_norm": 0.937046229839325, - "learning_rate": 1.958878030206879e-05, - "loss": 0.3035, + "epoch": 0.12335651555916544, + "grad_norm": 1.0920076370239258, + "learning_rate": 1.9177623229605564e-05, + "loss": 0.2245, "step": 2430 }, { - "epoch": 0.06180987434953674, - "grad_norm": 0.9749279618263245, - "learning_rate": 1.958793417100309e-05, - "loss": 0.2564, + "epoch": 0.12361033555002791, + "grad_norm": 0.9251731038093567, + "learning_rate": 1.917593109633315e-05, + "loss": 0.2204, "step": 2435 }, { - "epoch": 0.06193679400939205, - "grad_norm": 1.1891180276870728, - "learning_rate": 1.9587088039937388e-05, - "loss": 0.3012, + "epoch": 0.1238641555408904, + "grad_norm": 0.9124245047569275, + "learning_rate": 1.917423896306073e-05, + "loss": 0.215, "step": 2440 }, { - "epoch": 0.062063713669247365, - "grad_norm": 0.8438390493392944, - "learning_rate": 1.9586241908871686e-05, - "loss": 0.2301, + "epoch": 0.12411797553175288, + "grad_norm": 2.2037713527679443, + "learning_rate": 1.9172546829788318e-05, + "loss": 0.2279, "step": 2445 }, { - "epoch": 0.06219063332910268, - "grad_norm": 1.456046462059021, - "learning_rate": 1.9585395777805985e-05, - "loss": 0.2978, + "epoch": 0.12437179552261536, + "grad_norm": 0.696209728717804, + "learning_rate": 1.9170854696515898e-05, + "loss": 0.2353, "step": 2450 }, { - "epoch": 0.06231755298895799, - "grad_norm": 2.2127904891967773, - "learning_rate": 1.958454964674028e-05, - "loss": 0.2763, + "epoch": 0.12462561551347784, + "grad_norm": 0.8244996070861816, + "learning_rate": 1.916916256324348e-05, + "loss": 0.2185, "step": 2455 }, { - "epoch": 0.0624444726488133, - "grad_norm": 0.9495311975479126, - "learning_rate": 1.9583703515674578e-05, - "loss": 0.2925, + "epoch": 0.12487943550434032, + "grad_norm": 1.1219316720962524, + "learning_rate": 1.9167470429971068e-05, + "loss": 0.2361, "step": 2460 }, { - "epoch": 0.06257139230866861, - "grad_norm": 1.1087098121643066, - "learning_rate": 1.9582857384608877e-05, - "loss": 0.291, + "epoch": 0.1251332554952028, + "grad_norm": 0.6167690753936768, + "learning_rate": 1.916577829669865e-05, + "loss": 0.2075, "step": 2465 }, { - "epoch": 0.06269831196852392, - "grad_norm": 1.0302941799163818, - "learning_rate": 1.9582011253543175e-05, - "loss": 0.2887, + "epoch": 0.12538707548606529, + "grad_norm": 0.663831889629364, + "learning_rate": 1.9164086163426232e-05, + "loss": 0.2179, "step": 2470 }, { - "epoch": 0.06282523162837923, - "grad_norm": 1.146024465560913, - "learning_rate": 1.9581165122477473e-05, - "loss": 0.2789, + "epoch": 0.12564089547692775, + "grad_norm": 0.7804519534111023, + "learning_rate": 1.9162394030153816e-05, + "loss": 0.2319, "step": 2475 }, { - "epoch": 0.06295215128823455, - "grad_norm": 0.8595502376556396, - "learning_rate": 1.9580318991411772e-05, - "loss": 0.2738, + "epoch": 0.12589471546779024, + "grad_norm": 0.5112632513046265, + "learning_rate": 1.91607018968814e-05, + "loss": 0.2121, "step": 2480 }, { - "epoch": 0.06307907094808986, - "grad_norm": 0.8726088404655457, - "learning_rate": 1.957947286034607e-05, - "loss": 0.2274, + "epoch": 0.12614853545865273, + "grad_norm": 1.0492627620697021, + "learning_rate": 1.9159009763608986e-05, + "loss": 0.1928, "step": 2485 }, { - "epoch": 0.06320599060794517, - "grad_norm": 0.7995723485946655, - "learning_rate": 1.957862672928037e-05, - "loss": 0.2516, + "epoch": 0.1264023554495152, + "grad_norm": 0.8314067721366882, + "learning_rate": 1.9157317630336566e-05, + "loss": 0.2102, "step": 2490 }, { - "epoch": 0.06333291026780048, - "grad_norm": 2.2664833068847656, - "learning_rate": 1.9577780598214664e-05, - "loss": 0.2221, + "epoch": 0.1266561754403777, + "grad_norm": 0.6615179181098938, + "learning_rate": 1.915562549706415e-05, + "loss": 0.2098, "step": 2495 }, { - "epoch": 0.0634598299276558, - "grad_norm": 1.8560844659805298, - "learning_rate": 1.9576934467148962e-05, - "loss": 0.264, + "epoch": 0.12690999543124015, + "grad_norm": 1.4945785999298096, + "learning_rate": 1.9153933363791733e-05, + "loss": 0.2256, "step": 2500 }, { - "epoch": 0.06358674958751111, - "grad_norm": 1.4099839925765991, - "learning_rate": 1.957608833608326e-05, - "loss": 0.2208, + "epoch": 0.12716381542210264, + "grad_norm": 0.6474151015281677, + "learning_rate": 1.9152241230519317e-05, + "loss": 0.2119, "step": 2505 }, { - "epoch": 0.06371366924736642, - "grad_norm": 0.7587975263595581, - "learning_rate": 1.957524220501756e-05, - "loss": 0.249, + "epoch": 0.12741763541296514, + "grad_norm": 0.7189993858337402, + "learning_rate": 1.91505490972469e-05, + "loss": 0.1996, "step": 2510 }, { - "epoch": 0.06384058890722173, - "grad_norm": 1.352735161781311, - "learning_rate": 1.9574396073951857e-05, - "loss": 0.2364, + "epoch": 0.1276714554038276, + "grad_norm": 0.6964658498764038, + "learning_rate": 1.9148856963974484e-05, + "loss": 0.247, "step": 2515 }, { - "epoch": 0.06396750856707704, - "grad_norm": 0.9857727885246277, - "learning_rate": 1.9573549942886156e-05, - "loss": 0.2208, + "epoch": 0.1279252753946901, + "grad_norm": 0.5863429307937622, + "learning_rate": 1.9147164830702067e-05, + "loss": 0.2197, "step": 2520 }, { - "epoch": 0.06409442822693236, - "grad_norm": 0.9859919548034668, - "learning_rate": 1.9572703811820454e-05, - "loss": 0.2601, + "epoch": 0.12817909538555255, + "grad_norm": 0.7309147715568542, + "learning_rate": 1.914547269742965e-05, + "loss": 0.2016, "step": 2525 }, { - "epoch": 0.06422134788678767, - "grad_norm": 1.323164701461792, - "learning_rate": 1.9571857680754752e-05, - "loss": 0.2728, + "epoch": 0.12843291537641505, + "grad_norm": 0.6691656112670898, + "learning_rate": 1.9143780564157235e-05, + "loss": 0.2326, "step": 2530 }, { - "epoch": 0.06434826754664298, - "grad_norm": 1.2939083576202393, - "learning_rate": 1.9571011549689048e-05, - "loss": 0.2538, + "epoch": 0.12868673536727754, + "grad_norm": 1.2683653831481934, + "learning_rate": 1.9142088430884818e-05, + "loss": 0.214, "step": 2535 }, { - "epoch": 0.06447518720649828, - "grad_norm": 1.8918570280075073, - "learning_rate": 1.9570165418623346e-05, - "loss": 0.2843, + "epoch": 0.12894055535814, + "grad_norm": 0.9203475713729858, + "learning_rate": 1.91403962976124e-05, + "loss": 0.2125, "step": 2540 }, { - "epoch": 0.06460210686635359, - "grad_norm": 1.0386077165603638, - "learning_rate": 1.9569319287557644e-05, - "loss": 0.2672, + "epoch": 0.1291943753490025, + "grad_norm": 0.636577308177948, + "learning_rate": 1.9138704164339985e-05, + "loss": 0.2013, "step": 2545 }, { - "epoch": 0.0647290265262089, - "grad_norm": 0.9629946947097778, - "learning_rate": 1.9568473156491943e-05, - "loss": 0.2599, + "epoch": 0.12944819533986496, + "grad_norm": 0.5804703235626221, + "learning_rate": 1.913701203106757e-05, + "loss": 0.2053, "step": 2550 }, { - "epoch": 0.06485594618606422, - "grad_norm": 1.2051010131835938, - "learning_rate": 1.9567627025426238e-05, - "loss": 0.2616, + "epoch": 0.12970201533072745, + "grad_norm": 0.5855862498283386, + "learning_rate": 1.9135319897795152e-05, + "loss": 0.1958, "step": 2555 }, { - "epoch": 0.06498286584591953, - "grad_norm": 1.2914990186691284, - "learning_rate": 1.9566780894360536e-05, - "loss": 0.2939, + "epoch": 0.12995583532158994, + "grad_norm": 0.7482487559318542, + "learning_rate": 1.9133627764522736e-05, + "loss": 0.2257, "step": 2560 }, { - "epoch": 0.06510978550577484, - "grad_norm": 0.9499424695968628, - "learning_rate": 1.9565934763294835e-05, - "loss": 0.2367, + "epoch": 0.1302096553124524, + "grad_norm": 0.7025090456008911, + "learning_rate": 1.913193563125032e-05, + "loss": 0.226, "step": 2565 }, { - "epoch": 0.06523670516563015, - "grad_norm": 0.8482542037963867, - "learning_rate": 1.9565088632229133e-05, - "loss": 0.2826, + "epoch": 0.1304634753033149, + "grad_norm": 0.8828746676445007, + "learning_rate": 1.9130243497977903e-05, + "loss": 0.2187, "step": 2570 }, { - "epoch": 0.06536362482548547, - "grad_norm": 0.8635600209236145, - "learning_rate": 1.956424250116343e-05, - "loss": 0.3003, + "epoch": 0.13071729529417736, + "grad_norm": 1.0203245878219604, + "learning_rate": 1.9128551364705486e-05, + "loss": 0.2288, "step": 2575 }, { - "epoch": 0.06549054448534078, - "grad_norm": 1.1908882856369019, - "learning_rate": 1.956339637009773e-05, - "loss": 0.2437, + "epoch": 0.13097111528503985, + "grad_norm": 0.6295384168624878, + "learning_rate": 1.912685923143307e-05, + "loss": 0.2034, "step": 2580 }, { - "epoch": 0.06561746414519609, - "grad_norm": 1.2954161167144775, - "learning_rate": 1.9562550239032028e-05, - "loss": 0.2499, + "epoch": 0.13122493527590234, + "grad_norm": 1.2643804550170898, + "learning_rate": 1.9125167098160654e-05, + "loss": 0.2278, "step": 2585 }, { - "epoch": 0.0657443838050514, - "grad_norm": 1.051544189453125, - "learning_rate": 1.9561704107966327e-05, - "loss": 0.2614, + "epoch": 0.1314787552667648, + "grad_norm": 0.6384063959121704, + "learning_rate": 1.9123474964888237e-05, + "loss": 0.1976, "step": 2590 }, { - "epoch": 0.06587130346490672, - "grad_norm": 1.2207762002944946, - "learning_rate": 1.9560857976900625e-05, - "loss": 0.2698, + "epoch": 0.1317325752576273, + "grad_norm": 0.6057654619216919, + "learning_rate": 1.912178283161582e-05, + "loss": 0.2108, "step": 2595 }, { - "epoch": 0.06599822312476203, - "grad_norm": 0.8034548163414001, - "learning_rate": 1.956001184583492e-05, - "loss": 0.2752, + "epoch": 0.13198639524848976, + "grad_norm": 0.6316978335380554, + "learning_rate": 1.9120090698343404e-05, + "loss": 0.2051, "step": 2600 }, { - "epoch": 0.06612514278461734, - "grad_norm": 0.9166296124458313, - "learning_rate": 1.955916571476922e-05, - "loss": 0.2631, + "epoch": 0.13224021523935225, + "grad_norm": 0.6967063546180725, + "learning_rate": 1.9118398565070984e-05, + "loss": 0.2016, "step": 2605 }, { - "epoch": 0.06625206244447265, - "grad_norm": 0.8596564531326294, - "learning_rate": 1.9558319583703517e-05, - "loss": 0.2639, + "epoch": 0.13249403523021472, + "grad_norm": 0.8182034492492676, + "learning_rate": 1.911670643179857e-05, + "loss": 0.1894, "step": 2610 }, { - "epoch": 0.06637898210432797, - "grad_norm": 0.9332006573677063, - "learning_rate": 1.9557473452637815e-05, - "loss": 0.2483, + "epoch": 0.1327478552210772, + "grad_norm": 0.5409026741981506, + "learning_rate": 1.9115014298526155e-05, + "loss": 0.2166, "step": 2615 }, { - "epoch": 0.06650590176418328, - "grad_norm": 1.2086237668991089, - "learning_rate": 1.9556627321572114e-05, - "loss": 0.282, + "epoch": 0.1330016752119397, + "grad_norm": 0.7620669007301331, + "learning_rate": 1.9113322165253735e-05, + "loss": 0.2018, "step": 2620 }, { - "epoch": 0.06663282142403858, - "grad_norm": 1.285081148147583, - "learning_rate": 1.9555781190506412e-05, - "loss": 0.2531, + "epoch": 0.13325549520280217, + "grad_norm": 0.8058112859725952, + "learning_rate": 1.9111630031981322e-05, + "loss": 0.1956, "step": 2625 }, { - "epoch": 0.06675974108389389, - "grad_norm": 1.4917584657669067, - "learning_rate": 1.955493505944071e-05, - "loss": 0.257, + "epoch": 0.13350931519366466, + "grad_norm": 0.9924262166023254, + "learning_rate": 1.9109937898708902e-05, + "loss": 0.2005, "step": 2630 }, { - "epoch": 0.0668866607437492, - "grad_norm": 0.9418209195137024, - "learning_rate": 1.955408892837501e-05, - "loss": 0.2524, + "epoch": 0.13376313518452712, + "grad_norm": 0.7677115797996521, + "learning_rate": 1.910824576543649e-05, + "loss": 0.2136, "step": 2635 }, { - "epoch": 0.06701358040360451, - "grad_norm": 1.0669775009155273, - "learning_rate": 1.9553242797309304e-05, - "loss": 0.2584, + "epoch": 0.1340169551753896, + "grad_norm": 0.8996549844741821, + "learning_rate": 1.9106553632164072e-05, + "loss": 0.2144, "step": 2640 }, { - "epoch": 0.06714050006345983, - "grad_norm": 1.1288447380065918, - "learning_rate": 1.9552396666243602e-05, - "loss": 0.2676, + "epoch": 0.1342707751662521, + "grad_norm": 1.055097222328186, + "learning_rate": 1.9104861498891653e-05, + "loss": 0.199, "step": 2645 }, { - "epoch": 0.06726741972331514, - "grad_norm": 1.0136168003082275, - "learning_rate": 1.95515505351779e-05, - "loss": 0.2434, + "epoch": 0.13452459515711457, + "grad_norm": 0.6172511577606201, + "learning_rate": 1.910316936561924e-05, + "loss": 0.2001, "step": 2650 }, { - "epoch": 0.06739433938317045, - "grad_norm": 1.6168822050094604, - "learning_rate": 1.95507044041122e-05, - "loss": 0.2517, + "epoch": 0.13477841514797706, + "grad_norm": 0.7040294408798218, + "learning_rate": 1.910147723234682e-05, + "loss": 0.2091, "step": 2655 }, { - "epoch": 0.06752125904302576, - "grad_norm": 1.1408613920211792, - "learning_rate": 1.9549858273046497e-05, - "loss": 0.2615, + "epoch": 0.13503223513883952, + "grad_norm": 0.6608620285987854, + "learning_rate": 1.9099785099074403e-05, + "loss": 0.1907, "step": 2660 }, { - "epoch": 0.06764817870288108, - "grad_norm": 1.118350863456726, - "learning_rate": 1.9549012141980796e-05, - "loss": 0.2579, + "epoch": 0.13528605512970202, + "grad_norm": 0.7029122114181519, + "learning_rate": 1.909809296580199e-05, + "loss": 0.207, "step": 2665 }, { - "epoch": 0.06777509836273639, - "grad_norm": 0.9326890110969543, - "learning_rate": 1.9548166010915094e-05, - "loss": 0.2119, + "epoch": 0.1355398751205645, + "grad_norm": 0.7303573489189148, + "learning_rate": 1.909640083252957e-05, + "loss": 0.2103, "step": 2670 }, { - "epoch": 0.0679020180225917, - "grad_norm": 1.6173900365829468, - "learning_rate": 1.9547319879849393e-05, - "loss": 0.2576, + "epoch": 0.13579369511142697, + "grad_norm": 0.7197701334953308, + "learning_rate": 1.9094708699257154e-05, + "loss": 0.198, "step": 2675 }, { - "epoch": 0.06802893768244701, - "grad_norm": 1.02213716506958, - "learning_rate": 1.9546473748783688e-05, - "loss": 0.2647, + "epoch": 0.13604751510228946, + "grad_norm": 2.0488266944885254, + "learning_rate": 1.9093016565984737e-05, + "loss": 0.2288, "step": 2680 }, { - "epoch": 0.06815585734230233, - "grad_norm": 0.8943336009979248, - "learning_rate": 1.9545627617717986e-05, - "loss": 0.2309, + "epoch": 0.13630133509315193, + "grad_norm": 0.7889509201049805, + "learning_rate": 1.909132443271232e-05, + "loss": 0.2184, "step": 2685 }, { - "epoch": 0.06828277700215764, - "grad_norm": 0.9621109366416931, - "learning_rate": 1.9544781486652284e-05, - "loss": 0.2494, + "epoch": 0.13655515508401442, + "grad_norm": 0.8902899622917175, + "learning_rate": 1.9089632299439908e-05, + "loss": 0.2161, "step": 2690 }, { - "epoch": 0.06840969666201295, - "grad_norm": 0.8185399770736694, - "learning_rate": 1.9543935355586583e-05, - "loss": 0.3202, + "epoch": 0.1368089750748769, + "grad_norm": 0.6209053993225098, + "learning_rate": 1.9087940166167488e-05, + "loss": 0.1956, "step": 2695 }, { - "epoch": 0.06853661632186826, - "grad_norm": 1.0591787099838257, - "learning_rate": 1.9543089224520878e-05, - "loss": 0.2512, + "epoch": 0.13706279506573937, + "grad_norm": 0.5760391354560852, + "learning_rate": 1.908624803289507e-05, + "loss": 0.2005, "step": 2700 }, { - "epoch": 0.06866353598172358, - "grad_norm": 0.7472973465919495, - "learning_rate": 1.9542243093455176e-05, - "loss": 0.2592, + "epoch": 0.13731661505660187, + "grad_norm": 0.9128169417381287, + "learning_rate": 1.9084555899622655e-05, + "loss": 0.2036, "step": 2705 }, { - "epoch": 0.06879045564157887, - "grad_norm": 1.088402271270752, - "learning_rate": 1.9541396962389475e-05, - "loss": 0.2746, + "epoch": 0.13757043504746433, + "grad_norm": 0.8320951461791992, + "learning_rate": 1.908286376635024e-05, + "loss": 0.2108, "step": 2710 }, { - "epoch": 0.06891737530143419, - "grad_norm": 0.9524895548820496, - "learning_rate": 1.9540550831323773e-05, - "loss": 0.2612, + "epoch": 0.13782425503832682, + "grad_norm": 0.7470078468322754, + "learning_rate": 1.9081171633077822e-05, + "loss": 0.2058, "step": 2715 }, { - "epoch": 0.0690442949612895, - "grad_norm": 1.4742704629898071, - "learning_rate": 1.953970470025807e-05, - "loss": 0.2571, + "epoch": 0.1380780750291893, + "grad_norm": 0.7988982796669006, + "learning_rate": 1.9079479499805406e-05, + "loss": 0.242, "step": 2720 }, { - "epoch": 0.06917121462114481, - "grad_norm": 0.7230198383331299, - "learning_rate": 1.953885856919237e-05, - "loss": 0.2579, + "epoch": 0.13833189502005178, + "grad_norm": 0.8993115425109863, + "learning_rate": 1.907778736653299e-05, + "loss": 0.212, "step": 2725 }, { - "epoch": 0.06929813428100012, - "grad_norm": 1.0274730920791626, - "learning_rate": 1.9538012438126668e-05, - "loss": 0.256, + "epoch": 0.13858571501091427, + "grad_norm": 0.6929076313972473, + "learning_rate": 1.9076095233260573e-05, + "loss": 0.2509, "step": 2730 }, { - "epoch": 0.06942505394085544, - "grad_norm": 1.39117431640625, - "learning_rate": 1.9537166307060967e-05, - "loss": 0.2714, + "epoch": 0.13883953500177673, + "grad_norm": 0.6944275498390198, + "learning_rate": 1.9074403099988156e-05, + "loss": 0.2116, "step": 2735 }, { - "epoch": 0.06955197360071075, - "grad_norm": 0.5796716809272766, - "learning_rate": 1.953632017599526e-05, - "loss": 0.2119, + "epoch": 0.13909335499263922, + "grad_norm": 0.6491143107414246, + "learning_rate": 1.907271096671574e-05, + "loss": 0.1943, "step": 2740 }, { - "epoch": 0.06967889326056606, - "grad_norm": 1.0476022958755493, - "learning_rate": 1.953547404492956e-05, - "loss": 0.2464, + "epoch": 0.1393471749835017, + "grad_norm": 0.6222745776176453, + "learning_rate": 1.9071018833443324e-05, + "loss": 0.2144, "step": 2745 }, { - "epoch": 0.06980581292042137, - "grad_norm": 0.9882228374481201, - "learning_rate": 1.953462791386386e-05, - "loss": 0.2414, + "epoch": 0.13960099497436418, + "grad_norm": 0.863029420375824, + "learning_rate": 1.9069326700170907e-05, + "loss": 0.1972, "step": 2750 }, { - "epoch": 0.06993273258027669, - "grad_norm": 5.916758060455322, - "learning_rate": 1.9533781782798157e-05, - "loss": 0.289, + "epoch": 0.13985481496522667, + "grad_norm": 0.5419744253158569, + "learning_rate": 1.906763456689849e-05, + "loss": 0.1913, "step": 2755 }, { - "epoch": 0.070059652240132, - "grad_norm": 0.8646323084831238, - "learning_rate": 1.9532935651732455e-05, - "loss": 0.2404, + "epoch": 0.14010863495608913, + "grad_norm": 0.5654199719429016, + "learning_rate": 1.9065942433626074e-05, + "loss": 0.1994, "step": 2760 }, { - "epoch": 0.07018657189998731, - "grad_norm": 1.1767001152038574, - "learning_rate": 1.9532089520666754e-05, - "loss": 0.266, + "epoch": 0.14036245494695163, + "grad_norm": 0.7003618478775024, + "learning_rate": 1.9064250300353658e-05, + "loss": 0.1955, "step": 2765 }, { - "epoch": 0.07031349155984262, - "grad_norm": 0.7235515117645264, - "learning_rate": 1.9531243389601052e-05, - "loss": 0.2416, + "epoch": 0.1406162749378141, + "grad_norm": 1.1119288206100464, + "learning_rate": 1.906255816708124e-05, + "loss": 0.1984, "step": 2770 }, { - "epoch": 0.07044041121969793, - "grad_norm": 1.6409966945648193, - "learning_rate": 1.953039725853535e-05, - "loss": 0.2762, + "epoch": 0.14087009492867658, + "grad_norm": 0.496934711933136, + "learning_rate": 1.9060866033808825e-05, + "loss": 0.1855, "step": 2775 }, { - "epoch": 0.07056733087955325, - "grad_norm": 1.1260305643081665, - "learning_rate": 1.9529551127469645e-05, - "loss": 0.276, + "epoch": 0.14112391491953907, + "grad_norm": 0.9058437943458557, + "learning_rate": 1.905917390053641e-05, + "loss": 0.188, "step": 2780 }, { - "epoch": 0.07069425053940856, - "grad_norm": 1.1117349863052368, - "learning_rate": 1.9528704996403944e-05, - "loss": 0.2619, + "epoch": 0.14137773491040154, + "grad_norm": 0.6056883335113525, + "learning_rate": 1.9057481767263992e-05, + "loss": 0.2088, "step": 2785 }, { - "epoch": 0.07082117019926387, - "grad_norm": 1.0882619619369507, - "learning_rate": 1.9527858865338242e-05, - "loss": 0.2995, + "epoch": 0.14163155490126403, + "grad_norm": 0.6516966223716736, + "learning_rate": 1.9055789633991575e-05, + "loss": 0.1905, "step": 2790 }, { - "epoch": 0.07094808985911918, - "grad_norm": 0.7826209664344788, - "learning_rate": 1.952701273427254e-05, - "loss": 0.2704, + "epoch": 0.1418853748921265, + "grad_norm": 0.6040582060813904, + "learning_rate": 1.905409750071916e-05, + "loss": 0.2125, "step": 2795 }, { - "epoch": 0.07107500951897448, - "grad_norm": 1.3271095752716064, - "learning_rate": 1.952616660320684e-05, - "loss": 0.2446, + "epoch": 0.14213919488298898, + "grad_norm": 0.5323441624641418, + "learning_rate": 1.9052405367446743e-05, + "loss": 0.1771, "step": 2800 }, { - "epoch": 0.0712019291788298, - "grad_norm": 1.613561749458313, - "learning_rate": 1.9525320472141138e-05, - "loss": 0.2307, + "epoch": 0.14239301487385148, + "grad_norm": 0.4898989796638489, + "learning_rate": 1.9050713234174326e-05, + "loss": 0.1863, "step": 2805 }, { - "epoch": 0.07132884883868511, - "grad_norm": 0.742684543132782, - "learning_rate": 1.9524474341075436e-05, - "loss": 0.2532, + "epoch": 0.14264683486471394, + "grad_norm": 0.5168548226356506, + "learning_rate": 1.9049021100901906e-05, + "loss": 0.1941, "step": 2810 }, { - "epoch": 0.07145576849854042, - "grad_norm": 0.8742664456367493, - "learning_rate": 1.9523628210009734e-05, - "loss": 0.2344, + "epoch": 0.14290065485557643, + "grad_norm": 0.5550641417503357, + "learning_rate": 1.9047328967629493e-05, + "loss": 0.2024, "step": 2815 }, { - "epoch": 0.07158268815839573, - "grad_norm": 1.1323363780975342, - "learning_rate": 1.952278207894403e-05, - "loss": 0.239, + "epoch": 0.1431544748464389, + "grad_norm": 0.8888419270515442, + "learning_rate": 1.9045636834357077e-05, + "loss": 0.2091, "step": 2820 }, { - "epoch": 0.07170960781825104, - "grad_norm": 1.159314751625061, - "learning_rate": 1.9521935947878328e-05, - "loss": 0.2738, + "epoch": 0.1434082948373014, + "grad_norm": 0.6956480741500854, + "learning_rate": 1.904394470108466e-05, + "loss": 0.2006, "step": 2825 }, { - "epoch": 0.07183652747810636, - "grad_norm": 0.9051412343978882, - "learning_rate": 1.9521089816812626e-05, - "loss": 0.2727, + "epoch": 0.14366211482816388, + "grad_norm": 0.6455702781677246, + "learning_rate": 1.9042252567812244e-05, + "loss": 0.1969, "step": 2830 }, { - "epoch": 0.07196344713796167, - "grad_norm": 0.7527676820755005, - "learning_rate": 1.9520243685746925e-05, - "loss": 0.2881, + "epoch": 0.14391593481902634, + "grad_norm": 0.7298540472984314, + "learning_rate": 1.9040560434539824e-05, + "loss": 0.2041, "step": 2835 }, { - "epoch": 0.07209036679781698, - "grad_norm": 1.659684181213379, - "learning_rate": 1.951939755468122e-05, - "loss": 0.2718, + "epoch": 0.14416975480988883, + "grad_norm": 0.9407358169555664, + "learning_rate": 1.903886830126741e-05, + "loss": 0.2226, "step": 2840 }, { - "epoch": 0.0722172864576723, - "grad_norm": 1.3006374835968018, - "learning_rate": 1.9518551423615518e-05, - "loss": 0.2447, + "epoch": 0.1444235748007513, + "grad_norm": 0.6761470437049866, + "learning_rate": 1.9037176167994994e-05, + "loss": 0.2073, "step": 2845 }, { - "epoch": 0.0723442061175276, - "grad_norm": 1.2195900678634644, - "learning_rate": 1.9517705292549816e-05, - "loss": 0.2717, + "epoch": 0.1446773947916138, + "grad_norm": 0.9858958125114441, + "learning_rate": 1.9035484034722575e-05, + "loss": 0.2191, "step": 2850 }, { - "epoch": 0.07247112577738292, - "grad_norm": 0.9328087568283081, - "learning_rate": 1.9516859161484115e-05, - "loss": 0.2471, + "epoch": 0.14493121478247628, + "grad_norm": 0.7643491625785828, + "learning_rate": 1.903379190145016e-05, + "loss": 0.1855, "step": 2855 }, { - "epoch": 0.07259804543723823, - "grad_norm": 1.4288475513458252, - "learning_rate": 1.9516013030418413e-05, - "loss": 0.2711, + "epoch": 0.14518503477333874, + "grad_norm": 0.5545926690101624, + "learning_rate": 1.903209976817774e-05, + "loss": 0.1854, "step": 2860 }, { - "epoch": 0.07272496509709354, - "grad_norm": 0.8574407696723938, - "learning_rate": 1.951516689935271e-05, - "loss": 0.2389, + "epoch": 0.14543885476420124, + "grad_norm": 0.7087584137916565, + "learning_rate": 1.9030407634905325e-05, + "loss": 0.1925, "step": 2865 }, { - "epoch": 0.07285188475694886, - "grad_norm": 1.284792184829712, - "learning_rate": 1.951432076828701e-05, - "loss": 0.2431, + "epoch": 0.1456926747550637, + "grad_norm": 0.9376761317253113, + "learning_rate": 1.9028715501632912e-05, + "loss": 0.2171, "step": 2870 }, { - "epoch": 0.07297880441680417, - "grad_norm": 1.25221586227417, - "learning_rate": 1.951347463722131e-05, - "loss": 0.225, + "epoch": 0.1459464947459262, + "grad_norm": 0.5529223680496216, + "learning_rate": 1.9027023368360492e-05, + "loss": 0.1849, "step": 2875 }, { - "epoch": 0.07310572407665948, - "grad_norm": 1.4007045030593872, - "learning_rate": 1.9512628506155603e-05, - "loss": 0.2527, + "epoch": 0.14620031473678866, + "grad_norm": 0.6258545517921448, + "learning_rate": 1.902533123508808e-05, + "loss": 0.195, "step": 2880 }, { - "epoch": 0.07323264373651478, - "grad_norm": 1.0066944360733032, - "learning_rate": 1.9511782375089902e-05, - "loss": 0.2326, + "epoch": 0.14645413472765115, + "grad_norm": 0.5800721645355225, + "learning_rate": 1.902363910181566e-05, + "loss": 0.1879, "step": 2885 }, { - "epoch": 0.07335956339637009, - "grad_norm": 0.7517261505126953, - "learning_rate": 1.95109362440242e-05, - "loss": 0.2254, + "epoch": 0.14670795471851364, + "grad_norm": 0.9016756415367126, + "learning_rate": 1.9021946968543243e-05, + "loss": 0.1978, "step": 2890 }, { - "epoch": 0.0734864830562254, - "grad_norm": 1.0653655529022217, - "learning_rate": 1.95100901129585e-05, - "loss": 0.2181, + "epoch": 0.1469617747093761, + "grad_norm": 0.6598945260047913, + "learning_rate": 1.902025483527083e-05, + "loss": 0.2201, "step": 2895 }, { - "epoch": 0.07361340271608072, - "grad_norm": 1.906826138496399, - "learning_rate": 1.9509243981892797e-05, - "loss": 0.2469, + "epoch": 0.1472155947002386, + "grad_norm": 0.5882731080055237, + "learning_rate": 1.901856270199841e-05, + "loss": 0.1942, "step": 2900 }, { - "epoch": 0.07374032237593603, - "grad_norm": 1.0673446655273438, - "learning_rate": 1.9508397850827095e-05, - "loss": 0.2592, + "epoch": 0.14746941469110106, + "grad_norm": 0.898997962474823, + "learning_rate": 1.9016870568725994e-05, + "loss": 0.2089, "step": 2905 }, { - "epoch": 0.07386724203579134, - "grad_norm": 0.9890732169151306, - "learning_rate": 1.9507551719761394e-05, - "loss": 0.2582, + "epoch": 0.14772323468196355, + "grad_norm": 0.547217071056366, + "learning_rate": 1.9015178435453577e-05, + "loss": 0.1957, "step": 2910 }, { - "epoch": 0.07399416169564665, - "grad_norm": 1.3210183382034302, - "learning_rate": 1.9506705588695692e-05, - "loss": 0.2842, + "epoch": 0.14797705467282604, + "grad_norm": 0.5666927099227905, + "learning_rate": 1.901348630218116e-05, + "loss": 0.2036, "step": 2915 }, { - "epoch": 0.07412108135550197, - "grad_norm": 0.7532540559768677, - "learning_rate": 1.9505859457629987e-05, - "loss": 0.2419, + "epoch": 0.1482308746636885, + "grad_norm": 0.7027495503425598, + "learning_rate": 1.9011794168908744e-05, + "loss": 0.2027, "step": 2920 }, { - "epoch": 0.07424800101535728, - "grad_norm": 0.9747022390365601, - "learning_rate": 1.9505013326564286e-05, - "loss": 0.2863, + "epoch": 0.148484694654551, + "grad_norm": 0.9105992317199707, + "learning_rate": 1.9010102035636328e-05, + "loss": 0.2185, "step": 2925 }, { - "epoch": 0.07437492067521259, - "grad_norm": 1.0006359815597534, - "learning_rate": 1.9504167195498584e-05, - "loss": 0.259, + "epoch": 0.14873851464541346, + "grad_norm": 0.9565317630767822, + "learning_rate": 1.900840990236391e-05, + "loss": 0.1891, "step": 2930 }, { - "epoch": 0.0745018403350679, - "grad_norm": 1.4117757081985474, - "learning_rate": 1.9503321064432882e-05, - "loss": 0.2466, + "epoch": 0.14899233463627595, + "grad_norm": 0.6969782710075378, + "learning_rate": 1.9006717769091495e-05, + "loss": 0.215, "step": 2935 }, { - "epoch": 0.07462875999492322, - "grad_norm": 1.1727391481399536, - "learning_rate": 1.950247493336718e-05, - "loss": 0.2455, + "epoch": 0.14924615462713844, + "grad_norm": 0.5370941758155823, + "learning_rate": 1.900502563581908e-05, + "loss": 0.1872, "step": 2940 }, { - "epoch": 0.07475567965477853, - "grad_norm": 1.5032821893692017, - "learning_rate": 1.950162880230148e-05, - "loss": 0.291, + "epoch": 0.1494999746180009, + "grad_norm": 0.932841420173645, + "learning_rate": 1.9003333502546662e-05, + "loss": 0.2144, "step": 2945 }, { - "epoch": 0.07488259931463384, - "grad_norm": 1.2274036407470703, - "learning_rate": 1.9500782671235778e-05, - "loss": 0.2459, + "epoch": 0.1497537946088634, + "grad_norm": 0.6696744561195374, + "learning_rate": 1.9001641369274245e-05, + "loss": 0.2132, "step": 2950 }, { - "epoch": 0.07500951897448915, - "grad_norm": 1.0904048681259155, - "learning_rate": 1.9499936540170076e-05, - "loss": 0.236, + "epoch": 0.15000761459972586, + "grad_norm": 0.8368297219276428, + "learning_rate": 1.899994923600183e-05, + "loss": 0.1922, "step": 2955 }, { - "epoch": 0.07513643863434447, - "grad_norm": 1.4898486137390137, - "learning_rate": 1.949909040910437e-05, - "loss": 0.2114, + "epoch": 0.15026143459058836, + "grad_norm": 0.7820245623588562, + "learning_rate": 1.8998257102729413e-05, + "loss": 0.1922, "step": 2960 }, { - "epoch": 0.07526335829419978, - "grad_norm": 0.9972125291824341, - "learning_rate": 1.949824427803867e-05, - "loss": 0.2617, + "epoch": 0.15051525458145085, + "grad_norm": 0.6239885091781616, + "learning_rate": 1.8996564969456996e-05, + "loss": 0.2171, "step": 2965 }, { - "epoch": 0.07539027795405508, - "grad_norm": 0.818337082862854, - "learning_rate": 1.9497398146972968e-05, - "loss": 0.2419, + "epoch": 0.1507690745723133, + "grad_norm": 0.6773269176483154, + "learning_rate": 1.899487283618458e-05, + "loss": 0.2039, "step": 2970 }, { - "epoch": 0.07551719761391039, - "grad_norm": 0.7856605648994446, - "learning_rate": 1.9496552015907266e-05, - "loss": 0.2692, + "epoch": 0.1510228945631758, + "grad_norm": 0.6102594137191772, + "learning_rate": 1.8993180702912163e-05, + "loss": 0.1843, "step": 2975 }, { - "epoch": 0.0756441172737657, - "grad_norm": 0.9213743209838867, - "learning_rate": 1.949570588484156e-05, - "loss": 0.2329, + "epoch": 0.15127671455403827, + "grad_norm": 0.7514229416847229, + "learning_rate": 1.8991488569639747e-05, + "loss": 0.1819, "step": 2980 }, { - "epoch": 0.07577103693362101, - "grad_norm": 1.0686559677124023, - "learning_rate": 1.949485975377586e-05, - "loss": 0.2416, + "epoch": 0.15153053454490076, + "grad_norm": 3.802446126937866, + "learning_rate": 1.898979643636733e-05, + "loss": 0.1858, "step": 2985 }, { - "epoch": 0.07589795659347633, - "grad_norm": 0.824860155582428, - "learning_rate": 1.9494013622710158e-05, - "loss": 0.2598, + "epoch": 0.15178435453576325, + "grad_norm": 0.984550416469574, + "learning_rate": 1.8988104303094914e-05, + "loss": 0.1861, "step": 2990 }, { - "epoch": 0.07602487625333164, - "grad_norm": 0.9339609146118164, - "learning_rate": 1.9493167491644456e-05, - "loss": 0.2303, + "epoch": 0.1520381745266257, + "grad_norm": 0.672860324382782, + "learning_rate": 1.8986412169822497e-05, + "loss": 0.1966, "step": 2995 }, { - "epoch": 0.07615179591318695, - "grad_norm": 2.4739184379577637, - "learning_rate": 1.9492321360578755e-05, - "loss": 0.2811, + "epoch": 0.1522919945174882, + "grad_norm": 0.5952281355857849, + "learning_rate": 1.898472003655008e-05, + "loss": 0.1876, "step": 3000 }, { - "epoch": 0.07627871557304226, - "grad_norm": 0.8868097066879272, - "learning_rate": 1.9491475229513053e-05, - "loss": 0.2499, + "epoch": 0.15254581450835067, + "grad_norm": 0.8867749571800232, + "learning_rate": 1.8983027903277664e-05, + "loss": 0.2116, "step": 3005 }, { - "epoch": 0.07640563523289758, - "grad_norm": 0.9817695617675781, - "learning_rate": 1.949062909844735e-05, - "loss": 0.2729, + "epoch": 0.15279963449921316, + "grad_norm": 0.7406168580055237, + "learning_rate": 1.8981335770005248e-05, + "loss": 0.1867, "step": 3010 }, { - "epoch": 0.07653255489275289, - "grad_norm": 0.9023828506469727, - "learning_rate": 1.948978296738165e-05, - "loss": 0.2537, + "epoch": 0.15305345449007565, + "grad_norm": 0.8236103653907776, + "learning_rate": 1.8979643636732828e-05, + "loss": 0.2004, "step": 3015 }, { - "epoch": 0.0766594745526082, - "grad_norm": 0.9986405968666077, - "learning_rate": 1.9488936836315945e-05, - "loss": 0.245, + "epoch": 0.15330727448093812, + "grad_norm": 0.5611258149147034, + "learning_rate": 1.8977951503460415e-05, + "loss": 0.1773, "step": 3020 }, { - "epoch": 0.07678639421246351, - "grad_norm": 0.692940890789032, - "learning_rate": 1.9488090705250243e-05, - "loss": 0.223, + "epoch": 0.1535610944718006, + "grad_norm": 0.8456403017044067, + "learning_rate": 1.8976259370188e-05, + "loss": 0.2209, "step": 3025 }, { - "epoch": 0.07691331387231883, - "grad_norm": 0.8411860466003418, - "learning_rate": 1.9487244574184542e-05, - "loss": 0.2368, + "epoch": 0.15381491446266307, + "grad_norm": 1.378568172454834, + "learning_rate": 1.8974567236915582e-05, + "loss": 0.2122, "step": 3030 }, { - "epoch": 0.07704023353217414, - "grad_norm": 1.0636227130889893, - "learning_rate": 1.948639844311884e-05, - "loss": 0.2462, + "epoch": 0.15406873445352556, + "grad_norm": 0.599615216255188, + "learning_rate": 1.8972875103643166e-05, + "loss": 0.1886, "step": 3035 }, { - "epoch": 0.07716715319202945, - "grad_norm": 0.9986701607704163, - "learning_rate": 1.948555231205314e-05, - "loss": 0.2691, + "epoch": 0.15432255444438803, + "grad_norm": 0.5790411233901978, + "learning_rate": 1.8971182970370746e-05, + "loss": 0.1923, "step": 3040 }, { - "epoch": 0.07729407285188476, - "grad_norm": 0.9838452935218811, - "learning_rate": 1.9484706180987437e-05, - "loss": 0.2793, + "epoch": 0.15457637443525052, + "grad_norm": 5.2787299156188965, + "learning_rate": 1.8969490837098333e-05, + "loss": 0.1875, "step": 3045 }, { - "epoch": 0.07742099251174007, - "grad_norm": 0.9636114835739136, - "learning_rate": 1.9483860049921735e-05, - "loss": 0.2636, + "epoch": 0.154830194426113, + "grad_norm": 0.5498223304748535, + "learning_rate": 1.8967798703825916e-05, + "loss": 0.1898, "step": 3050 }, { - "epoch": 0.07754791217159537, - "grad_norm": 1.443455696105957, - "learning_rate": 1.9483013918856034e-05, - "loss": 0.2623, + "epoch": 0.15508401441697547, + "grad_norm": 0.5836355686187744, + "learning_rate": 1.8966106570553497e-05, + "loss": 0.1873, "step": 3055 }, { - "epoch": 0.07767483183145069, - "grad_norm": 0.913651704788208, - "learning_rate": 1.948216778779033e-05, - "loss": 0.2458, + "epoch": 0.15533783440783797, + "grad_norm": 0.6591739654541016, + "learning_rate": 1.8964414437281083e-05, + "loss": 0.2141, "step": 3060 }, { - "epoch": 0.077801751491306, - "grad_norm": 0.7896534204483032, - "learning_rate": 1.9481321656724627e-05, - "loss": 0.2368, + "epoch": 0.15559165439870043, + "grad_norm": 0.5879199504852295, + "learning_rate": 1.8962722304008664e-05, + "loss": 0.199, "step": 3065 }, { - "epoch": 0.07792867115116131, - "grad_norm": 0.8969899415969849, - "learning_rate": 1.9480475525658926e-05, - "loss": 0.2485, + "epoch": 0.15584547438956292, + "grad_norm": 0.6252302527427673, + "learning_rate": 1.8961030170736247e-05, + "loss": 0.183, "step": 3070 }, { - "epoch": 0.07805559081101662, - "grad_norm": 0.7885770201683044, - "learning_rate": 1.9479629394593224e-05, - "loss": 0.2339, + "epoch": 0.1560992943804254, + "grad_norm": 0.6322395205497742, + "learning_rate": 1.8959338037463834e-05, + "loss": 0.2204, "step": 3075 }, { - "epoch": 0.07818251047087194, - "grad_norm": 0.7185413837432861, - "learning_rate": 1.9478783263527523e-05, - "loss": 0.2211, + "epoch": 0.15635311437128788, + "grad_norm": 0.8022140860557556, + "learning_rate": 1.8957645904191414e-05, + "loss": 0.1974, "step": 3080 }, { - "epoch": 0.07830943013072725, - "grad_norm": 1.540074348449707, - "learning_rate": 1.947793713246182e-05, - "loss": 0.2282, + "epoch": 0.15660693436215037, + "grad_norm": 0.7623772025108337, + "learning_rate": 1.8955953770919e-05, + "loss": 0.1824, "step": 3085 }, { - "epoch": 0.07843634979058256, - "grad_norm": 1.0445549488067627, - "learning_rate": 1.947709100139612e-05, - "loss": 0.256, + "epoch": 0.15686075435301283, + "grad_norm": 0.6760655641555786, + "learning_rate": 1.895426163764658e-05, + "loss": 0.2041, "step": 3090 }, { - "epoch": 0.07856326945043787, - "grad_norm": 1.1047964096069336, - "learning_rate": 1.9476244870330418e-05, - "loss": 0.2372, + "epoch": 0.15711457434387532, + "grad_norm": 0.6074882745742798, + "learning_rate": 1.8952569504374165e-05, + "loss": 0.19, "step": 3095 }, { - "epoch": 0.07869018911029318, - "grad_norm": 1.1287318468093872, - "learning_rate": 1.9475398739264716e-05, - "loss": 0.2282, + "epoch": 0.15736839433473782, + "grad_norm": 0.5363246202468872, + "learning_rate": 1.8950877371101752e-05, + "loss": 0.1732, "step": 3100 }, { - "epoch": 0.0788171087701485, - "grad_norm": 0.6673896312713623, - "learning_rate": 1.947455260819901e-05, - "loss": 0.2403, + "epoch": 0.15762221432560028, + "grad_norm": 0.9593762159347534, + "learning_rate": 1.8949185237829332e-05, + "loss": 0.1973, "step": 3105 }, { - "epoch": 0.07894402843000381, - "grad_norm": 1.3726545572280884, - "learning_rate": 1.947370647713331e-05, - "loss": 0.2467, + "epoch": 0.15787603431646277, + "grad_norm": 3.48103666305542, + "learning_rate": 1.8947493104556916e-05, + "loss": 0.2306, "step": 3110 }, { - "epoch": 0.07907094808985912, - "grad_norm": 1.705492615699768, - "learning_rate": 1.9472860346067608e-05, - "loss": 0.2395, + "epoch": 0.15812985430732523, + "grad_norm": 0.593743085861206, + "learning_rate": 1.89458009712845e-05, + "loss": 0.2023, "step": 3115 }, { - "epoch": 0.07919786774971443, - "grad_norm": 1.3698228597640991, - "learning_rate": 1.9472014215001906e-05, - "loss": 0.2794, + "epoch": 0.15838367429818773, + "grad_norm": 0.8416429758071899, + "learning_rate": 1.8944108838012083e-05, + "loss": 0.1661, "step": 3120 }, { - "epoch": 0.07932478740956975, - "grad_norm": 0.7300431132316589, - "learning_rate": 1.94711680839362e-05, - "loss": 0.2355, + "epoch": 0.15863749428905022, + "grad_norm": 0.5875362157821655, + "learning_rate": 1.894241670473967e-05, + "loss": 0.1693, "step": 3125 }, { - "epoch": 0.07945170706942506, - "grad_norm": 0.963846743106842, - "learning_rate": 1.94703219528705e-05, - "loss": 0.195, + "epoch": 0.15889131427991268, + "grad_norm": 0.5337428450584412, + "learning_rate": 1.894072457146725e-05, + "loss": 0.2014, "step": 3130 }, { - "epoch": 0.07957862672928037, - "grad_norm": 1.0164843797683716, - "learning_rate": 1.9469475821804798e-05, - "loss": 0.2448, + "epoch": 0.15914513427077517, + "grad_norm": 0.705847978591919, + "learning_rate": 1.8939032438194833e-05, + "loss": 0.1769, "step": 3135 }, { - "epoch": 0.07970554638913567, - "grad_norm": 0.8222649693489075, - "learning_rate": 1.9468629690739097e-05, - "loss": 0.2267, + "epoch": 0.15939895426163764, + "grad_norm": 1.11298406124115, + "learning_rate": 1.8937340304922417e-05, + "loss": 0.1957, "step": 3140 }, { - "epoch": 0.07983246604899098, - "grad_norm": 0.8104264736175537, - "learning_rate": 1.9467783559673395e-05, - "loss": 0.2222, + "epoch": 0.15965277425250013, + "grad_norm": 0.7969094514846802, + "learning_rate": 1.893564817165e-05, + "loss": 0.2062, "step": 3145 }, { - "epoch": 0.0799593857088463, - "grad_norm": 0.8509145975112915, - "learning_rate": 1.9466937428607693e-05, - "loss": 0.2435, + "epoch": 0.15990659424336262, + "grad_norm": 0.6891659498214722, + "learning_rate": 1.8933956038377584e-05, + "loss": 0.1837, "step": 3150 }, { - "epoch": 0.08008630536870161, - "grad_norm": 1.4300439357757568, - "learning_rate": 1.9466091297541992e-05, - "loss": 0.2438, + "epoch": 0.16016041423422508, + "grad_norm": 0.7349233627319336, + "learning_rate": 1.8932263905105167e-05, + "loss": 0.1962, "step": 3155 }, { - "epoch": 0.08021322502855692, - "grad_norm": 1.3444732427597046, - "learning_rate": 1.946524516647629e-05, - "loss": 0.2802, + "epoch": 0.16041423422508758, + "grad_norm": 0.6409569382667542, + "learning_rate": 1.893057177183275e-05, + "loss": 0.1762, "step": 3160 }, { - "epoch": 0.08034014468841223, - "grad_norm": 1.9719382524490356, - "learning_rate": 1.9464399035410585e-05, - "loss": 0.2043, + "epoch": 0.16066805421595004, + "grad_norm": 0.6636890769004822, + "learning_rate": 1.8928879638560335e-05, + "loss": 0.1863, "step": 3165 }, { - "epoch": 0.08046706434826754, - "grad_norm": 1.374517560005188, - "learning_rate": 1.9463552904344884e-05, - "loss": 0.2316, + "epoch": 0.16092187420681253, + "grad_norm": 0.6937609314918518, + "learning_rate": 1.8927187505287918e-05, + "loss": 0.1784, "step": 3170 }, { - "epoch": 0.08059398400812286, - "grad_norm": 1.2833272218704224, - "learning_rate": 1.9462706773279182e-05, - "loss": 0.2507, + "epoch": 0.161175694197675, + "grad_norm": 0.5253648161888123, + "learning_rate": 1.89254953720155e-05, + "loss": 0.1917, "step": 3175 }, { - "epoch": 0.08072090366797817, - "grad_norm": 1.14992094039917, - "learning_rate": 1.946186064221348e-05, - "loss": 0.2257, + "epoch": 0.1614295141885375, + "grad_norm": 0.6246563196182251, + "learning_rate": 1.8923803238743085e-05, + "loss": 0.1687, "step": 3180 }, { - "epoch": 0.08084782332783348, - "grad_norm": 0.8860575556755066, - "learning_rate": 1.946101451114778e-05, - "loss": 0.2467, + "epoch": 0.16168333417939998, + "grad_norm": 0.664107084274292, + "learning_rate": 1.892211110547067e-05, + "loss": 0.1761, "step": 3185 }, { - "epoch": 0.0809747429876888, - "grad_norm": 0.952331006526947, - "learning_rate": 1.9460168380082077e-05, - "loss": 0.223, + "epoch": 0.16193715417026244, + "grad_norm": 0.7611233592033386, + "learning_rate": 1.8920418972198252e-05, + "loss": 0.1977, "step": 3190 }, { - "epoch": 0.0811016626475441, - "grad_norm": 1.3879573345184326, - "learning_rate": 1.9459322249016376e-05, - "loss": 0.2529, + "epoch": 0.16219097416112493, + "grad_norm": 0.6064574718475342, + "learning_rate": 1.8918726838925836e-05, + "loss": 0.1824, "step": 3195 }, { - "epoch": 0.08122858230739942, - "grad_norm": 1.8083316087722778, - "learning_rate": 1.9458476117950674e-05, - "loss": 0.1969, + "epoch": 0.1624447941519874, + "grad_norm": 0.8105899691581726, + "learning_rate": 1.891703470565342e-05, + "loss": 0.1892, "step": 3200 }, { - "epoch": 0.08135550196725473, - "grad_norm": 0.8846744298934937, - "learning_rate": 1.945762998688497e-05, - "loss": 0.24, + "epoch": 0.1626986141428499, + "grad_norm": 0.6670052409172058, + "learning_rate": 1.8915342572381003e-05, + "loss": 0.1657, "step": 3205 }, { - "epoch": 0.08148242162711004, - "grad_norm": 0.6902865171432495, - "learning_rate": 1.9456783855819267e-05, - "loss": 0.2273, + "epoch": 0.16295243413371238, + "grad_norm": 0.8309715986251831, + "learning_rate": 1.8913650439108586e-05, + "loss": 0.1826, "step": 3210 }, { - "epoch": 0.08160934128696536, - "grad_norm": 1.0633777379989624, - "learning_rate": 1.9455937724753566e-05, - "loss": 0.2454, + "epoch": 0.16320625412457485, + "grad_norm": 0.6957319378852844, + "learning_rate": 1.891195830583617e-05, + "loss": 0.1981, "step": 3215 }, { - "epoch": 0.08173626094682067, - "grad_norm": 0.7138794660568237, - "learning_rate": 1.9455091593687864e-05, - "loss": 0.2538, + "epoch": 0.16346007411543734, + "grad_norm": 0.5244278311729431, + "learning_rate": 1.8910266172563753e-05, + "loss": 0.1835, "step": 3220 }, { - "epoch": 0.08186318060667598, - "grad_norm": 1.1508457660675049, - "learning_rate": 1.9454245462622163e-05, - "loss": 0.3882, + "epoch": 0.1637138941062998, + "grad_norm": 0.6687745451927185, + "learning_rate": 1.8908574039291337e-05, + "loss": 0.1818, "step": 3225 }, { - "epoch": 0.08199010026653128, - "grad_norm": 1.0387147665023804, - "learning_rate": 1.945339933155646e-05, - "loss": 0.2448, + "epoch": 0.1639677140971623, + "grad_norm": 0.5225896835327148, + "learning_rate": 1.890688190601892e-05, + "loss": 0.1972, "step": 3230 }, { - "epoch": 0.08211701992638659, - "grad_norm": 1.0170204639434814, - "learning_rate": 1.945255320049076e-05, - "loss": 0.2232, + "epoch": 0.16422153408802478, + "grad_norm": 0.71306973695755, + "learning_rate": 1.8905189772746504e-05, + "loss": 0.1793, "step": 3235 }, { - "epoch": 0.0822439395862419, - "grad_norm": 0.9592982530593872, - "learning_rate": 1.9451707069425058e-05, - "loss": 0.2216, + "epoch": 0.16447535407888725, + "grad_norm": 0.6916504502296448, + "learning_rate": 1.8903497639474088e-05, + "loss": 0.1796, "step": 3240 }, { - "epoch": 0.08237085924609722, - "grad_norm": 1.6328798532485962, - "learning_rate": 1.9450860938359353e-05, - "loss": 0.2819, + "epoch": 0.16472917406974974, + "grad_norm": 0.7372540831565857, + "learning_rate": 1.8901805506201668e-05, + "loss": 0.1891, "step": 3245 }, { - "epoch": 0.08249777890595253, - "grad_norm": 0.9943515658378601, - "learning_rate": 1.945001480729365e-05, - "loss": 0.2436, + "epoch": 0.1649829940606122, + "grad_norm": 0.5736752152442932, + "learning_rate": 1.8900113372929255e-05, + "loss": 0.2053, "step": 3250 }, { - "epoch": 0.08262469856580784, - "grad_norm": 1.1356008052825928, - "learning_rate": 1.944916867622795e-05, - "loss": 0.2299, + "epoch": 0.1652368140514747, + "grad_norm": 0.5286284685134888, + "learning_rate": 1.889842123965684e-05, + "loss": 0.1989, "step": 3255 }, { - "epoch": 0.08275161822566315, - "grad_norm": 0.9671212434768677, - "learning_rate": 1.9448322545162248e-05, - "loss": 0.2834, + "epoch": 0.1654906340423372, + "grad_norm": 0.9421578049659729, + "learning_rate": 1.889672910638442e-05, + "loss": 0.2002, "step": 3260 }, { - "epoch": 0.08287853788551847, - "grad_norm": 1.284661889076233, - "learning_rate": 1.9447476414096543e-05, - "loss": 0.2622, + "epoch": 0.16574445403319965, + "grad_norm": 0.721328616142273, + "learning_rate": 1.8895036973112005e-05, + "loss": 0.1938, "step": 3265 }, { - "epoch": 0.08300545754537378, - "grad_norm": 0.9568707346916199, - "learning_rate": 1.944663028303084e-05, - "loss": 0.209, + "epoch": 0.16599827402406214, + "grad_norm": 0.6766708493232727, + "learning_rate": 1.8893344839839586e-05, + "loss": 0.1981, "step": 3270 }, { - "epoch": 0.08313237720522909, - "grad_norm": 0.6576051712036133, - "learning_rate": 1.944578415196514e-05, - "loss": 0.2154, + "epoch": 0.1662520940149246, + "grad_norm": 0.7163949608802795, + "learning_rate": 1.8891652706567172e-05, + "loss": 0.1856, "step": 3275 }, { - "epoch": 0.0832592968650844, - "grad_norm": 1.526079773902893, - "learning_rate": 1.9444938020899438e-05, - "loss": 0.3078, + "epoch": 0.1665059140057871, + "grad_norm": 0.673416793346405, + "learning_rate": 1.8889960573294756e-05, + "loss": 0.1928, "step": 3280 }, { - "epoch": 0.08338621652493972, - "grad_norm": 1.0187851190567017, - "learning_rate": 1.9444091889833737e-05, - "loss": 0.2061, + "epoch": 0.1667597339966496, + "grad_norm": 0.6823815703392029, + "learning_rate": 1.8888268440022336e-05, + "loss": 0.1953, "step": 3285 }, { - "epoch": 0.08351313618479503, - "grad_norm": 0.9445210695266724, - "learning_rate": 1.9443245758768035e-05, - "loss": 0.234, + "epoch": 0.16701355398751205, + "grad_norm": 0.6587371826171875, + "learning_rate": 1.8886576306749923e-05, + "loss": 0.1985, "step": 3290 }, { - "epoch": 0.08364005584465034, - "grad_norm": 44.54368209838867, - "learning_rate": 1.9442399627702333e-05, - "loss": 0.2196, + "epoch": 0.16726737397837455, + "grad_norm": 0.7395918965339661, + "learning_rate": 1.8884884173477503e-05, + "loss": 0.1859, "step": 3295 }, { - "epoch": 0.08376697550450565, - "grad_norm": 1.3533403873443604, - "learning_rate": 1.9441553496636632e-05, - "loss": 0.226, + "epoch": 0.167521193969237, + "grad_norm": 0.6410045623779297, + "learning_rate": 1.8883192040205087e-05, + "loss": 0.1916, "step": 3300 }, { - "epoch": 0.08389389516436097, - "grad_norm": 0.8056994080543518, - "learning_rate": 1.9440707365570927e-05, - "loss": 0.2665, + "epoch": 0.1677750139600995, + "grad_norm": 0.5092994570732117, + "learning_rate": 1.8881499906932674e-05, + "loss": 0.1782, "step": 3305 }, { - "epoch": 0.08402081482421628, - "grad_norm": 0.8996313810348511, - "learning_rate": 1.9439861234505225e-05, - "loss": 0.2321, + "epoch": 0.16802883395096196, + "grad_norm": 0.6259738206863403, + "learning_rate": 1.8879807773660254e-05, + "loss": 0.2074, "step": 3310 }, { - "epoch": 0.08414773448407158, - "grad_norm": 0.8883455991744995, - "learning_rate": 1.9439015103439524e-05, - "loss": 0.2136, + "epoch": 0.16828265394182446, + "grad_norm": 0.8457812070846558, + "learning_rate": 1.8878115640387837e-05, + "loss": 0.1983, "step": 3315 }, { - "epoch": 0.08427465414392689, - "grad_norm": 1.2427653074264526, - "learning_rate": 1.9438168972373822e-05, - "loss": 0.2139, + "epoch": 0.16853647393268695, + "grad_norm": 0.4995182752609253, + "learning_rate": 1.887642350711542e-05, + "loss": 0.1781, "step": 3320 }, { - "epoch": 0.0844015738037822, - "grad_norm": 1.4158670902252197, - "learning_rate": 1.943732284130812e-05, - "loss": 0.2256, + "epoch": 0.1687902939235494, + "grad_norm": 0.6008071899414062, + "learning_rate": 1.8874731373843005e-05, + "loss": 0.1904, "step": 3325 }, { - "epoch": 0.08452849346363751, - "grad_norm": 1.7367017269134521, - "learning_rate": 1.943647671024242e-05, - "loss": 0.2127, + "epoch": 0.1690441139144119, + "grad_norm": 0.7149403691291809, + "learning_rate": 1.8873039240570588e-05, + "loss": 0.1846, "step": 3330 }, { - "epoch": 0.08465541312349283, - "grad_norm": 0.7993677258491516, - "learning_rate": 1.9435630579176717e-05, - "loss": 0.2332, + "epoch": 0.16929793390527437, + "grad_norm": 1.0425550937652588, + "learning_rate": 1.887134710729817e-05, + "loss": 0.1979, "step": 3335 }, { - "epoch": 0.08478233278334814, - "grad_norm": 0.7845265865325928, - "learning_rate": 1.9434784448111016e-05, - "loss": 0.2242, + "epoch": 0.16955175389613686, + "grad_norm": 0.6860085725784302, + "learning_rate": 1.8869654974025755e-05, + "loss": 0.1808, "step": 3340 }, { - "epoch": 0.08490925244320345, - "grad_norm": 1.3306083679199219, - "learning_rate": 1.943393831704531e-05, - "loss": 0.24, + "epoch": 0.16980557388699935, + "grad_norm": 0.8324023485183716, + "learning_rate": 1.886796284075334e-05, + "loss": 0.2056, "step": 3345 }, { - "epoch": 0.08503617210305876, - "grad_norm": 1.273103952407837, - "learning_rate": 1.943309218597961e-05, - "loss": 0.2438, + "epoch": 0.17005939387786181, + "grad_norm": 0.6411967873573303, + "learning_rate": 1.8866270707480922e-05, + "loss": 0.1771, "step": 3350 }, { - "epoch": 0.08516309176291408, - "grad_norm": 1.091101050376892, - "learning_rate": 1.9432246054913908e-05, - "loss": 0.2224, + "epoch": 0.1703132138687243, + "grad_norm": 0.5328414440155029, + "learning_rate": 1.8864578574208506e-05, + "loss": 0.1957, "step": 3355 }, { - "epoch": 0.08529001142276939, - "grad_norm": 0.8079588413238525, - "learning_rate": 1.9431399923848206e-05, - "loss": 0.2175, + "epoch": 0.17056703385958677, + "grad_norm": 0.7603042125701904, + "learning_rate": 1.886288644093609e-05, + "loss": 0.2017, "step": 3360 }, { - "epoch": 0.0854169310826247, - "grad_norm": 1.592157006263733, - "learning_rate": 1.9430553792782504e-05, - "loss": 0.2439, + "epoch": 0.17082085385044926, + "grad_norm": 0.8825428485870361, + "learning_rate": 1.8861194307663673e-05, + "loss": 0.1989, "step": 3365 }, { - "epoch": 0.08554385074248001, - "grad_norm": 1.0686033964157104, - "learning_rate": 1.9429707661716803e-05, - "loss": 0.2091, + "epoch": 0.17107467384131175, + "grad_norm": 0.657129168510437, + "learning_rate": 1.8859502174391256e-05, + "loss": 0.1801, "step": 3370 }, { - "epoch": 0.08567077040233533, - "grad_norm": 0.7402673363685608, - "learning_rate": 1.94288615306511e-05, - "loss": 0.2167, + "epoch": 0.17132849383217422, + "grad_norm": 0.7885820269584656, + "learning_rate": 1.885781004111884e-05, + "loss": 0.2056, "step": 3375 }, { - "epoch": 0.08579769006219064, - "grad_norm": 1.2086679935455322, - "learning_rate": 1.94280153995854e-05, - "loss": 0.2417, + "epoch": 0.1715823138230367, + "grad_norm": 0.5244271159172058, + "learning_rate": 1.8856117907846424e-05, + "loss": 0.1646, "step": 3380 }, { - "epoch": 0.08592460972204595, - "grad_norm": 0.9473960995674133, - "learning_rate": 1.9427169268519695e-05, - "loss": 0.2328, + "epoch": 0.17183613381389917, + "grad_norm": 0.6291254758834839, + "learning_rate": 1.8854425774574007e-05, + "loss": 0.1832, "step": 3385 }, { - "epoch": 0.08605152938190126, - "grad_norm": 1.093590497970581, - "learning_rate": 1.9426323137453993e-05, - "loss": 0.226, + "epoch": 0.17208995380476166, + "grad_norm": 0.6272834539413452, + "learning_rate": 1.885273364130159e-05, + "loss": 0.1888, "step": 3390 }, { - "epoch": 0.08617844904175657, - "grad_norm": 1.016284704208374, - "learning_rate": 1.942547700638829e-05, - "loss": 0.2441, + "epoch": 0.17234377379562416, + "grad_norm": 0.727254331111908, + "learning_rate": 1.8851041508029174e-05, + "loss": 0.1812, "step": 3395 }, { - "epoch": 0.08630536870161187, - "grad_norm": 0.9065755009651184, - "learning_rate": 1.942463087532259e-05, - "loss": 0.2195, + "epoch": 0.17259759378648662, + "grad_norm": 0.8115093111991882, + "learning_rate": 1.8849349374756758e-05, + "loss": 0.1872, "step": 3400 }, { - "epoch": 0.08643228836146719, - "grad_norm": 1.2055087089538574, - "learning_rate": 1.9423784744256885e-05, - "loss": 0.236, + "epoch": 0.1728514137773491, + "grad_norm": 0.6561703085899353, + "learning_rate": 1.884765724148434e-05, + "loss": 0.1813, "step": 3405 }, { - "epoch": 0.0865592080213225, - "grad_norm": 1.060442566871643, - "learning_rate": 1.9422938613191183e-05, - "loss": 0.2267, + "epoch": 0.17310523376821157, + "grad_norm": 0.5098863840103149, + "learning_rate": 1.8845965108211925e-05, + "loss": 0.1873, "step": 3410 }, { - "epoch": 0.08668612768117781, - "grad_norm": 2.648592948913574, - "learning_rate": 1.942209248212548e-05, - "loss": 0.2616, + "epoch": 0.17335905375907407, + "grad_norm": 0.6273576021194458, + "learning_rate": 1.884427297493951e-05, + "loss": 0.1893, "step": 3415 }, { - "epoch": 0.08681304734103312, - "grad_norm": 0.9016095995903015, - "learning_rate": 1.942124635105978e-05, - "loss": 0.2279, + "epoch": 0.17361287374993656, + "grad_norm": 0.622138261795044, + "learning_rate": 1.8842580841667092e-05, + "loss": 0.1596, "step": 3420 }, { - "epoch": 0.08693996700088844, - "grad_norm": 1.255208134651184, - "learning_rate": 1.942040021999408e-05, - "loss": 0.231, + "epoch": 0.17386669374079902, + "grad_norm": 0.8276758790016174, + "learning_rate": 1.8840888708394675e-05, + "loss": 0.1774, "step": 3425 }, { - "epoch": 0.08706688666074375, - "grad_norm": 0.8711854815483093, - "learning_rate": 1.9419554088928377e-05, - "loss": 0.2093, + "epoch": 0.1741205137316615, + "grad_norm": 0.6109856963157654, + "learning_rate": 1.883919657512226e-05, + "loss": 0.1927, "step": 3430 }, { - "epoch": 0.08719380632059906, - "grad_norm": 0.8700282573699951, - "learning_rate": 1.9418707957862675e-05, - "loss": 0.233, + "epoch": 0.17437433372252398, + "grad_norm": 0.6221029758453369, + "learning_rate": 1.8837504441849843e-05, + "loss": 0.1951, "step": 3435 }, { - "epoch": 0.08732072598045437, - "grad_norm": 0.8836633563041687, - "learning_rate": 1.9417861826796974e-05, - "loss": 0.2398, + "epoch": 0.17462815371338647, + "grad_norm": 0.567482590675354, + "learning_rate": 1.8835812308577426e-05, + "loss": 0.1848, "step": 3440 }, { - "epoch": 0.08744764564030968, - "grad_norm": 1.0759438276290894, - "learning_rate": 1.941701569573127e-05, - "loss": 0.1947, + "epoch": 0.17488197370424893, + "grad_norm": 0.7713445425033569, + "learning_rate": 1.883412017530501e-05, + "loss": 0.1676, "step": 3445 }, { - "epoch": 0.087574565300165, - "grad_norm": 0.8483268618583679, - "learning_rate": 1.9416169564665567e-05, - "loss": 0.2152, + "epoch": 0.17513579369511142, + "grad_norm": 0.6191247701644897, + "learning_rate": 1.883242804203259e-05, + "loss": 0.1867, "step": 3450 }, { - "epoch": 0.08770148496002031, - "grad_norm": 1.421238660812378, - "learning_rate": 1.9415323433599865e-05, - "loss": 0.265, + "epoch": 0.17538961368597392, + "grad_norm": 0.40963295102119446, + "learning_rate": 1.8830735908760177e-05, + "loss": 0.1683, "step": 3455 }, { - "epoch": 0.08782840461987562, - "grad_norm": 1.194441795349121, - "learning_rate": 1.9414477302534164e-05, - "loss": 0.2418, + "epoch": 0.17564343367683638, + "grad_norm": 0.5506500601768494, + "learning_rate": 1.882904377548776e-05, + "loss": 0.1755, "step": 3460 }, { - "epoch": 0.08795532427973093, - "grad_norm": 0.8135920166969299, - "learning_rate": 1.9413631171468462e-05, - "loss": 0.2771, + "epoch": 0.17589725366769887, + "grad_norm": 0.5656499266624451, + "learning_rate": 1.8827351642215344e-05, + "loss": 0.1856, "step": 3465 }, { - "epoch": 0.08808224393958625, - "grad_norm": 1.0236079692840576, - "learning_rate": 1.941278504040276e-05, - "loss": 0.2727, + "epoch": 0.17615107365856134, + "grad_norm": 0.9259144067764282, + "learning_rate": 1.8825659508942927e-05, + "loss": 0.1683, "step": 3470 }, { - "epoch": 0.08820916359944156, - "grad_norm": 0.7948024868965149, - "learning_rate": 1.941193890933706e-05, - "loss": 0.2428, + "epoch": 0.17640489364942383, + "grad_norm": 0.6214718818664551, + "learning_rate": 1.8823967375670507e-05, + "loss": 0.1877, "step": 3475 }, { - "epoch": 0.08833608325929687, - "grad_norm": 1.0229400396347046, - "learning_rate": 1.9411092778271357e-05, - "loss": 0.2359, + "epoch": 0.17665871364028632, + "grad_norm": 0.6122065186500549, + "learning_rate": 1.8822275242398094e-05, + "loss": 0.1823, "step": 3480 }, { - "epoch": 0.08846300291915217, - "grad_norm": 1.1537843942642212, - "learning_rate": 1.9410246647205652e-05, - "loss": 0.2565, + "epoch": 0.17691253363114878, + "grad_norm": 0.5101046562194824, + "learning_rate": 1.8820583109125678e-05, + "loss": 0.1558, "step": 3485 }, { - "epoch": 0.08858992257900748, - "grad_norm": 1.1452876329421997, - "learning_rate": 1.940940051613995e-05, - "loss": 0.2524, + "epoch": 0.17716635362201127, + "grad_norm": 0.6742383241653442, + "learning_rate": 1.8818890975853258e-05, + "loss": 0.1767, "step": 3490 }, { - "epoch": 0.0887168422388628, - "grad_norm": 0.8211934566497803, - "learning_rate": 1.940855438507425e-05, - "loss": 0.2257, + "epoch": 0.17742017361287374, + "grad_norm": 0.7784730792045593, + "learning_rate": 1.8817198842580845e-05, + "loss": 0.1944, "step": 3495 }, { - "epoch": 0.08884376189871811, - "grad_norm": 0.9323396682739258, - "learning_rate": 1.9407708254008548e-05, - "loss": 0.2501, + "epoch": 0.17767399360373623, + "grad_norm": 0.5944966673851013, + "learning_rate": 1.8815506709308425e-05, + "loss": 0.1625, "step": 3500 }, { - "epoch": 0.08897068155857342, - "grad_norm": 1.0089243650436401, - "learning_rate": 1.9406862122942846e-05, - "loss": 0.2456, + "epoch": 0.17792781359459872, + "grad_norm": 0.5932590961456299, + "learning_rate": 1.881381457603601e-05, + "loss": 0.1994, "step": 3505 }, { - "epoch": 0.08909760121842873, - "grad_norm": 1.2962764501571655, - "learning_rate": 1.9406015991877144e-05, - "loss": 0.2188, + "epoch": 0.17818163358546119, + "grad_norm": 0.7129168510437012, + "learning_rate": 1.8812122442763596e-05, + "loss": 0.2029, "step": 3510 }, { - "epoch": 0.08922452087828404, - "grad_norm": 0.8558183312416077, - "learning_rate": 1.9405169860811443e-05, - "loss": 0.251, + "epoch": 0.17843545357632368, + "grad_norm": 1.4822226762771606, + "learning_rate": 1.8810430309491176e-05, + "loss": 0.1733, "step": 3515 }, { - "epoch": 0.08935144053813936, - "grad_norm": 0.7986130714416504, - "learning_rate": 1.940432372974574e-05, - "loss": 0.2066, + "epoch": 0.17868927356718614, + "grad_norm": 0.5432773232460022, + "learning_rate": 1.8808738176218763e-05, + "loss": 0.1995, "step": 3520 }, { - "epoch": 0.08947836019799467, - "grad_norm": 0.815967321395874, - "learning_rate": 1.9403477598680036e-05, - "loss": 0.2384, + "epoch": 0.17894309355804863, + "grad_norm": 0.6018402576446533, + "learning_rate": 1.8807046042946343e-05, + "loss": 0.1706, "step": 3525 }, { - "epoch": 0.08960527985784998, - "grad_norm": 0.815326988697052, - "learning_rate": 1.9402631467614335e-05, - "loss": 0.2562, + "epoch": 0.17919691354891112, + "grad_norm": 0.7911956906318665, + "learning_rate": 1.8805353909673926e-05, + "loss": 0.2016, "step": 3530 }, { - "epoch": 0.0897321995177053, - "grad_norm": 1.0299516916275024, - "learning_rate": 1.9401785336548633e-05, - "loss": 0.2675, + "epoch": 0.1794507335397736, + "grad_norm": 0.9234727621078491, + "learning_rate": 1.880366177640151e-05, + "loss": 0.1936, "step": 3535 }, { - "epoch": 0.0898591191775606, - "grad_norm": 1.1873550415039062, - "learning_rate": 1.940093920548293e-05, - "loss": 0.2479, + "epoch": 0.17970455353063608, + "grad_norm": 0.5961397886276245, + "learning_rate": 1.8801969643129094e-05, + "loss": 0.1628, "step": 3540 }, { - "epoch": 0.08998603883741592, - "grad_norm": 0.9314694404602051, - "learning_rate": 1.9400093074417226e-05, - "loss": 0.2187, + "epoch": 0.17995837352149854, + "grad_norm": 0.689795196056366, + "learning_rate": 1.8800277509856677e-05, + "loss": 0.1835, "step": 3545 }, { - "epoch": 0.09011295849727123, - "grad_norm": 0.9607328772544861, - "learning_rate": 1.9399246943351525e-05, - "loss": 0.2146, + "epoch": 0.18021219351236104, + "grad_norm": 0.538791835308075, + "learning_rate": 1.879858537658426e-05, + "loss": 0.185, "step": 3550 }, { - "epoch": 0.09023987815712654, - "grad_norm": 0.8027885556221008, - "learning_rate": 1.9398400812285823e-05, - "loss": 0.213, + "epoch": 0.18046601350322353, + "grad_norm": 0.6588658094406128, + "learning_rate": 1.8796893243311844e-05, + "loss": 0.1787, "step": 3555 }, { - "epoch": 0.09036679781698186, - "grad_norm": 0.7053439617156982, - "learning_rate": 1.939755468122012e-05, - "loss": 0.2705, + "epoch": 0.180719833494086, + "grad_norm": 0.5752840042114258, + "learning_rate": 1.8795201110039428e-05, + "loss": 0.1643, "step": 3560 }, { - "epoch": 0.09049371747683717, - "grad_norm": 1.274819254875183, - "learning_rate": 1.939670855015442e-05, - "loss": 0.2542, + "epoch": 0.18097365348494848, + "grad_norm": 1.0449694395065308, + "learning_rate": 1.879350897676701e-05, + "loss": 0.167, "step": 3565 }, { - "epoch": 0.09062063713669247, - "grad_norm": 1.2390090227127075, - "learning_rate": 1.939586241908872e-05, - "loss": 0.2244, + "epoch": 0.18122747347581095, + "grad_norm": 0.5879854559898376, + "learning_rate": 1.8791816843494595e-05, + "loss": 0.1879, "step": 3570 }, { - "epoch": 0.09074755679654778, - "grad_norm": 1.2375258207321167, - "learning_rate": 1.9395016288023017e-05, - "loss": 0.2199, + "epoch": 0.18148129346667344, + "grad_norm": 0.782319962978363, + "learning_rate": 1.879012471022218e-05, + "loss": 0.1702, "step": 3575 }, { - "epoch": 0.09087447645640309, - "grad_norm": 0.9994595646858215, - "learning_rate": 1.9394170156957315e-05, - "loss": 0.2084, + "epoch": 0.1817351134575359, + "grad_norm": 0.6967921853065491, + "learning_rate": 1.8788432576949762e-05, + "loss": 0.1811, "step": 3580 }, { - "epoch": 0.0910013961162584, - "grad_norm": 1.390851378440857, - "learning_rate": 1.939332402589161e-05, - "loss": 0.2577, + "epoch": 0.1819889334483984, + "grad_norm": 0.6189330220222473, + "learning_rate": 1.8786740443677345e-05, + "loss": 0.1682, "step": 3585 }, { - "epoch": 0.09112831577611372, - "grad_norm": 1.4203026294708252, - "learning_rate": 1.939247789482591e-05, - "loss": 0.2183, + "epoch": 0.18224275343926088, + "grad_norm": 1.0096155405044556, + "learning_rate": 1.878504831040493e-05, + "loss": 0.1538, "step": 3590 }, { - "epoch": 0.09125523543596903, - "grad_norm": 0.9023216366767883, - "learning_rate": 1.9391631763760207e-05, - "loss": 0.2287, + "epoch": 0.18249657343012335, + "grad_norm": 0.9320985078811646, + "learning_rate": 1.8783356177132513e-05, + "loss": 0.1963, "step": 3595 }, { - "epoch": 0.09138215509582434, - "grad_norm": 0.6915572881698608, - "learning_rate": 1.9390785632694505e-05, - "loss": 0.2265, + "epoch": 0.18275039342098584, + "grad_norm": 0.6774333119392395, + "learning_rate": 1.8781664043860096e-05, + "loss": 0.1907, "step": 3600 }, { - "epoch": 0.09150907475567965, - "grad_norm": 1.3944305181503296, - "learning_rate": 1.9389939501628804e-05, - "loss": 0.2295, + "epoch": 0.1830042134118483, + "grad_norm": 0.7954360246658325, + "learning_rate": 1.877997191058768e-05, + "loss": 0.2024, "step": 3605 }, { - "epoch": 0.09163599441553497, - "grad_norm": 0.8488020300865173, - "learning_rate": 1.9389093370563102e-05, - "loss": 0.2171, + "epoch": 0.1832580334027108, + "grad_norm": 0.5827684998512268, + "learning_rate": 1.8778279777315263e-05, + "loss": 0.1882, "step": 3610 }, { - "epoch": 0.09176291407539028, - "grad_norm": 0.8288318514823914, - "learning_rate": 1.93882472394974e-05, - "loss": 0.1795, + "epoch": 0.1835118533935733, + "grad_norm": 0.9476773738861084, + "learning_rate": 1.8776587644042847e-05, + "loss": 0.2031, "step": 3615 }, { - "epoch": 0.09188983373524559, - "grad_norm": 0.7818716764450073, - "learning_rate": 1.93874011084317e-05, - "loss": 0.2343, + "epoch": 0.18376567338443575, + "grad_norm": 0.5946481227874756, + "learning_rate": 1.877489551077043e-05, + "loss": 0.1796, "step": 3620 }, { - "epoch": 0.0920167533951009, - "grad_norm": 0.8118691444396973, - "learning_rate": 1.9386554977365998e-05, - "loss": 0.2033, + "epoch": 0.18401949337529824, + "grad_norm": 0.9748368859291077, + "learning_rate": 1.8773203377498014e-05, + "loss": 0.1859, "step": 3625 }, { - "epoch": 0.09214367305495622, - "grad_norm": 0.7367107272148132, - "learning_rate": 1.9385708846300293e-05, - "loss": 0.2245, + "epoch": 0.1842733133661607, + "grad_norm": 0.680027425289154, + "learning_rate": 1.8771511244225597e-05, + "loss": 0.1949, "step": 3630 }, { - "epoch": 0.09227059271481153, - "grad_norm": 1.0515329837799072, - "learning_rate": 1.938486271523459e-05, - "loss": 0.1968, + "epoch": 0.1845271333570232, + "grad_norm": 0.7523426413536072, + "learning_rate": 1.876981911095318e-05, + "loss": 0.1911, "step": 3635 }, { - "epoch": 0.09239751237466684, - "grad_norm": 0.763935387134552, - "learning_rate": 1.938401658416889e-05, - "loss": 0.2212, + "epoch": 0.1847809533478857, + "grad_norm": 0.6500402092933655, + "learning_rate": 1.8768126977680764e-05, + "loss": 0.163, "step": 3640 }, { - "epoch": 0.09252443203452215, - "grad_norm": 2.0606870651245117, - "learning_rate": 1.9383170453103188e-05, - "loss": 0.225, + "epoch": 0.18503477333874815, + "grad_norm": 0.8817210793495178, + "learning_rate": 1.8766434844408348e-05, + "loss": 0.1681, "step": 3645 }, { - "epoch": 0.09265135169437747, - "grad_norm": 1.007465124130249, - "learning_rate": 1.9382324322037486e-05, - "loss": 0.2412, + "epoch": 0.18528859332961065, + "grad_norm": 0.6788656115531921, + "learning_rate": 1.876474271113593e-05, + "loss": 0.1521, "step": 3650 }, { - "epoch": 0.09277827135423278, - "grad_norm": 0.9042555093765259, - "learning_rate": 1.9381478190971785e-05, - "loss": 0.2216, + "epoch": 0.1855424133204731, + "grad_norm": 0.5651068687438965, + "learning_rate": 1.8763050577863512e-05, + "loss": 0.167, "step": 3655 }, { - "epoch": 0.09290519101408808, - "grad_norm": 1.005330204963684, - "learning_rate": 1.9380632059906083e-05, - "loss": 0.2593, + "epoch": 0.1857962333113356, + "grad_norm": 0.6204758286476135, + "learning_rate": 1.87613584445911e-05, + "loss": 0.1718, "step": 3660 }, { - "epoch": 0.09303211067394339, - "grad_norm": 0.9452401399612427, - "learning_rate": 1.937978592884038e-05, - "loss": 0.2471, + "epoch": 0.1860500533021981, + "grad_norm": 0.6937487721443176, + "learning_rate": 1.8759666311318682e-05, + "loss": 0.2, "step": 3665 }, { - "epoch": 0.0931590303337987, - "grad_norm": 1.048919916152954, - "learning_rate": 1.9378939797774676e-05, - "loss": 0.184, + "epoch": 0.18630387329306056, + "grad_norm": 0.791344165802002, + "learning_rate": 1.8757974178046266e-05, + "loss": 0.1746, "step": 3670 }, { - "epoch": 0.09328594999365401, - "grad_norm": 3.530853271484375, - "learning_rate": 1.9378093666708975e-05, - "loss": 0.2179, + "epoch": 0.18655769328392305, + "grad_norm": 0.7099347114562988, + "learning_rate": 1.875628204477385e-05, + "loss": 0.1779, "step": 3675 }, { - "epoch": 0.09341286965350933, - "grad_norm": 0.9165559411048889, - "learning_rate": 1.9377247535643273e-05, - "loss": 0.2257, + "epoch": 0.1868115132747855, + "grad_norm": 0.6113842129707336, + "learning_rate": 1.875458991150143e-05, + "loss": 0.1821, "step": 3680 }, { - "epoch": 0.09353978931336464, - "grad_norm": 0.7260668277740479, - "learning_rate": 1.937640140457757e-05, - "loss": 0.2259, + "epoch": 0.187065333265648, + "grad_norm": 0.6101746559143066, + "learning_rate": 1.8752897778229016e-05, + "loss": 0.1616, "step": 3685 }, { - "epoch": 0.09366670897321995, - "grad_norm": 0.8369659185409546, - "learning_rate": 1.9375555273511867e-05, - "loss": 0.2177, + "epoch": 0.1873191532565105, + "grad_norm": 0.7758021354675293, + "learning_rate": 1.87512056449566e-05, + "loss": 0.1774, "step": 3690 }, { - "epoch": 0.09379362863307526, - "grad_norm": 1.2068604230880737, - "learning_rate": 1.9374709142446165e-05, - "loss": 0.2534, + "epoch": 0.18757297324737296, + "grad_norm": 0.632546603679657, + "learning_rate": 1.874951351168418e-05, + "loss": 0.1653, "step": 3695 }, { - "epoch": 0.09392054829293058, - "grad_norm": 0.9643564820289612, - "learning_rate": 1.9373863011380463e-05, - "loss": 0.2115, + "epoch": 0.18782679323823545, + "grad_norm": 1.211270809173584, + "learning_rate": 1.8747821378411767e-05, + "loss": 0.1907, "step": 3700 }, { - "epoch": 0.09404746795278589, - "grad_norm": 0.9956749677658081, - "learning_rate": 1.9373016880314762e-05, - "loss": 0.2632, + "epoch": 0.18808061322909791, + "grad_norm": 0.8178271055221558, + "learning_rate": 1.8746129245139347e-05, + "loss": 0.1833, "step": 3705 }, { - "epoch": 0.0941743876126412, - "grad_norm": 0.9080381393432617, - "learning_rate": 1.937217074924906e-05, - "loss": 0.2019, + "epoch": 0.1883344332199604, + "grad_norm": 0.720481276512146, + "learning_rate": 1.8744437111866934e-05, + "loss": 0.1865, "step": 3710 }, { - "epoch": 0.09430130727249651, - "grad_norm": 0.8534904718399048, - "learning_rate": 1.937132461818336e-05, - "loss": 0.2347, + "epoch": 0.1885882532108229, + "grad_norm": 0.6861073970794678, + "learning_rate": 1.8742744978594514e-05, + "loss": 0.159, "step": 3715 }, { - "epoch": 0.09442822693235182, - "grad_norm": 1.936751365661621, - "learning_rate": 1.9370478487117657e-05, - "loss": 0.2245, + "epoch": 0.18884207320168536, + "grad_norm": 0.8942195773124695, + "learning_rate": 1.8741052845322098e-05, + "loss": 0.17, "step": 3720 }, { - "epoch": 0.09455514659220714, - "grad_norm": 1.1231223344802856, - "learning_rate": 1.9369632356051955e-05, - "loss": 0.2715, + "epoch": 0.18909589319254785, + "grad_norm": 0.6733516454696655, + "learning_rate": 1.8739360712049685e-05, + "loss": 0.2026, "step": 3725 }, { - "epoch": 0.09468206625206245, - "grad_norm": 1.120985746383667, - "learning_rate": 1.936878622498625e-05, - "loss": 0.234, + "epoch": 0.18934971318341032, + "grad_norm": 0.7968901991844177, + "learning_rate": 1.8737668578777265e-05, + "loss": 0.1768, "step": 3730 }, { - "epoch": 0.09480898591191776, - "grad_norm": 1.2166080474853516, - "learning_rate": 1.936794009392055e-05, - "loss": 0.2046, + "epoch": 0.1896035331742728, + "grad_norm": 0.7362722158432007, + "learning_rate": 1.873597644550485e-05, + "loss": 0.1793, "step": 3735 }, { - "epoch": 0.09493590557177307, - "grad_norm": 0.8336555361747742, - "learning_rate": 1.9367093962854847e-05, - "loss": 0.2254, + "epoch": 0.18985735316513527, + "grad_norm": 0.5821983814239502, + "learning_rate": 1.8734284312232432e-05, + "loss": 0.1935, "step": 3740 }, { - "epoch": 0.09506282523162837, - "grad_norm": 0.8134741187095642, - "learning_rate": 1.9366247831789146e-05, - "loss": 0.2184, + "epoch": 0.19011117315599776, + "grad_norm": 0.6550444960594177, + "learning_rate": 1.8732592178960016e-05, + "loss": 0.1815, "step": 3745 }, { - "epoch": 0.09518974489148369, - "grad_norm": 0.7357302308082581, - "learning_rate": 1.9365401700723444e-05, - "loss": 0.2256, + "epoch": 0.19036499314686026, + "grad_norm": 1.057003140449524, + "learning_rate": 1.87309000456876e-05, + "loss": 0.1735, "step": 3750 }, { - "epoch": 0.095316664551339, - "grad_norm": 0.5998886823654175, - "learning_rate": 1.9364555569657742e-05, - "loss": 0.2134, + "epoch": 0.19061881313772272, + "grad_norm": 0.6318512558937073, + "learning_rate": 1.8729207912415183e-05, + "loss": 0.1894, "step": 3755 }, { - "epoch": 0.09544358421119431, - "grad_norm": 0.7712414860725403, - "learning_rate": 1.936370943859204e-05, - "loss": 0.2146, + "epoch": 0.1908726331285852, + "grad_norm": 0.514367401599884, + "learning_rate": 1.8727515779142766e-05, + "loss": 0.162, "step": 3760 }, { - "epoch": 0.09557050387104962, - "grad_norm": 0.9046329259872437, - "learning_rate": 1.936286330752634e-05, - "loss": 0.2484, + "epoch": 0.19112645311944768, + "grad_norm": 0.5353686809539795, + "learning_rate": 1.872582364587035e-05, + "loss": 0.1942, "step": 3765 }, { - "epoch": 0.09569742353090493, - "grad_norm": 0.7762267589569092, - "learning_rate": 1.9362017176460634e-05, - "loss": 0.2486, + "epoch": 0.19138027311031017, + "grad_norm": 0.7195748090744019, + "learning_rate": 1.8724131512597933e-05, + "loss": 0.1638, "step": 3770 }, { - "epoch": 0.09582434319076025, - "grad_norm": 1.1691628694534302, - "learning_rate": 1.9361171045394933e-05, - "loss": 0.2144, + "epoch": 0.19163409310117266, + "grad_norm": 0.5870863199234009, + "learning_rate": 1.8722439379325517e-05, + "loss": 0.1616, "step": 3775 }, { - "epoch": 0.09595126285061556, - "grad_norm": 1.2161072492599487, - "learning_rate": 1.936032491432923e-05, - "loss": 0.2701, + "epoch": 0.19188791309203512, + "grad_norm": 0.5601217150688171, + "learning_rate": 1.87207472460531e-05, + "loss": 0.175, "step": 3780 }, { - "epoch": 0.09607818251047087, - "grad_norm": 1.19119393825531, - "learning_rate": 1.935947878326353e-05, - "loss": 0.2202, + "epoch": 0.19214173308289761, + "grad_norm": 0.6515225768089294, + "learning_rate": 1.8719055112780684e-05, + "loss": 0.196, "step": 3785 }, { - "epoch": 0.09620510217032618, - "grad_norm": 0.7615955471992493, - "learning_rate": 1.9358632652197828e-05, - "loss": 0.2496, + "epoch": 0.19239555307376008, + "grad_norm": 0.5685335397720337, + "learning_rate": 1.8717362979508267e-05, + "loss": 0.173, "step": 3790 }, { - "epoch": 0.0963320218301815, - "grad_norm": 0.9187094569206238, - "learning_rate": 1.9357786521132126e-05, - "loss": 0.2038, + "epoch": 0.19264937306462257, + "grad_norm": 0.5485351085662842, + "learning_rate": 1.871567084623585e-05, + "loss": 0.1613, "step": 3795 }, { - "epoch": 0.09645894149003681, - "grad_norm": 0.6565961241722107, - "learning_rate": 1.9356940390066425e-05, - "loss": 0.1922, + "epoch": 0.19290319305548506, + "grad_norm": 0.5211474299430847, + "learning_rate": 1.8713978712963435e-05, + "loss": 0.1811, "step": 3800 }, { - "epoch": 0.09658586114989212, - "grad_norm": 0.8192458152770996, - "learning_rate": 1.9356094259000723e-05, - "loss": 0.2043, + "epoch": 0.19315701304634753, + "grad_norm": 0.5695779323577881, + "learning_rate": 1.8712286579691018e-05, + "loss": 0.1737, "step": 3805 }, { - "epoch": 0.09671278080974743, - "grad_norm": 0.7217807173728943, - "learning_rate": 1.9355248127935018e-05, - "loss": 0.2239, + "epoch": 0.19341083303721002, + "grad_norm": 0.781248152256012, + "learning_rate": 1.87105944464186e-05, + "loss": 0.1474, "step": 3810 }, { - "epoch": 0.09683970046960275, - "grad_norm": 0.9245862364768982, - "learning_rate": 1.9354401996869316e-05, - "loss": 0.2015, + "epoch": 0.19366465302807248, + "grad_norm": 0.6343421936035156, + "learning_rate": 1.8708902313146185e-05, + "loss": 0.1597, "step": 3815 }, { - "epoch": 0.09696662012945806, - "grad_norm": 1.2739198207855225, - "learning_rate": 1.9353555865803615e-05, - "loss": 0.2329, + "epoch": 0.19391847301893497, + "grad_norm": 0.46050354838371277, + "learning_rate": 1.870721017987377e-05, + "loss": 0.1726, "step": 3820 }, { - "epoch": 0.09709353978931337, - "grad_norm": 0.8015793561935425, - "learning_rate": 1.9352709734737913e-05, - "loss": 0.2095, + "epoch": 0.19417229300979746, + "grad_norm": 0.7671462297439575, + "learning_rate": 1.8705518046601352e-05, + "loss": 0.16, "step": 3825 }, { - "epoch": 0.09722045944916867, - "grad_norm": 0.8556388020515442, - "learning_rate": 1.9351863603672208e-05, - "loss": 0.223, + "epoch": 0.19442611300065993, + "grad_norm": 0.6018497347831726, + "learning_rate": 1.8703825913328936e-05, + "loss": 0.17, "step": 3830 }, { - "epoch": 0.09734737910902398, - "grad_norm": 1.0989891290664673, - "learning_rate": 1.9351017472606507e-05, - "loss": 0.2197, + "epoch": 0.19467993299152242, + "grad_norm": 0.5179815292358398, + "learning_rate": 1.870213378005652e-05, + "loss": 0.1667, "step": 3835 }, { - "epoch": 0.0974742987688793, - "grad_norm": 1.1333421468734741, - "learning_rate": 1.9350171341540805e-05, - "loss": 0.245, + "epoch": 0.19493375298238488, + "grad_norm": 0.6752147674560547, + "learning_rate": 1.8700441646784103e-05, + "loss": 0.1778, "step": 3840 }, { - "epoch": 0.0976012184287346, - "grad_norm": 2.533620834350586, - "learning_rate": 1.9349325210475103e-05, - "loss": 0.2759, + "epoch": 0.19518757297324738, + "grad_norm": 0.5998455286026001, + "learning_rate": 1.8698749513511686e-05, + "loss": 0.1804, "step": 3845 }, { - "epoch": 0.09772813808858992, - "grad_norm": 1.6387470960617065, - "learning_rate": 1.9348479079409402e-05, - "loss": 0.2351, + "epoch": 0.19544139296410987, + "grad_norm": 0.7329761981964111, + "learning_rate": 1.869705738023927e-05, + "loss": 0.1705, "step": 3850 }, { - "epoch": 0.09785505774844523, - "grad_norm": 1.3592725992202759, - "learning_rate": 1.93476329483437e-05, - "loss": 0.2737, + "epoch": 0.19569521295497233, + "grad_norm": 0.5798784494400024, + "learning_rate": 1.8695365246966853e-05, + "loss": 0.1722, "step": 3855 }, { - "epoch": 0.09798197740830054, - "grad_norm": 0.8061047196388245, - "learning_rate": 1.9346786817278e-05, - "loss": 0.2268, + "epoch": 0.19594903294583482, + "grad_norm": 0.55069899559021, + "learning_rate": 1.8693673113694437e-05, + "loss": 0.1677, "step": 3860 }, { - "epoch": 0.09810889706815586, - "grad_norm": 0.840723991394043, - "learning_rate": 1.9345940686212297e-05, - "loss": 0.2418, + "epoch": 0.1962028529366973, + "grad_norm": 0.7288419008255005, + "learning_rate": 1.869198098042202e-05, + "loss": 0.1609, "step": 3865 }, { - "epoch": 0.09823581672801117, - "grad_norm": 1.0905126333236694, - "learning_rate": 1.9345094555146592e-05, - "loss": 0.2433, + "epoch": 0.19645667292755978, + "grad_norm": 0.5638713836669922, + "learning_rate": 1.8690288847149604e-05, + "loss": 0.1744, "step": 3870 }, { - "epoch": 0.09836273638786648, - "grad_norm": 1.1304442882537842, - "learning_rate": 1.934424842408089e-05, - "loss": 0.2251, + "epoch": 0.19671049291842224, + "grad_norm": 1.066811203956604, + "learning_rate": 1.8688596713877188e-05, + "loss": 0.1809, "step": 3875 }, { - "epoch": 0.0984896560477218, - "grad_norm": 1.265886664390564, - "learning_rate": 1.934340229301519e-05, - "loss": 0.2253, + "epoch": 0.19696431290928473, + "grad_norm": 0.5710257291793823, + "learning_rate": 1.868690458060477e-05, + "loss": 0.177, "step": 3880 }, { - "epoch": 0.0986165757075771, - "grad_norm": 0.9406000971794128, - "learning_rate": 1.9342556161949487e-05, - "loss": 0.2237, + "epoch": 0.19721813290014722, + "grad_norm": 1.2099708318710327, + "learning_rate": 1.868521244733235e-05, + "loss": 0.1727, "step": 3885 }, { - "epoch": 0.09874349536743242, - "grad_norm": 2.325550079345703, - "learning_rate": 1.9341710030883786e-05, - "loss": 0.2331, + "epoch": 0.1974719528910097, + "grad_norm": 0.5482766032218933, + "learning_rate": 1.8683520314059938e-05, + "loss": 0.1774, "step": 3890 }, { - "epoch": 0.09887041502728773, - "grad_norm": 0.6453620791435242, - "learning_rate": 1.9340863899818084e-05, - "loss": 0.1856, + "epoch": 0.19772577288187218, + "grad_norm": 0.5520328283309937, + "learning_rate": 1.868182818078752e-05, + "loss": 0.2038, "step": 3895 }, { - "epoch": 0.09899733468714304, - "grad_norm": 1.2219336032867432, - "learning_rate": 1.9340017768752383e-05, - "loss": 0.2588, + "epoch": 0.19797959287273464, + "grad_norm": 0.7456613183021545, + "learning_rate": 1.8680136047515102e-05, + "loss": 0.2103, "step": 3900 }, { - "epoch": 0.09912425434699836, - "grad_norm": 0.7682165503501892, - "learning_rate": 1.933917163768668e-05, - "loss": 0.2453, + "epoch": 0.19823341286359714, + "grad_norm": 0.6516144275665283, + "learning_rate": 1.867844391424269e-05, + "loss": 0.1716, "step": 3905 }, { - "epoch": 0.09925117400685367, - "grad_norm": 0.6612002849578857, - "learning_rate": 1.9338325506620976e-05, - "loss": 0.2299, + "epoch": 0.19848723285445963, + "grad_norm": 0.6367723345756531, + "learning_rate": 1.867675178097027e-05, + "loss": 0.1618, "step": 3910 }, { - "epoch": 0.09937809366670897, - "grad_norm": 1.8694086074829102, - "learning_rate": 1.9337479375555274e-05, - "loss": 0.2037, + "epoch": 0.1987410528453221, + "grad_norm": 0.4886556565761566, + "learning_rate": 1.8675059647697856e-05, + "loss": 0.1596, "step": 3915 }, { - "epoch": 0.09950501332656428, - "grad_norm": 0.6837571859359741, - "learning_rate": 1.9336633244489573e-05, - "loss": 0.2286, + "epoch": 0.19899487283618458, + "grad_norm": 0.443977952003479, + "learning_rate": 1.8673367514425436e-05, + "loss": 0.1847, "step": 3920 }, { - "epoch": 0.09963193298641959, - "grad_norm": 3.4474117755889893, - "learning_rate": 1.933578711342387e-05, - "loss": 0.2335, + "epoch": 0.19924869282704705, + "grad_norm": 0.5834622383117676, + "learning_rate": 1.867167538115302e-05, + "loss": 0.1697, "step": 3925 }, { - "epoch": 0.0997588526462749, - "grad_norm": 0.8646114468574524, - "learning_rate": 1.933494098235817e-05, - "loss": 0.257, + "epoch": 0.19950251281790954, + "grad_norm": 0.6995598673820496, + "learning_rate": 1.8669983247880607e-05, + "loss": 0.1785, "step": 3930 }, { - "epoch": 0.09988577230613022, - "grad_norm": 1.2050676345825195, - "learning_rate": 1.9334094851292468e-05, - "loss": 0.255, + "epoch": 0.19975633280877203, + "grad_norm": 0.5793687105178833, + "learning_rate": 1.8668291114608187e-05, + "loss": 0.1835, "step": 3935 }, { - "epoch": 0.10001269196598553, - "grad_norm": 1.0482436418533325, - "learning_rate": 1.9333248720226766e-05, - "loss": 0.2722, + "epoch": 0.2000101527996345, + "grad_norm": 0.6711710691452026, + "learning_rate": 1.866659898133577e-05, + "loss": 0.1808, "step": 3940 }, { - "epoch": 0.10013961162584084, - "grad_norm": 0.9007412195205688, - "learning_rate": 1.9332402589161065e-05, - "loss": 0.1996, + "epoch": 0.20026397279049699, + "grad_norm": 0.7024595737457275, + "learning_rate": 1.8664906848063354e-05, + "loss": 0.1723, "step": 3945 }, { - "epoch": 0.10026653128569615, - "grad_norm": 1.0234311819076538, - "learning_rate": 1.933155645809536e-05, - "loss": 0.2171, + "epoch": 0.20051779278135945, + "grad_norm": 0.5017092823982239, + "learning_rate": 1.8663214714790937e-05, + "loss": 0.1691, "step": 3950 }, { - "epoch": 0.10039345094555147, - "grad_norm": 1.2604800462722778, - "learning_rate": 1.9330710327029658e-05, - "loss": 0.2454, + "epoch": 0.20077161277222194, + "grad_norm": 0.769305944442749, + "learning_rate": 1.8661522581518524e-05, + "loss": 0.1612, "step": 3955 }, { - "epoch": 0.10052037060540678, - "grad_norm": 0.9622499346733093, - "learning_rate": 1.9329864195963957e-05, - "loss": 0.2365, + "epoch": 0.20102543276308443, + "grad_norm": 0.6473913192749023, + "learning_rate": 1.8659830448246105e-05, + "loss": 0.1788, "step": 3960 }, { - "epoch": 0.10064729026526209, - "grad_norm": 1.1835730075836182, - "learning_rate": 1.9329018064898255e-05, - "loss": 0.2569, + "epoch": 0.2012792527539469, + "grad_norm": 0.5249276757240295, + "learning_rate": 1.8658138314973688e-05, + "loss": 0.1654, "step": 3965 }, { - "epoch": 0.1007742099251174, - "grad_norm": 2.0862584114074707, - "learning_rate": 1.932817193383255e-05, - "loss": 0.2307, + "epoch": 0.2015330727448094, + "grad_norm": 0.9578689932823181, + "learning_rate": 1.865644618170127e-05, + "loss": 0.1652, "step": 3970 }, { - "epoch": 0.10090112958497272, - "grad_norm": 1.5677167177200317, - "learning_rate": 1.932732580276685e-05, - "loss": 0.2375, + "epoch": 0.20178689273567185, + "grad_norm": 0.580237627029419, + "learning_rate": 1.8654754048428855e-05, + "loss": 0.1705, "step": 3975 }, { - "epoch": 0.10102804924482803, - "grad_norm": 1.1190135478973389, - "learning_rate": 1.9326479671701147e-05, - "loss": 0.2046, + "epoch": 0.20204071272653434, + "grad_norm": 0.7080004811286926, + "learning_rate": 1.865306191515644e-05, + "loss": 0.1527, "step": 3980 }, { - "epoch": 0.10115496890468334, - "grad_norm": 1.0484044551849365, - "learning_rate": 1.9325633540635445e-05, - "loss": 0.2126, + "epoch": 0.20229453271739684, + "grad_norm": 0.4707263112068176, + "learning_rate": 1.8651369781884022e-05, + "loss": 0.1525, "step": 3985 }, { - "epoch": 0.10128188856453865, - "grad_norm": 0.8122559785842896, - "learning_rate": 1.9324787409569744e-05, - "loss": 0.2291, + "epoch": 0.2025483527082593, + "grad_norm": 1.2706327438354492, + "learning_rate": 1.8649677648611606e-05, + "loss": 0.1963, "step": 3990 }, { - "epoch": 0.10140880822439396, - "grad_norm": 1.3388766050338745, - "learning_rate": 1.9323941278504042e-05, - "loss": 0.226, + "epoch": 0.2028021726991218, + "grad_norm": 0.7353994250297546, + "learning_rate": 1.864798551533919e-05, + "loss": 0.19, "step": 3995 }, { - "epoch": 0.10153572788424926, - "grad_norm": 1.0965639352798462, - "learning_rate": 1.932309514743834e-05, - "loss": 0.2157, + "epoch": 0.20305599268998425, + "grad_norm": 0.7245195508003235, + "learning_rate": 1.8646293382066773e-05, + "loss": 0.1833, "step": 4000 }, { - "epoch": 0.10166264754410458, - "grad_norm": 1.0435322523117065, - "learning_rate": 1.932224901637264e-05, - "loss": 0.2273, + "epoch": 0.20330981268084675, + "grad_norm": 0.7345285415649414, + "learning_rate": 1.8644601248794356e-05, + "loss": 0.175, "step": 4005 }, { - "epoch": 0.10178956720395989, - "grad_norm": 1.3443458080291748, - "learning_rate": 1.9321402885306934e-05, - "loss": 0.239, + "epoch": 0.2035636326717092, + "grad_norm": 0.8779308795928955, + "learning_rate": 1.864290911552194e-05, + "loss": 0.1683, "step": 4010 }, { - "epoch": 0.1019164868638152, - "grad_norm": 0.9611615538597107, - "learning_rate": 1.9320556754241232e-05, - "loss": 0.1673, + "epoch": 0.2038174526625717, + "grad_norm": 0.49476489424705505, + "learning_rate": 1.8641216982249524e-05, + "loss": 0.1581, "step": 4015 }, { - "epoch": 0.10204340652367051, - "grad_norm": 1.5891668796539307, - "learning_rate": 1.931971062317553e-05, - "loss": 0.2085, + "epoch": 0.2040712726534342, + "grad_norm": 0.5535828471183777, + "learning_rate": 1.8639524848977107e-05, + "loss": 0.1901, "step": 4020 }, { - "epoch": 0.10217032618352583, - "grad_norm": 1.2145615816116333, - "learning_rate": 1.931886449210983e-05, - "loss": 0.1954, + "epoch": 0.20432509264429666, + "grad_norm": 0.5144571661949158, + "learning_rate": 1.863783271570469e-05, + "loss": 0.1598, "step": 4025 }, { - "epoch": 0.10229724584338114, - "grad_norm": 1.0900102853775024, - "learning_rate": 1.9318018361044127e-05, - "loss": 0.2212, + "epoch": 0.20457891263515915, + "grad_norm": 0.666415274143219, + "learning_rate": 1.8636140582432274e-05, + "loss": 0.1713, "step": 4030 }, { - "epoch": 0.10242416550323645, - "grad_norm": 1.1373399496078491, - "learning_rate": 1.9317172229978426e-05, - "loss": 0.2091, + "epoch": 0.2048327326260216, + "grad_norm": 0.5483223795890808, + "learning_rate": 1.8634448449159858e-05, + "loss": 0.1685, "step": 4035 }, { - "epoch": 0.10255108516309176, - "grad_norm": 0.7083691358566284, - "learning_rate": 1.9316326098912724e-05, - "loss": 0.224, + "epoch": 0.2050865526168841, + "grad_norm": 1.4303494691848755, + "learning_rate": 1.863275631588744e-05, + "loss": 0.1635, "step": 4040 }, { - "epoch": 0.10267800482294707, - "grad_norm": 2.1579391956329346, - "learning_rate": 1.9315479967847023e-05, - "loss": 0.2133, + "epoch": 0.2053403726077466, + "grad_norm": 0.8545394539833069, + "learning_rate": 1.8631064182615025e-05, + "loss": 0.1647, "step": 4045 }, { - "epoch": 0.10280492448280239, - "grad_norm": 1.9878079891204834, - "learning_rate": 1.9314633836781318e-05, - "loss": 0.2161, + "epoch": 0.20559419259860906, + "grad_norm": 1.2137486934661865, + "learning_rate": 1.862937204934261e-05, + "loss": 0.1817, "step": 4050 }, { - "epoch": 0.1029318441426577, - "grad_norm": 0.6478080153465271, - "learning_rate": 1.9313787705715616e-05, - "loss": 0.2159, + "epoch": 0.20584801258947155, + "grad_norm": 0.6643612384796143, + "learning_rate": 1.8627679916070192e-05, + "loss": 0.1677, "step": 4055 }, { - "epoch": 0.10305876380251301, - "grad_norm": 1.7184710502624512, - "learning_rate": 1.9312941574649914e-05, - "loss": 0.1973, + "epoch": 0.20610183258033402, + "grad_norm": 0.6444915533065796, + "learning_rate": 1.8625987782797775e-05, + "loss": 0.1735, "step": 4060 }, { - "epoch": 0.10318568346236832, - "grad_norm": 0.6055800318717957, - "learning_rate": 1.9312095443584213e-05, - "loss": 0.1671, + "epoch": 0.2063556525711965, + "grad_norm": 0.591526985168457, + "learning_rate": 1.862429564952536e-05, + "loss": 0.1719, "step": 4065 }, { - "epoch": 0.10331260312222364, - "grad_norm": 0.9398266077041626, - "learning_rate": 1.931124931251851e-05, - "loss": 0.2335, + "epoch": 0.206609472562059, + "grad_norm": 0.5368006825447083, + "learning_rate": 1.8622603516252943e-05, + "loss": 0.1468, "step": 4070 }, { - "epoch": 0.10343952278207895, - "grad_norm": 1.149258017539978, - "learning_rate": 1.931040318145281e-05, - "loss": 0.2277, + "epoch": 0.20686329255292146, + "grad_norm": 0.589856743812561, + "learning_rate": 1.8620911382980523e-05, + "loss": 0.1637, "step": 4075 }, { - "epoch": 0.10356644244193426, - "grad_norm": 1.2454499006271362, - "learning_rate": 1.9309557050387108e-05, - "loss": 0.2119, + "epoch": 0.20711711254378395, + "grad_norm": 0.707324206829071, + "learning_rate": 1.861921924970811e-05, + "loss": 0.1785, "step": 4080 }, { - "epoch": 0.10369336210178957, - "grad_norm": 0.7552687525749207, - "learning_rate": 1.9308710919321406e-05, - "loss": 0.1998, + "epoch": 0.20737093253464642, + "grad_norm": 0.6533371210098267, + "learning_rate": 1.8617527116435693e-05, + "loss": 0.1815, "step": 4085 }, { - "epoch": 0.10382028176164487, - "grad_norm": 0.9446992874145508, - "learning_rate": 1.93078647882557e-05, - "loss": 0.2665, + "epoch": 0.2076247525255089, + "grad_norm": 1.014189600944519, + "learning_rate": 1.8615834983163273e-05, + "loss": 0.1684, "step": 4090 }, { - "epoch": 0.10394720142150018, - "grad_norm": 0.7842034101486206, - "learning_rate": 1.930701865719e-05, - "loss": 0.1948, + "epoch": 0.2078785725163714, + "grad_norm": 0.4793491065502167, + "learning_rate": 1.861414284989086e-05, + "loss": 0.1621, "step": 4095 }, { - "epoch": 0.1040741210813555, - "grad_norm": 0.6790844798088074, - "learning_rate": 1.9306172526124298e-05, - "loss": 0.1965, + "epoch": 0.20813239250723387, + "grad_norm": 0.7456510663032532, + "learning_rate": 1.861245071661844e-05, + "loss": 0.1836, "step": 4100 }, { - "epoch": 0.10420104074121081, - "grad_norm": 0.9377152323722839, - "learning_rate": 1.9305326395058597e-05, - "loss": 0.1939, + "epoch": 0.20838621249809636, + "grad_norm": 0.706316351890564, + "learning_rate": 1.8610758583346027e-05, + "loss": 0.176, "step": 4105 }, { - "epoch": 0.10432796040106612, - "grad_norm": 0.81058269739151, - "learning_rate": 1.930448026399289e-05, - "loss": 0.1924, + "epoch": 0.20864003248895882, + "grad_norm": 1.0256385803222656, + "learning_rate": 1.860906645007361e-05, + "loss": 0.1866, "step": 4110 }, { - "epoch": 0.10445488006092143, - "grad_norm": 0.8358652591705322, - "learning_rate": 1.930363413292719e-05, - "loss": 0.2058, + "epoch": 0.2088938524798213, + "grad_norm": 0.5895639061927795, + "learning_rate": 1.860737431680119e-05, + "loss": 0.1542, "step": 4115 }, { - "epoch": 0.10458179972077675, - "grad_norm": 1.107664942741394, - "learning_rate": 1.930278800186149e-05, - "loss": 0.2003, + "epoch": 0.2091476724706838, + "grad_norm": 0.6485773324966431, + "learning_rate": 1.8605682183528778e-05, + "loss": 0.1585, "step": 4120 }, { - "epoch": 0.10470871938063206, - "grad_norm": 0.7854679822921753, - "learning_rate": 1.9301941870795787e-05, - "loss": 0.1966, + "epoch": 0.20940149246154627, + "grad_norm": 0.5658993124961853, + "learning_rate": 1.8603990050256358e-05, + "loss": 0.189, "step": 4125 }, { - "epoch": 0.10483563904048737, - "grad_norm": 0.7209163904190063, - "learning_rate": 1.9301095739730085e-05, - "loss": 0.1832, + "epoch": 0.20965531245240876, + "grad_norm": 0.5852290987968445, + "learning_rate": 1.860229791698394e-05, + "loss": 0.164, "step": 4130 }, { - "epoch": 0.10496255870034268, - "grad_norm": 1.7365106344223022, - "learning_rate": 1.9300249608664384e-05, - "loss": 0.2628, + "epoch": 0.20990913244327122, + "grad_norm": 0.5155125856399536, + "learning_rate": 1.860060578371153e-05, + "loss": 0.1706, "step": 4135 }, { - "epoch": 0.105089478360198, - "grad_norm": 1.0078846216201782, - "learning_rate": 1.9299403477598682e-05, - "loss": 0.2204, + "epoch": 0.21016295243413372, + "grad_norm": 0.5006566047668457, + "learning_rate": 1.859891365043911e-05, + "loss": 0.158, "step": 4140 }, { - "epoch": 0.10521639802005331, - "grad_norm": 0.7731621861457825, - "learning_rate": 1.929855734653298e-05, - "loss": 0.2299, + "epoch": 0.21041677242499618, + "grad_norm": 0.8291502594947815, + "learning_rate": 1.8597221517166692e-05, + "loss": 0.1732, "step": 4145 }, { - "epoch": 0.10534331767990862, - "grad_norm": 0.7200449109077454, - "learning_rate": 1.929771121546728e-05, - "loss": 0.2068, + "epoch": 0.21067059241585867, + "grad_norm": 0.6317897439002991, + "learning_rate": 1.8595529383894276e-05, + "loss": 0.1629, "step": 4150 }, { - "epoch": 0.10547023733976393, - "grad_norm": 1.1264568567276, - "learning_rate": 1.9296865084401574e-05, - "loss": 0.2135, + "epoch": 0.21092441240672116, + "grad_norm": 0.8886175155639648, + "learning_rate": 1.859383725062186e-05, + "loss": 0.162, "step": 4155 }, { - "epoch": 0.10559715699961925, - "grad_norm": 0.9792529940605164, - "learning_rate": 1.9296018953335872e-05, - "loss": 0.2223, + "epoch": 0.21117823239758363, + "grad_norm": 1.5014044046401978, + "learning_rate": 1.8592145117349446e-05, + "loss": 0.1696, "step": 4160 }, { - "epoch": 0.10572407665947456, - "grad_norm": 0.9717310667037964, - "learning_rate": 1.929517282227017e-05, - "loss": 0.2093, + "epoch": 0.21143205238844612, + "grad_norm": 0.6006774306297302, + "learning_rate": 1.8590452984077026e-05, + "loss": 0.1706, "step": 4165 }, { - "epoch": 0.10585099631932987, - "grad_norm": 0.7246736884117126, - "learning_rate": 1.929432669120447e-05, - "loss": 0.1882, + "epoch": 0.21168587237930858, + "grad_norm": 0.5629169940948486, + "learning_rate": 1.858876085080461e-05, + "loss": 0.1868, "step": 4170 }, { - "epoch": 0.10597791597918517, - "grad_norm": 0.9230507016181946, - "learning_rate": 1.9293480560138768e-05, - "loss": 0.2248, + "epoch": 0.21193969237017107, + "grad_norm": 0.5821768641471863, + "learning_rate": 1.8587068717532194e-05, + "loss": 0.1612, "step": 4175 }, { - "epoch": 0.10610483563904048, - "grad_norm": 0.9745478630065918, - "learning_rate": 1.9292634429073066e-05, - "loss": 0.2386, + "epoch": 0.21219351236103356, + "grad_norm": 0.4641706645488739, + "learning_rate": 1.8585376584259777e-05, + "loss": 0.1573, "step": 4180 }, { - "epoch": 0.1062317552988958, - "grad_norm": 0.6078996658325195, - "learning_rate": 1.9291788298007364e-05, - "loss": 0.192, + "epoch": 0.21244733235189603, + "grad_norm": 0.6541835069656372, + "learning_rate": 1.858368445098736e-05, + "loss": 0.1757, "step": 4185 }, { - "epoch": 0.1063586749587511, - "grad_norm": 0.8220447301864624, - "learning_rate": 1.9290942166941663e-05, - "loss": 0.2162, + "epoch": 0.21270115234275852, + "grad_norm": 0.6074985861778259, + "learning_rate": 1.8581992317714944e-05, + "loss": 0.1877, "step": 4190 }, { - "epoch": 0.10648559461860642, - "grad_norm": 0.7671021223068237, - "learning_rate": 1.9290096035875958e-05, - "loss": 0.2325, + "epoch": 0.21295497233362098, + "grad_norm": 0.63148033618927, + "learning_rate": 1.8580300184442528e-05, + "loss": 0.1815, "step": 4195 }, { - "epoch": 0.10661251427846173, - "grad_norm": 1.1744745969772339, - "learning_rate": 1.9289249904810256e-05, - "loss": 0.1917, + "epoch": 0.21320879232448348, + "grad_norm": 0.7583580017089844, + "learning_rate": 1.857860805117011e-05, + "loss": 0.1703, "step": 4200 }, { - "epoch": 0.10673943393831704, - "grad_norm": 0.977283775806427, - "learning_rate": 1.9288403773744555e-05, - "loss": 0.2209, + "epoch": 0.21346261231534597, + "grad_norm": 0.737652599811554, + "learning_rate": 1.8576915917897695e-05, + "loss": 0.1595, "step": 4205 }, { - "epoch": 0.10686635359817236, - "grad_norm": 0.7146806716918945, - "learning_rate": 1.9287557642678853e-05, - "loss": 0.2438, + "epoch": 0.21371643230620843, + "grad_norm": 0.6646580696105957, + "learning_rate": 1.857522378462528e-05, + "loss": 0.1653, "step": 4210 }, { - "epoch": 0.10699327325802767, - "grad_norm": 0.9886432886123657, - "learning_rate": 1.928671151161315e-05, - "loss": 0.1758, + "epoch": 0.21397025229707092, + "grad_norm": 1.0683635473251343, + "learning_rate": 1.8573531651352862e-05, + "loss": 0.1656, "step": 4215 }, { - "epoch": 0.10712019291788298, - "grad_norm": 0.9756491184234619, - "learning_rate": 1.928586538054745e-05, - "loss": 0.2091, + "epoch": 0.2142240722879334, + "grad_norm": 0.6982542276382446, + "learning_rate": 1.8571839518080445e-05, + "loss": 0.1861, "step": 4220 }, { - "epoch": 0.1072471125777383, - "grad_norm": 1.163233757019043, - "learning_rate": 1.9285019249481748e-05, - "loss": 0.19, + "epoch": 0.21447789227879588, + "grad_norm": 0.6811363697052002, + "learning_rate": 1.857014738480803e-05, + "loss": 0.1652, "step": 4225 }, { - "epoch": 0.1073740322375936, - "grad_norm": 0.8932257294654846, - "learning_rate": 1.9284173118416047e-05, - "loss": 0.2184, + "epoch": 0.21473171226965837, + "grad_norm": 0.43406397104263306, + "learning_rate": 1.8568455251535613e-05, + "loss": 0.1683, "step": 4230 }, { - "epoch": 0.10750095189744892, - "grad_norm": 0.9897421002388, - "learning_rate": 1.928332698735034e-05, - "loss": 0.2042, + "epoch": 0.21498553226052083, + "grad_norm": 0.4632825255393982, + "learning_rate": 1.8566763118263196e-05, + "loss": 0.1428, "step": 4235 }, { - "epoch": 0.10762787155730423, - "grad_norm": 0.8948019742965698, - "learning_rate": 1.928248085628464e-05, - "loss": 0.1812, + "epoch": 0.21523935225138333, + "grad_norm": 0.6871059536933899, + "learning_rate": 1.856507098499078e-05, + "loss": 0.1557, "step": 4240 }, { - "epoch": 0.10775479121715954, - "grad_norm": 1.0315595865249634, - "learning_rate": 1.928163472521894e-05, - "loss": 0.2276, + "epoch": 0.2154931722422458, + "grad_norm": 0.4901556968688965, + "learning_rate": 1.8563378851718363e-05, + "loss": 0.1651, "step": 4245 }, { - "epoch": 0.10788171087701486, - "grad_norm": 1.022025465965271, - "learning_rate": 1.9280788594153237e-05, - "loss": 0.2368, + "epoch": 0.21574699223310828, + "grad_norm": 0.5677388310432434, + "learning_rate": 1.8561686718445947e-05, + "loss": 0.1552, "step": 4250 }, { - "epoch": 0.10800863053687017, - "grad_norm": 0.9149864315986633, - "learning_rate": 1.9279942463087532e-05, - "loss": 0.2253, + "epoch": 0.21600081222397077, + "grad_norm": 0.7926852107048035, + "learning_rate": 1.855999458517353e-05, + "loss": 0.1605, "step": 4255 }, { - "epoch": 0.10813555019672547, - "grad_norm": 0.5962433815002441, - "learning_rate": 1.927909633202183e-05, - "loss": 0.2074, + "epoch": 0.21625463221483324, + "grad_norm": 0.69388347864151, + "learning_rate": 1.8558302451901114e-05, + "loss": 0.1773, "step": 4260 }, { - "epoch": 0.10826246985658078, - "grad_norm": 0.9450381398200989, - "learning_rate": 1.927825020095613e-05, - "loss": 0.245, + "epoch": 0.21650845220569573, + "grad_norm": 0.6975191235542297, + "learning_rate": 1.8556610318628697e-05, + "loss": 0.1646, "step": 4265 }, { - "epoch": 0.10838938951643609, - "grad_norm": 0.8050817847251892, - "learning_rate": 1.9277404069890427e-05, - "loss": 0.196, + "epoch": 0.2167622721965582, + "grad_norm": 0.5928018689155579, + "learning_rate": 1.855491818535628e-05, + "loss": 0.1664, "step": 4270 }, { - "epoch": 0.1085163091762914, - "grad_norm": 0.7721019983291626, - "learning_rate": 1.9276557938824725e-05, - "loss": 0.2068, + "epoch": 0.21701609218742068, + "grad_norm": 0.7675602436065674, + "learning_rate": 1.8553226052083864e-05, + "loss": 0.1727, "step": 4275 }, { - "epoch": 0.10864322883614672, - "grad_norm": 1.2933099269866943, - "learning_rate": 1.9275711807759024e-05, - "loss": 0.1902, + "epoch": 0.21726991217828315, + "grad_norm": 1.1031657457351685, + "learning_rate": 1.8551533918811445e-05, + "loss": 0.1664, "step": 4280 }, { - "epoch": 0.10877014849600203, - "grad_norm": 0.8257484436035156, - "learning_rate": 1.9274865676693322e-05, - "loss": 0.2222, + "epoch": 0.21752373216914564, + "grad_norm": 0.46347787976264954, + "learning_rate": 1.854984178553903e-05, + "loss": 0.1781, "step": 4285 }, { - "epoch": 0.10889706815585734, - "grad_norm": 0.7227062582969666, - "learning_rate": 1.927401954562762e-05, - "loss": 0.1936, + "epoch": 0.21777755216000813, + "grad_norm": 0.8614928126335144, + "learning_rate": 1.8548149652266615e-05, + "loss": 0.1721, "step": 4290 }, { - "epoch": 0.10902398781571265, - "grad_norm": 1.0127782821655273, - "learning_rate": 1.9273173414561916e-05, - "loss": 0.2194, + "epoch": 0.2180313721508706, + "grad_norm": 0.49578431248664856, + "learning_rate": 1.8546457518994195e-05, + "loss": 0.1724, "step": 4295 }, { - "epoch": 0.10915090747556797, - "grad_norm": 0.997646152973175, - "learning_rate": 1.9272327283496214e-05, - "loss": 0.1993, + "epoch": 0.2182851921417331, + "grad_norm": 0.6700772643089294, + "learning_rate": 1.8544765385721782e-05, + "loss": 0.1681, "step": 4300 }, { - "epoch": 0.10927782713542328, - "grad_norm": 1.3856492042541504, - "learning_rate": 1.9271481152430512e-05, - "loss": 0.2314, + "epoch": 0.21853901213259555, + "grad_norm": 0.6420726180076599, + "learning_rate": 1.8543073252449362e-05, + "loss": 0.185, "step": 4305 }, { - "epoch": 0.10940474679527859, - "grad_norm": 1.3000781536102295, - "learning_rate": 1.927063502136481e-05, - "loss": 0.225, + "epoch": 0.21879283212345804, + "grad_norm": 0.6904594302177429, + "learning_rate": 1.854138111917695e-05, + "loss": 0.1689, "step": 4310 }, { - "epoch": 0.1095316664551339, - "grad_norm": 1.331863284111023, - "learning_rate": 1.926978889029911e-05, - "loss": 0.2086, + "epoch": 0.21904665211432053, + "grad_norm": 0.5921376347541809, + "learning_rate": 1.8539688985904533e-05, + "loss": 0.1651, "step": 4315 }, { - "epoch": 0.10965858611498921, - "grad_norm": 0.7433885335922241, - "learning_rate": 1.9268942759233408e-05, - "loss": 0.1814, + "epoch": 0.219300472105183, + "grad_norm": 0.7793440818786621, + "learning_rate": 1.8537996852632113e-05, + "loss": 0.1718, "step": 4320 }, { - "epoch": 0.10978550577484453, - "grad_norm": 0.8436661958694458, - "learning_rate": 1.9268096628167706e-05, - "loss": 0.2186, + "epoch": 0.2195542920960455, + "grad_norm": 0.42908427119255066, + "learning_rate": 1.85363047193597e-05, + "loss": 0.1639, "step": 4325 }, { - "epoch": 0.10991242543469984, - "grad_norm": 0.7441290020942688, - "learning_rate": 1.9267250497102004e-05, - "loss": 0.1866, + "epoch": 0.21980811208690795, + "grad_norm": 0.6529392600059509, + "learning_rate": 1.853461258608728e-05, + "loss": 0.1845, "step": 4330 }, { - "epoch": 0.11003934509455515, - "grad_norm": 0.704481840133667, - "learning_rate": 1.92664043660363e-05, - "loss": 0.2025, + "epoch": 0.22006193207777044, + "grad_norm": 0.4588109254837036, + "learning_rate": 1.8532920452814864e-05, + "loss": 0.1653, "step": 4335 }, { - "epoch": 0.11016626475441046, - "grad_norm": 1.531681776046753, - "learning_rate": 1.9265558234970598e-05, - "loss": 0.208, + "epoch": 0.22031575206863294, + "grad_norm": 0.5849223136901855, + "learning_rate": 1.853122831954245e-05, + "loss": 0.164, "step": 4340 }, { - "epoch": 0.11029318441426576, - "grad_norm": 0.7923519015312195, - "learning_rate": 1.9264712103904896e-05, - "loss": 0.1911, + "epoch": 0.2205695720594954, + "grad_norm": 0.7719104290008545, + "learning_rate": 1.852953618627003e-05, + "loss": 0.1663, "step": 4345 }, { - "epoch": 0.11042010407412108, - "grad_norm": 2.403700828552246, - "learning_rate": 1.9263865972839195e-05, - "loss": 0.1748, + "epoch": 0.2208233920503579, + "grad_norm": 0.5275380611419678, + "learning_rate": 1.8527844052997618e-05, + "loss": 0.1622, "step": 4350 }, { - "epoch": 0.11054702373397639, - "grad_norm": 0.863163411617279, - "learning_rate": 1.9263019841773493e-05, - "loss": 0.1885, + "epoch": 0.22107721204122036, + "grad_norm": 1.1291331052780151, + "learning_rate": 1.8526151919725198e-05, + "loss": 0.1736, "step": 4355 }, { - "epoch": 0.1106739433938317, - "grad_norm": 1.0433287620544434, - "learning_rate": 1.926217371070779e-05, - "loss": 0.195, + "epoch": 0.22133103203208285, + "grad_norm": 0.7423467636108398, + "learning_rate": 1.852445978645278e-05, + "loss": 0.1594, "step": 4360 }, { - "epoch": 0.11080086305368701, - "grad_norm": 0.6503421068191528, - "learning_rate": 1.926132757964209e-05, - "loss": 0.2075, + "epoch": 0.22158485202294534, + "grad_norm": 0.6104901432991028, + "learning_rate": 1.8522767653180368e-05, + "loss": 0.1613, "step": 4365 }, { - "epoch": 0.11092778271354232, - "grad_norm": 0.9488763809204102, - "learning_rate": 1.9260481448576388e-05, - "loss": 0.2356, + "epoch": 0.2218386720138078, + "grad_norm": 0.6495379209518433, + "learning_rate": 1.852107551990795e-05, + "loss": 0.1595, "step": 4370 }, { - "epoch": 0.11105470237339764, - "grad_norm": 1.2179583311080933, - "learning_rate": 1.9259635317510683e-05, - "loss": 0.2226, + "epoch": 0.2220924920046703, + "grad_norm": 0.5626767873764038, + "learning_rate": 1.8519383386635532e-05, + "loss": 0.1716, "step": 4375 }, { - "epoch": 0.11118162203325295, - "grad_norm": 0.8963335752487183, - "learning_rate": 1.925878918644498e-05, - "loss": 0.2081, + "epoch": 0.22234631199553276, + "grad_norm": 0.5380212664604187, + "learning_rate": 1.8517691253363116e-05, + "loss": 0.1745, "step": 4380 }, { - "epoch": 0.11130854169310826, - "grad_norm": 0.7322095036506653, - "learning_rate": 1.925794305537928e-05, - "loss": 0.1864, + "epoch": 0.22260013198639525, + "grad_norm": 0.5656577348709106, + "learning_rate": 1.85159991200907e-05, + "loss": 0.1812, "step": 4385 }, { - "epoch": 0.11143546135296357, - "grad_norm": 0.8519896864891052, - "learning_rate": 1.925709692431358e-05, - "loss": 0.2117, + "epoch": 0.22285395197725774, + "grad_norm": 0.5490018129348755, + "learning_rate": 1.8514306986818283e-05, + "loss": 0.1724, "step": 4390 }, { - "epoch": 0.11156238101281889, - "grad_norm": 1.0053905248641968, - "learning_rate": 1.9256250793247873e-05, - "loss": 0.184, + "epoch": 0.2231077719681202, + "grad_norm": 0.563014805316925, + "learning_rate": 1.8512614853545866e-05, + "loss": 0.1547, "step": 4395 }, { - "epoch": 0.1116893006726742, - "grad_norm": 0.8344958424568176, - "learning_rate": 1.9255404662182172e-05, - "loss": 0.2136, + "epoch": 0.2233615919589827, + "grad_norm": 0.6768389344215393, + "learning_rate": 1.851092272027345e-05, + "loss": 0.1643, "step": 4400 }, { - "epoch": 0.11181622033252951, - "grad_norm": 1.2814654111862183, - "learning_rate": 1.925455853111647e-05, - "loss": 0.2395, + "epoch": 0.22361541194984516, + "grad_norm": 0.9782013893127441, + "learning_rate": 1.8509230587001033e-05, + "loss": 0.1556, "step": 4405 }, { - "epoch": 0.11194313999238482, - "grad_norm": 0.6529009938240051, - "learning_rate": 1.925371240005077e-05, - "loss": 0.2108, + "epoch": 0.22386923194070765, + "grad_norm": 0.6855179071426392, + "learning_rate": 1.8507538453728617e-05, + "loss": 0.167, "step": 4410 }, { - "epoch": 0.11207005965224014, - "grad_norm": 6.508125305175781, - "learning_rate": 1.9252866268985067e-05, - "loss": 0.2606, + "epoch": 0.22412305193157014, + "grad_norm": 0.9513364434242249, + "learning_rate": 1.85058463204562e-05, + "loss": 0.1615, "step": 4415 }, { - "epoch": 0.11219697931209545, - "grad_norm": 0.8124038577079773, - "learning_rate": 1.9252020137919365e-05, - "loss": 0.1782, + "epoch": 0.2243768719224326, + "grad_norm": 0.5813558101654053, + "learning_rate": 1.8504154187183784e-05, + "loss": 0.1509, "step": 4420 }, { - "epoch": 0.11232389897195076, - "grad_norm": 1.1028518676757812, - "learning_rate": 1.9251174006853664e-05, - "loss": 0.2124, + "epoch": 0.2246306919132951, + "grad_norm": 0.8219256401062012, + "learning_rate": 1.8502462053911367e-05, + "loss": 0.1616, "step": 4425 }, { - "epoch": 0.11245081863180606, - "grad_norm": 0.6230480670928955, - "learning_rate": 1.9250327875787962e-05, - "loss": 0.1776, + "epoch": 0.22488451190415756, + "grad_norm": 0.4321226477622986, + "learning_rate": 1.850076992063895e-05, + "loss": 0.155, "step": 4430 }, { - "epoch": 0.11257773829166137, - "grad_norm": 1.9197957515716553, - "learning_rate": 1.9249481744722257e-05, - "loss": 0.2198, + "epoch": 0.22513833189502006, + "grad_norm": 0.6410679817199707, + "learning_rate": 1.8499077787366534e-05, + "loss": 0.1658, "step": 4435 }, { - "epoch": 0.11270465795151668, - "grad_norm": 0.8994693160057068, - "learning_rate": 1.9248635613656556e-05, - "loss": 0.2367, + "epoch": 0.22539215188588252, + "grad_norm": 0.6968072056770325, + "learning_rate": 1.8497385654094118e-05, + "loss": 0.1574, "step": 4440 }, { - "epoch": 0.112831577611372, - "grad_norm": 0.9597081542015076, - "learning_rate": 1.9247789482590854e-05, - "loss": 0.2114, + "epoch": 0.225645971876745, + "grad_norm": 0.8063611388206482, + "learning_rate": 1.84956935208217e-05, + "loss": 0.149, "step": 4445 }, { - "epoch": 0.11295849727122731, - "grad_norm": 0.7435235977172852, - "learning_rate": 1.9246943351525153e-05, - "loss": 0.1966, + "epoch": 0.2258997918676075, + "grad_norm": 0.5573265552520752, + "learning_rate": 1.8494001387549285e-05, + "loss": 0.1624, "step": 4450 }, { - "epoch": 0.11308541693108262, - "grad_norm": 0.9796454310417175, - "learning_rate": 1.924609722045945e-05, - "loss": 0.1912, + "epoch": 0.22615361185846997, + "grad_norm": 0.6173406839370728, + "learning_rate": 1.849230925427687e-05, + "loss": 0.1539, "step": 4455 }, { - "epoch": 0.11321233659093793, - "grad_norm": 1.2359586954116821, - "learning_rate": 1.924525108939375e-05, - "loss": 0.1883, + "epoch": 0.22640743184933246, + "grad_norm": 0.46205034852027893, + "learning_rate": 1.8490617121004452e-05, + "loss": 0.1546, "step": 4460 }, { - "epoch": 0.11333925625079325, - "grad_norm": 1.1114424467086792, - "learning_rate": 1.9244404958328048e-05, - "loss": 0.1975, + "epoch": 0.22666125184019492, + "grad_norm": 0.5302807688713074, + "learning_rate": 1.8488924987732036e-05, + "loss": 0.1712, "step": 4465 }, { - "epoch": 0.11346617591064856, - "grad_norm": 0.9696239829063416, - "learning_rate": 1.9243558827262346e-05, - "loss": 0.2073, + "epoch": 0.2269150718310574, + "grad_norm": 0.6777194738388062, + "learning_rate": 1.848723285445962e-05, + "loss": 0.1613, "step": 4470 }, { - "epoch": 0.11359309557050387, - "grad_norm": 1.0914586782455444, - "learning_rate": 1.924271269619664e-05, - "loss": 0.1884, + "epoch": 0.2271688918219199, + "grad_norm": 0.4836040735244751, + "learning_rate": 1.8485540721187203e-05, + "loss": 0.166, "step": 4475 }, { - "epoch": 0.11372001523035918, - "grad_norm": 0.9296486973762512, - "learning_rate": 1.924186656513094e-05, - "loss": 0.2043, + "epoch": 0.22742271181278237, + "grad_norm": 0.7450656890869141, + "learning_rate": 1.8483848587914786e-05, + "loss": 0.1776, "step": 4480 }, { - "epoch": 0.1138469348902145, - "grad_norm": 1.0891261100769043, - "learning_rate": 1.9241020434065238e-05, - "loss": 0.2035, + "epoch": 0.22767653180364486, + "grad_norm": 1.0024539232254028, + "learning_rate": 1.8482156454642367e-05, + "loss": 0.1649, "step": 4485 }, { - "epoch": 0.11397385455006981, - "grad_norm": 0.7081494331359863, - "learning_rate": 1.9240174302999536e-05, - "loss": 0.1846, + "epoch": 0.22793035179450732, + "grad_norm": 0.8160313963890076, + "learning_rate": 1.8480464321369953e-05, + "loss": 0.1389, "step": 4490 }, { - "epoch": 0.11410077420992512, - "grad_norm": 1.2113571166992188, - "learning_rate": 1.9239328171933835e-05, - "loss": 0.1797, + "epoch": 0.22818417178536982, + "grad_norm": 0.495733380317688, + "learning_rate": 1.8478772188097537e-05, + "loss": 0.1434, "step": 4495 }, { - "epoch": 0.11422769386978043, - "grad_norm": 2.346068859100342, - "learning_rate": 1.9238482040868133e-05, - "loss": 0.2414, + "epoch": 0.2284379917762323, + "grad_norm": 0.6479185819625854, + "learning_rate": 1.847708005482512e-05, + "loss": 0.1492, "step": 4500 }, { - "epoch": 0.11435461352963575, - "grad_norm": 0.6107497811317444, - "learning_rate": 1.923763590980243e-05, - "loss": 0.1965, + "epoch": 0.22869181176709477, + "grad_norm": 0.6237260103225708, + "learning_rate": 1.8475387921552704e-05, + "loss": 0.1692, "step": 4505 }, { - "epoch": 0.11448153318949106, - "grad_norm": 0.9806370139122009, - "learning_rate": 1.923678977873673e-05, - "loss": 0.2182, + "epoch": 0.22894563175795726, + "grad_norm": 0.5665149688720703, + "learning_rate": 1.8473695788280284e-05, + "loss": 0.1783, "step": 4510 }, { - "epoch": 0.11460845284934637, - "grad_norm": 0.5246864557266235, - "learning_rate": 1.9235943647671025e-05, - "loss": 0.1699, + "epoch": 0.22919945174881973, + "grad_norm": 0.7720448970794678, + "learning_rate": 1.847200365500787e-05, + "loss": 0.1766, "step": 4515 }, { - "epoch": 0.11473537250920167, - "grad_norm": 0.7907321453094482, - "learning_rate": 1.9235097516605323e-05, - "loss": 0.1907, + "epoch": 0.22945327173968222, + "grad_norm": 0.6933048963546753, + "learning_rate": 1.8470311521735455e-05, + "loss": 0.1655, "step": 4520 }, { - "epoch": 0.11486229216905698, - "grad_norm": 0.9656054377555847, - "learning_rate": 1.9234251385539622e-05, - "loss": 0.2121, + "epoch": 0.2297070917305447, + "grad_norm": 0.7732922434806824, + "learning_rate": 1.8468619388463035e-05, + "loss": 0.1609, "step": 4525 }, { - "epoch": 0.1149892118289123, - "grad_norm": 0.9709194898605347, - "learning_rate": 1.923340525447392e-05, - "loss": 0.2109, + "epoch": 0.22996091172140717, + "grad_norm": 0.9285184741020203, + "learning_rate": 1.8466927255190622e-05, + "loss": 0.1476, "step": 4530 }, { - "epoch": 0.1151161314887676, - "grad_norm": 0.7866454124450684, - "learning_rate": 1.9232559123408215e-05, - "loss": 0.2281, + "epoch": 0.23021473171226967, + "grad_norm": 0.5524913668632507, + "learning_rate": 1.8465235121918202e-05, + "loss": 0.1541, "step": 4535 }, { - "epoch": 0.11524305114862292, - "grad_norm": 1.0973340272903442, - "learning_rate": 1.9231712992342514e-05, - "loss": 0.2191, + "epoch": 0.23046855170313213, + "grad_norm": 0.5339060425758362, + "learning_rate": 1.8463542988645786e-05, + "loss": 0.1802, "step": 4540 }, { - "epoch": 0.11536997080847823, - "grad_norm": 0.7484031915664673, - "learning_rate": 1.9230866861276812e-05, - "loss": 0.1997, + "epoch": 0.23072237169399462, + "grad_norm": 0.5357369184494019, + "learning_rate": 1.8461850855373372e-05, + "loss": 0.162, "step": 4545 }, { - "epoch": 0.11549689046833354, - "grad_norm": 0.7914714217185974, - "learning_rate": 1.923002073021111e-05, - "loss": 0.2224, + "epoch": 0.2309761916848571, + "grad_norm": 0.6212167739868164, + "learning_rate": 1.8460158722100953e-05, + "loss": 0.1612, "step": 4550 }, { - "epoch": 0.11562381012818886, - "grad_norm": 1.023216962814331, - "learning_rate": 1.922917459914541e-05, - "loss": 0.2085, + "epoch": 0.23123001167571958, + "grad_norm": 0.5287737250328064, + "learning_rate": 1.845846658882854e-05, + "loss": 0.1445, "step": 4555 }, { - "epoch": 0.11575072978804417, - "grad_norm": 0.6812121868133545, - "learning_rate": 1.9228328468079707e-05, - "loss": 0.1805, + "epoch": 0.23148383166658207, + "grad_norm": 0.5808826088905334, + "learning_rate": 1.845677445555612e-05, + "loss": 0.1443, "step": 4560 }, { - "epoch": 0.11587764944789948, - "grad_norm": 1.7417243719100952, - "learning_rate": 1.9227482337014006e-05, - "loss": 0.1777, + "epoch": 0.23173765165744453, + "grad_norm": 0.47897592186927795, + "learning_rate": 1.8455082322283703e-05, + "loss": 0.1503, "step": 4565 }, { - "epoch": 0.11600456910775479, - "grad_norm": 1.0682921409606934, - "learning_rate": 1.9226636205948304e-05, - "loss": 0.2034, + "epoch": 0.23199147164830702, + "grad_norm": 0.7037847638130188, + "learning_rate": 1.845339018901129e-05, + "loss": 0.1676, "step": 4570 }, { - "epoch": 0.1161314887676101, - "grad_norm": 1.2180947065353394, - "learning_rate": 1.92257900748826e-05, - "loss": 0.2048, + "epoch": 0.2322452916391695, + "grad_norm": 0.4233403503894806, + "learning_rate": 1.845169805573887e-05, + "loss": 0.1459, "step": 4575 }, { - "epoch": 0.11625840842746542, - "grad_norm": 0.7795225381851196, - "learning_rate": 1.9224943943816897e-05, - "loss": 0.2364, + "epoch": 0.23249911163003198, + "grad_norm": 1.0100395679473877, + "learning_rate": 1.8450005922466454e-05, + "loss": 0.1742, "step": 4580 }, { - "epoch": 0.11638532808732073, - "grad_norm": 1.0028471946716309, - "learning_rate": 1.9224097812751196e-05, - "loss": 0.2293, + "epoch": 0.23275293162089447, + "grad_norm": 0.6140120029449463, + "learning_rate": 1.8448313789194037e-05, + "loss": 0.1439, "step": 4585 }, { - "epoch": 0.11651224774717604, - "grad_norm": 0.829910933971405, - "learning_rate": 1.9223251681685494e-05, - "loss": 0.2189, + "epoch": 0.23300675161175693, + "grad_norm": 0.5431662201881409, + "learning_rate": 1.844662165592162e-05, + "loss": 0.1444, "step": 4590 }, { - "epoch": 0.11663916740703135, - "grad_norm": 0.8888054490089417, - "learning_rate": 1.9222405550619793e-05, - "loss": 0.2253, + "epoch": 0.23326057160261943, + "grad_norm": 0.7064282894134521, + "learning_rate": 1.8444929522649208e-05, + "loss": 0.1598, "step": 4595 }, { - "epoch": 0.11676608706688667, - "grad_norm": 0.871583104133606, - "learning_rate": 1.922155941955409e-05, - "loss": 0.158, + "epoch": 0.2335143915934819, + "grad_norm": 0.5530514717102051, + "learning_rate": 1.8443237389376788e-05, + "loss": 0.1514, "step": 4600 }, { - "epoch": 0.11689300672674197, - "grad_norm": 0.9991680383682251, - "learning_rate": 1.922071328848839e-05, - "loss": 0.1954, + "epoch": 0.23376821158434438, + "grad_norm": 0.5173631906509399, + "learning_rate": 1.844154525610437e-05, + "loss": 0.1581, "step": 4605 }, { - "epoch": 0.11701992638659728, - "grad_norm": 0.9313769340515137, - "learning_rate": 1.9219867157422688e-05, - "loss": 0.2116, + "epoch": 0.23402203157520687, + "grad_norm": 0.5507360100746155, + "learning_rate": 1.8439853122831955e-05, + "loss": 0.1555, "step": 4610 }, { - "epoch": 0.11714684604645259, - "grad_norm": 1.2132656574249268, - "learning_rate": 1.9219021026356983e-05, - "loss": 0.215, + "epoch": 0.23427585156606934, + "grad_norm": 0.6322073340415955, + "learning_rate": 1.843816098955954e-05, + "loss": 0.1734, "step": 4615 }, { - "epoch": 0.1172737657063079, - "grad_norm": 0.6507996916770935, - "learning_rate": 1.921817489529128e-05, - "loss": 0.2085, + "epoch": 0.23452967155693183, + "grad_norm": 0.6972060799598694, + "learning_rate": 1.8436468856287122e-05, + "loss": 0.1554, "step": 4620 }, { - "epoch": 0.11740068536616322, - "grad_norm": 0.695362389087677, - "learning_rate": 1.921732876422558e-05, - "loss": 0.1773, + "epoch": 0.2347834915477943, + "grad_norm": 0.4970358610153198, + "learning_rate": 1.8434776723014706e-05, + "loss": 0.1679, "step": 4625 }, { - "epoch": 0.11752760502601853, - "grad_norm": 0.9689681529998779, - "learning_rate": 1.9216482633159878e-05, - "loss": 0.2367, + "epoch": 0.23503731153865678, + "grad_norm": 0.5872799158096313, + "learning_rate": 1.843308458974229e-05, + "loss": 0.1489, "step": 4630 }, { - "epoch": 0.11765452468587384, - "grad_norm": 0.6860599517822266, - "learning_rate": 1.9215636502094176e-05, - "loss": 0.2223, + "epoch": 0.23529113152951928, + "grad_norm": 1.1778312921524048, + "learning_rate": 1.8431392456469873e-05, + "loss": 0.1502, "step": 4635 }, { - "epoch": 0.11778144434572915, - "grad_norm": 0.8819100856781006, - "learning_rate": 1.9214790371028475e-05, - "loss": 0.2, + "epoch": 0.23554495152038174, + "grad_norm": 0.5631628036499023, + "learning_rate": 1.8429700323197456e-05, + "loss": 0.1709, "step": 4640 }, { - "epoch": 0.11790836400558447, - "grad_norm": 0.7338300347328186, - "learning_rate": 1.9213944239962773e-05, - "loss": 0.1924, + "epoch": 0.23579877151124423, + "grad_norm": 0.7208503484725952, + "learning_rate": 1.842800818992504e-05, + "loss": 0.1591, "step": 4645 }, { - "epoch": 0.11803528366543978, - "grad_norm": 1.1397582292556763, - "learning_rate": 1.921309810889707e-05, - "loss": 0.2194, + "epoch": 0.2360525915021067, + "grad_norm": 0.5835586786270142, + "learning_rate": 1.8426316056652624e-05, + "loss": 0.16, "step": 4650 }, { - "epoch": 0.11816220332529509, - "grad_norm": 1.020789623260498, - "learning_rate": 1.921225197783137e-05, - "loss": 0.2155, + "epoch": 0.2363064114929692, + "grad_norm": 0.5638494491577148, + "learning_rate": 1.8424623923380207e-05, + "loss": 0.1534, "step": 4655 }, { - "epoch": 0.1182891229851504, - "grad_norm": 1.2411658763885498, - "learning_rate": 1.9211405846765665e-05, - "loss": 0.2225, + "epoch": 0.23656023148383168, + "grad_norm": 0.5742624998092651, + "learning_rate": 1.842293179010779e-05, + "loss": 0.1587, "step": 4660 }, { - "epoch": 0.11841604264500571, - "grad_norm": 0.8273743391036987, - "learning_rate": 1.9210559715699963e-05, - "loss": 0.2029, + "epoch": 0.23681405147469414, + "grad_norm": 1.1678133010864258, + "learning_rate": 1.8421239656835374e-05, + "loss": 0.1577, "step": 4665 }, { - "epoch": 0.11854296230486103, - "grad_norm": 0.8867537975311279, - "learning_rate": 1.9209713584634262e-05, - "loss": 0.22, + "epoch": 0.23706787146555663, + "grad_norm": 0.5317492485046387, + "learning_rate": 1.8419547523562958e-05, + "loss": 0.1635, "step": 4670 }, { - "epoch": 0.11866988196471634, - "grad_norm": 0.7572629451751709, - "learning_rate": 1.920886745356856e-05, - "loss": 0.1727, + "epoch": 0.2373216914564191, + "grad_norm": 0.5751121640205383, + "learning_rate": 1.841785539029054e-05, + "loss": 0.1619, "step": 4675 }, { - "epoch": 0.11879680162457165, - "grad_norm": 0.6054407954216003, - "learning_rate": 1.9208021322502855e-05, - "loss": 0.2185, + "epoch": 0.2375755114472816, + "grad_norm": 0.5913323163986206, + "learning_rate": 1.8416163257018125e-05, + "loss": 0.1438, "step": 4680 }, { - "epoch": 0.11892372128442696, - "grad_norm": 1.8818379640579224, - "learning_rate": 1.9207175191437154e-05, - "loss": 0.2231, + "epoch": 0.23782933143814408, + "grad_norm": 0.43694448471069336, + "learning_rate": 1.841447112374571e-05, + "loss": 0.1558, "step": 4685 }, { - "epoch": 0.11905064094428226, - "grad_norm": 0.851755678653717, - "learning_rate": 1.9206329060371452e-05, - "loss": 0.2408, + "epoch": 0.23808315142900655, + "grad_norm": 0.580193042755127, + "learning_rate": 1.841277899047329e-05, + "loss": 0.1544, "step": 4690 }, { - "epoch": 0.11917756060413758, - "grad_norm": 0.7299715280532837, - "learning_rate": 1.920548292930575e-05, - "loss": 0.1971, + "epoch": 0.23833697141986904, + "grad_norm": 0.5924519896507263, + "learning_rate": 1.8411086857200875e-05, + "loss": 0.1766, "step": 4695 }, { - "epoch": 0.11930448026399289, - "grad_norm": 0.8004871010780334, - "learning_rate": 1.920463679824005e-05, - "loss": 0.2042, + "epoch": 0.2385907914107315, + "grad_norm": 0.5101874470710754, + "learning_rate": 1.840939472392846e-05, + "loss": 0.149, "step": 4700 }, { - "epoch": 0.1194313999238482, - "grad_norm": 0.9361944198608398, - "learning_rate": 1.9203790667174347e-05, - "loss": 0.218, + "epoch": 0.238844611401594, + "grad_norm": 0.5736780762672424, + "learning_rate": 1.8407702590656043e-05, + "loss": 0.1663, "step": 4705 }, { - "epoch": 0.11955831958370351, - "grad_norm": 1.396208643913269, - "learning_rate": 1.9202944536108646e-05, - "loss": 0.2016, + "epoch": 0.23909843139245646, + "grad_norm": 0.4370191693305969, + "learning_rate": 1.8406010457383626e-05, + "loss": 0.1512, "step": 4710 }, { - "epoch": 0.11968523924355882, - "grad_norm": 0.711529016494751, - "learning_rate": 1.9202098405042944e-05, - "loss": 0.2077, + "epoch": 0.23935225138331895, + "grad_norm": 0.545307993888855, + "learning_rate": 1.8404318324111206e-05, + "loss": 0.1537, "step": 4715 }, { - "epoch": 0.11981215890341414, - "grad_norm": 0.9275544285774231, - "learning_rate": 1.920125227397724e-05, - "loss": 0.2772, + "epoch": 0.23960607137418144, + "grad_norm": 0.680793285369873, + "learning_rate": 1.8402626190838793e-05, + "loss": 0.1798, "step": 4720 }, { - "epoch": 0.11993907856326945, - "grad_norm": 1.13945472240448, - "learning_rate": 1.9200406142911538e-05, - "loss": 0.2222, + "epoch": 0.2398598913650439, + "grad_norm": 0.7056201100349426, + "learning_rate": 1.8400934057566377e-05, + "loss": 0.1549, "step": 4725 }, { - "epoch": 0.12006599822312476, - "grad_norm": 1.056583046913147, - "learning_rate": 1.9199560011845836e-05, - "loss": 0.1961, + "epoch": 0.2401137113559064, + "grad_norm": 0.4889606535434723, + "learning_rate": 1.8399241924293957e-05, + "loss": 0.1701, "step": 4730 }, { - "epoch": 0.12019291788298007, - "grad_norm": 0.8818914294242859, - "learning_rate": 1.9198713880780134e-05, - "loss": 0.2043, + "epoch": 0.24036753134676886, + "grad_norm": 0.5846245884895325, + "learning_rate": 1.8397549791021544e-05, + "loss": 0.1456, "step": 4735 }, { - "epoch": 0.12031983754283539, - "grad_norm": 0.9523479342460632, - "learning_rate": 1.9197867749714433e-05, - "loss": 0.2042, + "epoch": 0.24062135133763135, + "grad_norm": 0.6689904928207397, + "learning_rate": 1.8395857657749124e-05, + "loss": 0.1648, "step": 4740 }, { - "epoch": 0.1204467572026907, - "grad_norm": 1.0179616212844849, - "learning_rate": 1.919702161864873e-05, - "loss": 0.2376, + "epoch": 0.24087517132849384, + "grad_norm": 0.5623743534088135, + "learning_rate": 1.839416552447671e-05, + "loss": 0.1538, "step": 4745 }, { - "epoch": 0.12057367686254601, - "grad_norm": 1.0485228300094604, - "learning_rate": 1.919617548758303e-05, - "loss": 0.2008, + "epoch": 0.2411289913193563, + "grad_norm": 0.8147268295288086, + "learning_rate": 1.8392473391204294e-05, + "loss": 0.1594, "step": 4750 }, { - "epoch": 0.12070059652240132, - "grad_norm": 1.0348702669143677, - "learning_rate": 1.9195329356517328e-05, - "loss": 0.2268, + "epoch": 0.2413828113102188, + "grad_norm": 0.49249938130378723, + "learning_rate": 1.8390781257931875e-05, + "loss": 0.1728, "step": 4755 }, { - "epoch": 0.12082751618225664, - "grad_norm": 0.861442506313324, - "learning_rate": 1.9194483225451623e-05, - "loss": 0.2109, + "epoch": 0.24163663130108126, + "grad_norm": 0.4992578327655792, + "learning_rate": 1.838908912465946e-05, + "loss": 0.1593, "step": 4760 }, { - "epoch": 0.12095443584211195, - "grad_norm": 0.7748829126358032, - "learning_rate": 1.919363709438592e-05, - "loss": 0.195, + "epoch": 0.24189045129194375, + "grad_norm": 0.5691167712211609, + "learning_rate": 1.838739699138704e-05, + "loss": 0.1537, "step": 4765 }, { - "epoch": 0.12108135550196726, - "grad_norm": 1.211017370223999, - "learning_rate": 1.919279096332022e-05, - "loss": 0.1946, + "epoch": 0.24214427128280624, + "grad_norm": 0.5181019902229309, + "learning_rate": 1.8385704858114625e-05, + "loss": 0.148, "step": 4770 }, { - "epoch": 0.12120827516182256, - "grad_norm": 0.9302659034729004, - "learning_rate": 1.9191944832254518e-05, - "loss": 0.1781, + "epoch": 0.2423980912736687, + "grad_norm": 0.4930359423160553, + "learning_rate": 1.8384012724842212e-05, + "loss": 0.1518, "step": 4775 }, { - "epoch": 0.12133519482167787, - "grad_norm": 1.2043062448501587, - "learning_rate": 1.9191098701188817e-05, - "loss": 0.1847, + "epoch": 0.2426519112645312, + "grad_norm": 0.6070294380187988, + "learning_rate": 1.8382320591569792e-05, + "loss": 0.1812, "step": 4780 }, { - "epoch": 0.12146211448153318, - "grad_norm": 1.6692134141921997, - "learning_rate": 1.9190252570123115e-05, - "loss": 0.1936, + "epoch": 0.24290573125539366, + "grad_norm": 0.6835089921951294, + "learning_rate": 1.8380628458297376e-05, + "loss": 0.1767, "step": 4785 }, { - "epoch": 0.1215890341413885, - "grad_norm": 0.989179253578186, - "learning_rate": 1.9189406439057413e-05, - "loss": 0.2421, + "epoch": 0.24315955124625616, + "grad_norm": 0.7003611326217651, + "learning_rate": 1.837893632502496e-05, + "loss": 0.1476, "step": 4790 }, { - "epoch": 0.12171595380124381, - "grad_norm": 0.8882060050964355, - "learning_rate": 1.9188560307991712e-05, - "loss": 0.2043, + "epoch": 0.24341337123711865, + "grad_norm": 0.4410546123981476, + "learning_rate": 1.8377244191752543e-05, + "loss": 0.1493, "step": 4795 }, { - "epoch": 0.12184287346109912, - "grad_norm": 0.7609587907791138, - "learning_rate": 1.9187714176926007e-05, - "loss": 0.214, + "epoch": 0.2436671912279811, + "grad_norm": 0.6904603242874146, + "learning_rate": 1.837555205848013e-05, + "loss": 0.1473, "step": 4800 }, { - "epoch": 0.12196979312095443, - "grad_norm": 1.0931562185287476, - "learning_rate": 1.9186868045860305e-05, - "loss": 0.1843, + "epoch": 0.2439210112188436, + "grad_norm": 0.5898627638816833, + "learning_rate": 1.837385992520771e-05, + "loss": 0.1576, "step": 4805 }, { - "epoch": 0.12209671278080975, - "grad_norm": 0.6345117092132568, - "learning_rate": 1.9186021914794604e-05, - "loss": 0.1849, + "epoch": 0.24417483120970607, + "grad_norm": 0.5015605092048645, + "learning_rate": 1.8372167791935294e-05, + "loss": 0.1438, "step": 4810 }, { - "epoch": 0.12222363244066506, - "grad_norm": 1.5469715595245361, - "learning_rate": 1.9185175783728902e-05, - "loss": 0.2073, + "epoch": 0.24442865120056856, + "grad_norm": 0.7323523163795471, + "learning_rate": 1.8370475658662877e-05, + "loss": 0.1609, "step": 4815 }, { - "epoch": 0.12235055210052037, - "grad_norm": 1.000430941581726, - "learning_rate": 1.9184329652663197e-05, - "loss": 0.2184, + "epoch": 0.24468247119143105, + "grad_norm": 0.5204626321792603, + "learning_rate": 1.836878352539046e-05, + "loss": 0.169, "step": 4820 }, { - "epoch": 0.12247747176037568, - "grad_norm": 1.1145352125167847, - "learning_rate": 1.9183483521597495e-05, - "loss": 0.1845, + "epoch": 0.24493629118229351, + "grad_norm": 0.5830612182617188, + "learning_rate": 1.8367091392118044e-05, + "loss": 0.1387, "step": 4825 }, { - "epoch": 0.122604391420231, - "grad_norm": 0.8491441011428833, - "learning_rate": 1.9182637390531794e-05, - "loss": 0.1815, + "epoch": 0.245190111173156, + "grad_norm": 0.45259636640548706, + "learning_rate": 1.8365399258845628e-05, + "loss": 0.1599, "step": 4830 }, { - "epoch": 0.12273131108008631, - "grad_norm": 0.6895710229873657, - "learning_rate": 1.9181791259466092e-05, - "loss": 0.1845, + "epoch": 0.24544393116401847, + "grad_norm": 0.6329886317253113, + "learning_rate": 1.836370712557321e-05, + "loss": 0.1701, "step": 4835 }, { - "epoch": 0.12285823073994162, - "grad_norm": 0.6821191906929016, - "learning_rate": 1.918094512840039e-05, - "loss": 0.1962, + "epoch": 0.24569775115488096, + "grad_norm": 0.4662386476993561, + "learning_rate": 1.8362014992300795e-05, + "loss": 0.1307, "step": 4840 }, { - "epoch": 0.12298515039979693, - "grad_norm": 0.8433831930160522, - "learning_rate": 1.918009899733469e-05, - "loss": 0.2122, + "epoch": 0.24595157114574343, + "grad_norm": 0.5723447799682617, + "learning_rate": 1.836032285902838e-05, + "loss": 0.1635, "step": 4845 }, { - "epoch": 0.12311207005965225, - "grad_norm": 0.7010766863822937, - "learning_rate": 1.9179252866268987e-05, - "loss": 0.162, + "epoch": 0.24620539113660592, + "grad_norm": 0.9412028789520264, + "learning_rate": 1.8358630725755962e-05, + "loss": 0.1704, "step": 4850 }, { - "epoch": 0.12323898971950756, - "grad_norm": 0.83511883020401, - "learning_rate": 1.9178406735203286e-05, - "loss": 0.2176, + "epoch": 0.2464592111274684, + "grad_norm": 0.6017778515815735, + "learning_rate": 1.8356938592483545e-05, + "loss": 0.1658, "step": 4855 }, { - "epoch": 0.12336590937936286, - "grad_norm": 0.7897379994392395, - "learning_rate": 1.917756060413758e-05, - "loss": 0.2028, + "epoch": 0.24671303111833087, + "grad_norm": 0.45639511942863464, + "learning_rate": 1.835524645921113e-05, + "loss": 0.1519, "step": 4860 }, { - "epoch": 0.12349282903921817, - "grad_norm": 2.2418625354766846, - "learning_rate": 1.917671447307188e-05, - "loss": 0.1884, + "epoch": 0.24696685110919336, + "grad_norm": 0.5620295405387878, + "learning_rate": 1.8353554325938713e-05, + "loss": 0.1727, "step": 4865 }, { - "epoch": 0.12361974869907348, - "grad_norm": 0.8522242307662964, - "learning_rate": 1.9175868342006178e-05, - "loss": 0.2168, + "epoch": 0.24722067110005583, + "grad_norm": 0.5075967907905579, + "learning_rate": 1.8351862192666296e-05, + "loss": 0.1656, "step": 4870 }, { - "epoch": 0.1237466683589288, - "grad_norm": 0.7293094992637634, - "learning_rate": 1.9175022210940476e-05, - "loss": 0.1983, + "epoch": 0.24747449109091832, + "grad_norm": 0.7711220383644104, + "learning_rate": 1.835017005939388e-05, + "loss": 0.1705, "step": 4875 }, { - "epoch": 0.1238735880187841, - "grad_norm": 1.0028489828109741, - "learning_rate": 1.9174176079874774e-05, - "loss": 0.2062, + "epoch": 0.2477283110817808, + "grad_norm": 0.5555239319801331, + "learning_rate": 1.8348477926121463e-05, + "loss": 0.1239, "step": 4880 }, { - "epoch": 0.12400050767863942, - "grad_norm": 1.4196940660476685, - "learning_rate": 1.9173329948809073e-05, - "loss": 0.2006, + "epoch": 0.24798213107264327, + "grad_norm": 0.4874543845653534, + "learning_rate": 1.8346785792849047e-05, + "loss": 0.1596, "step": 4885 }, { - "epoch": 0.12412742733849473, - "grad_norm": 0.9900663495063782, - "learning_rate": 1.917248381774337e-05, - "loss": 0.173, + "epoch": 0.24823595106350577, + "grad_norm": 0.49628138542175293, + "learning_rate": 1.834509365957663e-05, + "loss": 0.142, "step": 4890 }, { - "epoch": 0.12425434699835004, - "grad_norm": 0.7599760293960571, - "learning_rate": 1.917163768667767e-05, - "loss": 0.2206, + "epoch": 0.24848977105436823, + "grad_norm": 0.6575145125389099, + "learning_rate": 1.8343401526304214e-05, + "loss": 0.1602, "step": 4895 }, { - "epoch": 0.12438126665820536, - "grad_norm": 1.1602343320846558, - "learning_rate": 1.9170791555611965e-05, - "loss": 0.1919, + "epoch": 0.24874359104523072, + "grad_norm": 0.4418342709541321, + "learning_rate": 1.8341709393031797e-05, + "loss": 0.1574, "step": 4900 }, { - "epoch": 0.12450818631806067, - "grad_norm": 0.8975493311882019, - "learning_rate": 1.9169945424546263e-05, - "loss": 0.2279, + "epoch": 0.2489974110360932, + "grad_norm": 0.7797775864601135, + "learning_rate": 1.834001725975938e-05, + "loss": 0.1718, "step": 4905 }, { - "epoch": 0.12463510597791598, - "grad_norm": 0.6388280987739563, - "learning_rate": 1.916909929348056e-05, - "loss": 0.2044, + "epoch": 0.24925123102695568, + "grad_norm": 0.87046879529953, + "learning_rate": 1.8338325126486964e-05, + "loss": 0.1667, "step": 4910 }, { - "epoch": 0.12476202563777129, - "grad_norm": 0.6177375316619873, - "learning_rate": 1.916825316241486e-05, - "loss": 0.2258, + "epoch": 0.24950505101781817, + "grad_norm": 0.47170791029930115, + "learning_rate": 1.8336632993214548e-05, + "loss": 0.1376, "step": 4915 }, { - "epoch": 0.1248889452976266, - "grad_norm": 0.8580732941627502, - "learning_rate": 1.9167407031349158e-05, - "loss": 0.2059, + "epoch": 0.24975887100868063, + "grad_norm": 0.6200122833251953, + "learning_rate": 1.8334940859942128e-05, + "loss": 0.1674, "step": 4920 }, { - "epoch": 0.1250158649574819, - "grad_norm": 0.8901786804199219, - "learning_rate": 1.9166560900283457e-05, - "loss": 0.2241, + "epoch": 0.2500126909995431, + "grad_norm": 0.7150362730026245, + "learning_rate": 1.8333248726669715e-05, + "loss": 0.1697, "step": 4925 }, { - "epoch": 0.12514278461733722, - "grad_norm": 1.1941996812820435, - "learning_rate": 1.9165714769217755e-05, - "loss": 0.2233, + "epoch": 0.2502665109904056, + "grad_norm": 0.9218109846115112, + "learning_rate": 1.83315565933973e-05, + "loss": 0.1693, "step": 4930 }, { - "epoch": 0.12526970427719253, - "grad_norm": 0.877659261226654, - "learning_rate": 1.9164868638152053e-05, - "loss": 0.1844, + "epoch": 0.2505203309812681, + "grad_norm": 0.5581763386726379, + "learning_rate": 1.832986446012488e-05, + "loss": 0.1584, "step": 4935 }, { - "epoch": 0.12539662393704784, - "grad_norm": 0.9015358686447144, - "learning_rate": 1.916402250708635e-05, - "loss": 0.188, + "epoch": 0.25077415097213057, + "grad_norm": 0.610871434211731, + "learning_rate": 1.8328172326852466e-05, + "loss": 0.1564, "step": 4940 }, { - "epoch": 0.12552354359690315, - "grad_norm": 0.744577944278717, - "learning_rate": 1.9163176376020647e-05, - "loss": 0.1825, + "epoch": 0.25102797096299306, + "grad_norm": 0.5767541527748108, + "learning_rate": 1.8326480193580046e-05, + "loss": 0.1501, "step": 4945 }, { - "epoch": 0.12565046325675847, - "grad_norm": 0.9511021971702576, - "learning_rate": 1.9162330244954945e-05, - "loss": 0.2538, + "epoch": 0.2512817909538555, + "grad_norm": 0.5301868319511414, + "learning_rate": 1.8324788060307633e-05, + "loss": 0.1444, "step": 4950 }, { - "epoch": 0.12577738291661378, - "grad_norm": 1.4672715663909912, - "learning_rate": 1.9161484113889244e-05, - "loss": 0.2103, + "epoch": 0.251535610944718, + "grad_norm": 0.4195795953273773, + "learning_rate": 1.8323095927035216e-05, + "loss": 0.1529, "step": 4955 }, { - "epoch": 0.1259043025764691, - "grad_norm": 1.0460151433944702, - "learning_rate": 1.916063798282354e-05, - "loss": 0.227, + "epoch": 0.2517894309355805, + "grad_norm": 0.6302689909934998, + "learning_rate": 1.8321403793762797e-05, + "loss": 0.1554, "step": 4960 }, { - "epoch": 0.1260312222363244, - "grad_norm": 1.2163174152374268, - "learning_rate": 1.9159791851757837e-05, - "loss": 0.2231, + "epoch": 0.252043250926443, + "grad_norm": 0.8117844462394714, + "learning_rate": 1.8319711660490383e-05, + "loss": 0.1598, "step": 4965 }, { - "epoch": 0.12615814189617972, - "grad_norm": 1.0316379070281982, - "learning_rate": 1.9158945720692136e-05, - "loss": 0.2102, + "epoch": 0.25229707091730547, + "grad_norm": 0.6213078498840332, + "learning_rate": 1.8318019527217964e-05, + "loss": 0.1598, "step": 4970 }, { - "epoch": 0.12628506155603503, - "grad_norm": 1.267429232597351, - "learning_rate": 1.9158099589626434e-05, - "loss": 0.2136, + "epoch": 0.2525508909081679, + "grad_norm": 0.5293858647346497, + "learning_rate": 1.8316327393945547e-05, + "loss": 0.144, "step": 4975 }, { - "epoch": 0.12641198121589034, - "grad_norm": 9.899612426757812, - "learning_rate": 1.9157253458560732e-05, - "loss": 0.2129, + "epoch": 0.2528047108990304, + "grad_norm": 0.5613870620727539, + "learning_rate": 1.8314635260673134e-05, + "loss": 0.1496, "step": 4980 }, { - "epoch": 0.12653890087574565, - "grad_norm": 0.8212594389915466, - "learning_rate": 1.915640732749503e-05, - "loss": 0.2189, + "epoch": 0.2530585308898929, + "grad_norm": 0.9666538834571838, + "learning_rate": 1.8312943127400714e-05, + "loss": 0.1718, "step": 4985 }, { - "epoch": 0.12666582053560096, - "grad_norm": 1.2683393955230713, - "learning_rate": 1.915556119642933e-05, - "loss": 0.2108, + "epoch": 0.2533123508807554, + "grad_norm": 0.6101076006889343, + "learning_rate": 1.83112509941283e-05, + "loss": 0.1444, "step": 4990 }, { - "epoch": 0.12679274019545628, - "grad_norm": 1.0142894983291626, - "learning_rate": 1.9154715065363628e-05, - "loss": 0.2136, + "epoch": 0.25356617087161787, + "grad_norm": 0.5413176417350769, + "learning_rate": 1.830955886085588e-05, + "loss": 0.1378, "step": 4995 }, { - "epoch": 0.1269196598553116, - "grad_norm": 0.6837312579154968, - "learning_rate": 1.9153868934297923e-05, - "loss": 0.1972, + "epoch": 0.2538199908624803, + "grad_norm": 0.731587827205658, + "learning_rate": 1.8307866727583465e-05, + "loss": 0.1523, "step": 5000 }, { - "epoch": 0.1270465795151669, - "grad_norm": 0.7658743858337402, - "learning_rate": 1.915302280323222e-05, - "loss": 0.2067, + "epoch": 0.2540738108533428, + "grad_norm": 0.6076200008392334, + "learning_rate": 1.8306174594311052e-05, + "loss": 0.1298, "step": 5005 }, { - "epoch": 0.12717349917502221, - "grad_norm": 0.6658621430397034, - "learning_rate": 1.915217667216652e-05, - "loss": 0.1861, + "epoch": 0.2543276308442053, + "grad_norm": 0.4898158013820648, + "learning_rate": 1.8304482461038632e-05, + "loss": 0.1509, "step": 5010 }, { - "epoch": 0.12730041883487753, - "grad_norm": 0.847270667552948, - "learning_rate": 1.9151330541100818e-05, - "loss": 0.1481, + "epoch": 0.2545814508350678, + "grad_norm": 0.6534063816070557, + "learning_rate": 1.8302790327766215e-05, + "loss": 0.1562, "step": 5015 }, { - "epoch": 0.12742733849473284, - "grad_norm": 1.086572527885437, - "learning_rate": 1.9150484410035116e-05, - "loss": 0.2066, + "epoch": 0.25483527082593027, + "grad_norm": 0.8010299801826477, + "learning_rate": 1.83010981944938e-05, + "loss": 0.1422, "step": 5020 }, { - "epoch": 0.12755425815458815, - "grad_norm": 0.8942188024520874, - "learning_rate": 1.9149638278969415e-05, - "loss": 0.1842, + "epoch": 0.2550890908167927, + "grad_norm": 0.6017109751701355, + "learning_rate": 1.8299406061221383e-05, + "loss": 0.1592, "step": 5025 }, { - "epoch": 0.12768117781444346, - "grad_norm": 0.8380123376846313, - "learning_rate": 1.9148792147903713e-05, - "loss": 0.1956, + "epoch": 0.2553429108076552, + "grad_norm": 0.579904317855835, + "learning_rate": 1.8297713927948966e-05, + "loss": 0.1567, "step": 5030 }, { - "epoch": 0.12780809747429878, - "grad_norm": 1.1296532154083252, - "learning_rate": 1.914794601683801e-05, - "loss": 0.1962, + "epoch": 0.2555967307985177, + "grad_norm": 0.94927978515625, + "learning_rate": 1.829602179467655e-05, + "loss": 0.1479, "step": 5035 }, { - "epoch": 0.1279350171341541, - "grad_norm": 1.18803870677948, - "learning_rate": 1.9147099885772306e-05, - "loss": 0.2149, + "epoch": 0.2558505507893802, + "grad_norm": 0.5884600877761841, + "learning_rate": 1.8294329661404133e-05, + "loss": 0.1592, "step": 5040 }, { - "epoch": 0.1280619367940094, - "grad_norm": 1.0930676460266113, - "learning_rate": 1.9146253754706605e-05, - "loss": 0.2102, + "epoch": 0.2561043707802427, + "grad_norm": 0.7687221169471741, + "learning_rate": 1.8292637528131717e-05, + "loss": 0.1514, "step": 5045 }, { - "epoch": 0.1281888564538647, - "grad_norm": 0.8624572157859802, - "learning_rate": 1.9145407623640903e-05, - "loss": 0.2131, + "epoch": 0.2563581907711051, + "grad_norm": 0.7298230528831482, + "learning_rate": 1.82909453948593e-05, + "loss": 0.1746, "step": 5050 }, { - "epoch": 0.12831577611372003, - "grad_norm": 0.6565977931022644, - "learning_rate": 1.91445614925752e-05, - "loss": 0.1837, + "epoch": 0.2566120107619676, + "grad_norm": 0.8364652395248413, + "learning_rate": 1.8289253261586884e-05, + "loss": 0.145, "step": 5055 }, { - "epoch": 0.12844269577357534, - "grad_norm": 1.112851619720459, - "learning_rate": 1.91437153615095e-05, - "loss": 0.2347, + "epoch": 0.2568658307528301, + "grad_norm": 0.799123227596283, + "learning_rate": 1.8287561128314467e-05, + "loss": 0.1438, "step": 5060 }, { - "epoch": 0.12856961543343065, - "grad_norm": 0.9813293814659119, - "learning_rate": 1.91428692304438e-05, - "loss": 0.2257, + "epoch": 0.2571196507436926, + "grad_norm": 0.7807921767234802, + "learning_rate": 1.828586899504205e-05, + "loss": 0.1375, "step": 5065 }, { - "epoch": 0.12869653509328596, - "grad_norm": 0.7298664450645447, - "learning_rate": 1.9142023099378097e-05, - "loss": 0.1922, + "epoch": 0.2573734707345551, + "grad_norm": 0.4554001986980438, + "learning_rate": 1.8284176861769634e-05, + "loss": 0.1586, "step": 5070 }, { - "epoch": 0.12882345475314125, - "grad_norm": 1.2995325326919556, - "learning_rate": 1.9141176968312395e-05, - "loss": 0.1871, + "epoch": 0.2576272907254175, + "grad_norm": 0.5187448263168335, + "learning_rate": 1.8282484728497218e-05, + "loss": 0.1526, "step": 5075 }, { - "epoch": 0.12895037441299656, - "grad_norm": 0.6218967437744141, - "learning_rate": 1.914033083724669e-05, - "loss": 0.2068, + "epoch": 0.25788111071628, + "grad_norm": 0.5898265242576599, + "learning_rate": 1.82807925952248e-05, + "loss": 0.1523, "step": 5080 }, { - "epoch": 0.12907729407285187, - "grad_norm": 0.8864388465881348, - "learning_rate": 1.913948470618099e-05, - "loss": 0.1813, + "epoch": 0.2581349307071425, + "grad_norm": 0.4496493637561798, + "learning_rate": 1.8279100461952385e-05, + "loss": 0.1583, "step": 5085 }, { - "epoch": 0.12920421373270718, - "grad_norm": 0.7129151225090027, - "learning_rate": 1.9138638575115287e-05, - "loss": 0.1814, + "epoch": 0.258388750698005, + "grad_norm": 0.5907924771308899, + "learning_rate": 1.827740832867997e-05, + "loss": 0.1697, "step": 5090 }, { - "epoch": 0.1293311333925625, - "grad_norm": 1.0529191493988037, - "learning_rate": 1.9137792444049585e-05, - "loss": 0.2116, + "epoch": 0.2586425706888675, + "grad_norm": 0.5844322443008423, + "learning_rate": 1.8275716195407552e-05, + "loss": 0.1562, "step": 5095 }, { - "epoch": 0.1294580530524178, - "grad_norm": 0.755419135093689, - "learning_rate": 1.913694631298388e-05, - "loss": 0.1598, + "epoch": 0.2588963906797299, + "grad_norm": 0.5347046256065369, + "learning_rate": 1.8274024062135136e-05, + "loss": 0.1396, "step": 5100 }, { - "epoch": 0.12958497271227312, - "grad_norm": 0.8069663047790527, - "learning_rate": 1.913610018191818e-05, - "loss": 0.2121, + "epoch": 0.2591502106705924, + "grad_norm": 0.6127249598503113, + "learning_rate": 1.827233192886272e-05, + "loss": 0.1442, "step": 5105 }, { - "epoch": 0.12971189237212843, - "grad_norm": 0.8285185694694519, - "learning_rate": 1.9135254050852477e-05, - "loss": 0.2068, + "epoch": 0.2594040306614549, + "grad_norm": 0.5309034585952759, + "learning_rate": 1.8270639795590303e-05, + "loss": 0.148, "step": 5110 }, { - "epoch": 0.12983881203198375, - "grad_norm": 1.0346201658248901, - "learning_rate": 1.9134407919786776e-05, - "loss": 0.2153, + "epoch": 0.2596578506523174, + "grad_norm": 0.6933298707008362, + "learning_rate": 1.8268947662317886e-05, + "loss": 0.1723, "step": 5115 }, { - "epoch": 0.12996573169183906, - "grad_norm": 0.9951513409614563, - "learning_rate": 1.9133561788721074e-05, - "loss": 0.1773, + "epoch": 0.2599116706431799, + "grad_norm": 0.6623631119728088, + "learning_rate": 1.826725552904547e-05, + "loss": 0.164, "step": 5120 }, { - "epoch": 0.13009265135169437, - "grad_norm": 0.712108850479126, - "learning_rate": 1.9132715657655372e-05, - "loss": 0.1719, + "epoch": 0.2601654906340423, + "grad_norm": 0.7233380079269409, + "learning_rate": 1.826556339577305e-05, + "loss": 0.1501, "step": 5125 }, { - "epoch": 0.13021957101154968, - "grad_norm": 2.519448757171631, - "learning_rate": 1.913186952658967e-05, - "loss": 0.2001, + "epoch": 0.2604193106249048, + "grad_norm": 0.6823766231536865, + "learning_rate": 1.8263871262500637e-05, + "loss": 0.1508, "step": 5130 }, { - "epoch": 0.130346490671405, - "grad_norm": 0.8538994789123535, - "learning_rate": 1.913102339552397e-05, - "loss": 0.2005, + "epoch": 0.2606731306157673, + "grad_norm": 0.5988183617591858, + "learning_rate": 1.826217912922822e-05, + "loss": 0.1495, "step": 5135 }, { - "epoch": 0.1304734103312603, - "grad_norm": 0.6305434703826904, - "learning_rate": 1.9130177264458264e-05, - "loss": 0.2402, + "epoch": 0.2609269506066298, + "grad_norm": 0.48616233468055725, + "learning_rate": 1.8260486995955804e-05, + "loss": 0.1423, "step": 5140 }, { - "epoch": 0.13060032999111562, - "grad_norm": 0.6293193101882935, - "learning_rate": 1.9129331133392563e-05, - "loss": 0.1952, + "epoch": 0.2611807705974923, + "grad_norm": 0.4381769001483917, + "learning_rate": 1.8258794862683388e-05, + "loss": 0.1396, "step": 5145 }, { - "epoch": 0.13072724965097093, - "grad_norm": 0.522113561630249, - "learning_rate": 1.912848500232686e-05, - "loss": 0.1958, + "epoch": 0.2614345905883547, + "grad_norm": 0.4858790934085846, + "learning_rate": 1.8257102729410968e-05, + "loss": 0.1503, "step": 5150 }, { - "epoch": 0.13085416931082625, - "grad_norm": 0.8106921911239624, - "learning_rate": 1.912763887126116e-05, - "loss": 0.1825, + "epoch": 0.2616884105792172, + "grad_norm": 0.5144358277320862, + "learning_rate": 1.8255410596138555e-05, + "loss": 0.1586, "step": 5155 }, { - "epoch": 0.13098108897068156, - "grad_norm": 1.1281088590621948, - "learning_rate": 1.9126792740195458e-05, - "loss": 0.1832, + "epoch": 0.2619422305700797, + "grad_norm": 0.46459242701530457, + "learning_rate": 1.8253718462866138e-05, + "loss": 0.1571, "step": 5160 }, { - "epoch": 0.13110800863053687, - "grad_norm": 0.8852180242538452, - "learning_rate": 1.9125946609129756e-05, - "loss": 0.1967, + "epoch": 0.2621960505609422, + "grad_norm": 0.8006240725517273, + "learning_rate": 1.825202632959372e-05, + "loss": 0.1511, "step": 5165 }, { - "epoch": 0.13123492829039218, - "grad_norm": 1.1727315187454224, - "learning_rate": 1.9125100478064055e-05, - "loss": 0.2252, + "epoch": 0.2624498705518047, + "grad_norm": 0.5427458882331848, + "learning_rate": 1.8250334196321305e-05, + "loss": 0.1455, "step": 5170 }, { - "epoch": 0.1313618479502475, - "grad_norm": 0.6905813813209534, - "learning_rate": 1.9124254346998353e-05, - "loss": 0.1612, + "epoch": 0.2627036905426671, + "grad_norm": 0.6491566300392151, + "learning_rate": 1.8248642063048886e-05, + "loss": 0.1539, "step": 5175 }, { - "epoch": 0.1314887676101028, - "grad_norm": 0.6785316467285156, - "learning_rate": 1.912340821593265e-05, - "loss": 0.1963, + "epoch": 0.2629575105335296, + "grad_norm": 0.9627673029899597, + "learning_rate": 1.824694992977647e-05, + "loss": 0.1524, "step": 5180 }, { - "epoch": 0.13161568726995812, - "grad_norm": 0.642821729183197, - "learning_rate": 1.9122562084866946e-05, - "loss": 0.1728, + "epoch": 0.2632113305243921, + "grad_norm": 0.5841239094734192, + "learning_rate": 1.8245257796504056e-05, + "loss": 0.1476, "step": 5185 }, { - "epoch": 0.13174260692981343, - "grad_norm": 0.9135999083518982, - "learning_rate": 1.9121715953801245e-05, - "loss": 0.214, + "epoch": 0.2634651505152546, + "grad_norm": 0.501349925994873, + "learning_rate": 1.8243565663231636e-05, + "loss": 0.1262, "step": 5190 }, { - "epoch": 0.13186952658966875, - "grad_norm": 0.5881655216217041, - "learning_rate": 1.9120869822735543e-05, - "loss": 0.1964, + "epoch": 0.26371897050611703, + "grad_norm": 0.899695873260498, + "learning_rate": 1.8241873529959223e-05, + "loss": 0.1508, "step": 5195 }, { - "epoch": 0.13199644624952406, - "grad_norm": 0.6714578866958618, - "learning_rate": 1.912002369166984e-05, - "loss": 0.2019, + "epoch": 0.2639727904969795, + "grad_norm": 0.4951866865158081, + "learning_rate": 1.8240181396686803e-05, + "loss": 0.1479, "step": 5200 }, { - "epoch": 0.13212336590937937, - "grad_norm": 0.9432132244110107, - "learning_rate": 1.911917756060414e-05, - "loss": 0.2092, + "epoch": 0.264226610487842, + "grad_norm": 0.5623950362205505, + "learning_rate": 1.8238489263414387e-05, + "loss": 0.1641, "step": 5205 }, { - "epoch": 0.13225028556923468, - "grad_norm": 0.7245412468910217, - "learning_rate": 1.911833142953844e-05, - "loss": 0.1852, + "epoch": 0.2644804304787045, + "grad_norm": 0.4208991527557373, + "learning_rate": 1.8236797130141974e-05, + "loss": 0.1436, "step": 5210 }, { - "epoch": 0.13237720522909, - "grad_norm": 1.603753924369812, - "learning_rate": 1.9117485298472737e-05, - "loss": 0.1896, + "epoch": 0.264734250469567, + "grad_norm": 0.41514652967453003, + "learning_rate": 1.8235104996869554e-05, + "loss": 0.1507, "step": 5215 }, { - "epoch": 0.1325041248889453, - "grad_norm": 1.0410493612289429, - "learning_rate": 1.9116639167407035e-05, - "loss": 0.1967, + "epoch": 0.26498807046042944, + "grad_norm": 0.4979642629623413, + "learning_rate": 1.8233412863597137e-05, + "loss": 0.1338, "step": 5220 }, { - "epoch": 0.13263104454880062, - "grad_norm": 1.430548071861267, - "learning_rate": 1.911579303634133e-05, - "loss": 0.167, + "epoch": 0.26524189045129193, + "grad_norm": 0.8609977960586548, + "learning_rate": 1.823172073032472e-05, + "loss": 0.1624, "step": 5225 }, { - "epoch": 0.13275796420865593, - "grad_norm": 1.1257307529449463, - "learning_rate": 1.911494690527563e-05, - "loss": 0.2223, + "epoch": 0.2654957104421544, + "grad_norm": 0.4810056984424591, + "learning_rate": 1.8230028597052305e-05, + "loss": 0.1511, "step": 5230 }, { - "epoch": 0.13288488386851124, - "grad_norm": 0.6970940232276917, - "learning_rate": 1.9114100774209927e-05, - "loss": 0.2108, + "epoch": 0.2657495304330169, + "grad_norm": 0.5637201070785522, + "learning_rate": 1.822833646377989e-05, + "loss": 0.1451, "step": 5235 }, { - "epoch": 0.13301180352836656, - "grad_norm": 0.8964440226554871, - "learning_rate": 1.9113254643144225e-05, - "loss": 0.1908, + "epoch": 0.2660033504238794, + "grad_norm": 0.77173912525177, + "learning_rate": 1.822664433050747e-05, + "loss": 0.1694, "step": 5240 }, { - "epoch": 0.13313872318822187, - "grad_norm": 1.0627422332763672, - "learning_rate": 1.911240851207852e-05, - "loss": 0.1964, + "epoch": 0.26625717041474184, + "grad_norm": 0.7287624478340149, + "learning_rate": 1.8224952197235055e-05, + "loss": 0.1474, "step": 5245 }, { - "epoch": 0.13326564284807715, - "grad_norm": 0.749755322933197, - "learning_rate": 1.911156238101282e-05, - "loss": 0.1824, + "epoch": 0.26651099040560433, + "grad_norm": 0.6791149973869324, + "learning_rate": 1.822326006396264e-05, + "loss": 0.1321, "step": 5250 }, { - "epoch": 0.13339256250793247, - "grad_norm": 1.2132848501205444, - "learning_rate": 1.9110716249947117e-05, - "loss": 0.2, + "epoch": 0.2667648103964668, + "grad_norm": 0.486334890127182, + "learning_rate": 1.8221567930690222e-05, + "loss": 0.145, "step": 5255 }, { - "epoch": 0.13351948216778778, - "grad_norm": 0.7574829459190369, - "learning_rate": 1.9109870118881416e-05, - "loss": 0.1878, + "epoch": 0.2670186303873293, + "grad_norm": 0.4670858383178711, + "learning_rate": 1.8219875797417806e-05, + "loss": 0.1335, "step": 5260 }, { - "epoch": 0.1336464018276431, - "grad_norm": 0.8688048124313354, - "learning_rate": 1.9109023987815714e-05, - "loss": 0.2081, + "epoch": 0.2672724503781918, + "grad_norm": 0.4310056269168854, + "learning_rate": 1.821818366414539e-05, + "loss": 0.1339, "step": 5265 }, { - "epoch": 0.1337733214874984, - "grad_norm": 1.6115187406539917, - "learning_rate": 1.9108177856750013e-05, - "loss": 0.1985, + "epoch": 0.26752627036905424, + "grad_norm": 0.5363441109657288, + "learning_rate": 1.8216491530872973e-05, + "loss": 0.1421, "step": 5270 }, { - "epoch": 0.13390024114735372, - "grad_norm": 0.7479302287101746, - "learning_rate": 1.910733172568431e-05, - "loss": 0.1954, + "epoch": 0.26778009035991673, + "grad_norm": 0.6194841861724854, + "learning_rate": 1.8214799397600556e-05, + "loss": 0.1682, "step": 5275 }, { - "epoch": 0.13402716080720903, - "grad_norm": 0.9148740768432617, - "learning_rate": 1.910648559461861e-05, - "loss": 0.1537, + "epoch": 0.2680339103507792, + "grad_norm": 0.6221051216125488, + "learning_rate": 1.821310726432814e-05, + "loss": 0.139, "step": 5280 }, { - "epoch": 0.13415408046706434, - "grad_norm": 0.8943387269973755, - "learning_rate": 1.9105639463552904e-05, - "loss": 0.2074, + "epoch": 0.2682877303416417, + "grad_norm": 0.5695869326591492, + "learning_rate": 1.8211415131055724e-05, + "loss": 0.1409, "step": 5285 }, { - "epoch": 0.13428100012691965, - "grad_norm": 0.8216649889945984, - "learning_rate": 1.9104793332487203e-05, - "loss": 0.2016, + "epoch": 0.2685415503325042, + "grad_norm": 0.4726913273334503, + "learning_rate": 1.8209722997783307e-05, + "loss": 0.1443, "step": 5290 }, { - "epoch": 0.13440791978677497, - "grad_norm": 0.7933045029640198, - "learning_rate": 1.91039472014215e-05, - "loss": 0.2141, + "epoch": 0.26879537032336664, + "grad_norm": 0.5159754157066345, + "learning_rate": 1.820803086451089e-05, + "loss": 0.158, "step": 5295 }, { - "epoch": 0.13453483944663028, - "grad_norm": 0.9970956444740295, - "learning_rate": 1.91031010703558e-05, - "loss": 0.1847, + "epoch": 0.26904919031422914, + "grad_norm": 1.0508424043655396, + "learning_rate": 1.8206338731238474e-05, + "loss": 0.1508, "step": 5300 }, { - "epoch": 0.1346617591064856, - "grad_norm": 0.887851893901825, - "learning_rate": 1.9102254939290098e-05, - "loss": 0.1794, + "epoch": 0.26930301030509163, + "grad_norm": 0.49234750866889954, + "learning_rate": 1.8204646597966058e-05, + "loss": 0.1555, "step": 5305 }, { - "epoch": 0.1347886787663409, - "grad_norm": 0.8959735631942749, - "learning_rate": 1.9101408808224396e-05, - "loss": 0.2102, + "epoch": 0.2695568302959541, + "grad_norm": 0.5780052542686462, + "learning_rate": 1.820295446469364e-05, + "loss": 0.1449, "step": 5310 }, { - "epoch": 0.13491559842619621, - "grad_norm": 1.5433861017227173, - "learning_rate": 1.9100562677158695e-05, - "loss": 0.184, + "epoch": 0.2698106502868166, + "grad_norm": 0.6060476303100586, + "learning_rate": 1.8201262331421225e-05, + "loss": 0.1535, "step": 5315 }, { - "epoch": 0.13504251808605153, - "grad_norm": 0.9173113703727722, - "learning_rate": 1.9099716546092993e-05, - "loss": 0.1959, + "epoch": 0.27006447027767905, + "grad_norm": 0.5436288118362427, + "learning_rate": 1.819957019814881e-05, + "loss": 0.1513, "step": 5320 }, { - "epoch": 0.13516943774590684, - "grad_norm": 0.8776009678840637, - "learning_rate": 1.9098870415027288e-05, - "loss": 0.178, + "epoch": 0.27031829026854154, + "grad_norm": 0.6781036257743835, + "learning_rate": 1.8197878064876392e-05, + "loss": 0.1599, "step": 5325 }, { - "epoch": 0.13529635740576215, - "grad_norm": 0.7578942179679871, - "learning_rate": 1.9098024283961587e-05, - "loss": 0.1666, + "epoch": 0.27057211025940403, + "grad_norm": 0.6104758381843567, + "learning_rate": 1.8196185931603972e-05, + "loss": 0.1395, "step": 5330 }, { - "epoch": 0.13542327706561746, - "grad_norm": 0.7525966763496399, - "learning_rate": 1.9097178152895885e-05, - "loss": 0.1782, + "epoch": 0.2708259302502665, + "grad_norm": 0.5809837579727173, + "learning_rate": 1.819449379833156e-05, + "loss": 0.1341, "step": 5335 }, { - "epoch": 0.13555019672547278, - "grad_norm": 1.3207752704620361, - "learning_rate": 1.9096332021830183e-05, - "loss": 0.206, + "epoch": 0.271079750241129, + "grad_norm": 0.9313431978225708, + "learning_rate": 1.8192801665059143e-05, + "loss": 0.1543, "step": 5340 }, { - "epoch": 0.1356771163853281, - "grad_norm": 0.8363043665885925, - "learning_rate": 1.9095485890764482e-05, - "loss": 0.1973, + "epoch": 0.27133357023199145, + "grad_norm": 0.7047086954116821, + "learning_rate": 1.8191109531786726e-05, + "loss": 0.1528, "step": 5345 }, { - "epoch": 0.1358040360451834, - "grad_norm": 1.0819944143295288, - "learning_rate": 1.909463975969878e-05, - "loss": 0.1978, + "epoch": 0.27158739022285394, + "grad_norm": 0.4853482246398926, + "learning_rate": 1.818941739851431e-05, + "loss": 0.1488, "step": 5350 }, { - "epoch": 0.13593095570503871, - "grad_norm": 0.7902984619140625, - "learning_rate": 1.909379362863308e-05, - "loss": 0.2065, + "epoch": 0.27184121021371643, + "grad_norm": 1.5312269926071167, + "learning_rate": 1.818772526524189e-05, + "loss": 0.1581, "step": 5355 }, { - "epoch": 0.13605787536489403, - "grad_norm": 0.6045721173286438, - "learning_rate": 1.9092947497567377e-05, - "loss": 0.1429, + "epoch": 0.2720950302045789, + "grad_norm": 0.6196808815002441, + "learning_rate": 1.8186033131969477e-05, + "loss": 0.1306, "step": 5360 }, { - "epoch": 0.13618479502474934, - "grad_norm": 0.7414247989654541, - "learning_rate": 1.9092101366501672e-05, - "loss": 0.1639, + "epoch": 0.2723488501954414, + "grad_norm": 0.7211527228355408, + "learning_rate": 1.818434099869706e-05, + "loss": 0.1374, "step": 5365 }, { - "epoch": 0.13631171468460465, - "grad_norm": 1.2185474634170532, - "learning_rate": 1.909125523543597e-05, - "loss": 0.1819, + "epoch": 0.27260267018630385, + "grad_norm": 0.4848230481147766, + "learning_rate": 1.818264886542464e-05, + "loss": 0.1495, "step": 5370 }, { - "epoch": 0.13643863434445996, - "grad_norm": 1.0421013832092285, - "learning_rate": 1.909040910437027e-05, - "loss": 0.194, + "epoch": 0.27285649017716634, + "grad_norm": 0.5052759051322937, + "learning_rate": 1.8180956732152227e-05, + "loss": 0.1601, "step": 5375 }, { - "epoch": 0.13656555400431528, - "grad_norm": 1.3605399131774902, - "learning_rate": 1.9089562973304567e-05, - "loss": 0.1867, + "epoch": 0.27311031016802884, + "grad_norm": 0.6089451909065247, + "learning_rate": 1.8179264598879807e-05, + "loss": 0.1567, "step": 5380 }, { - "epoch": 0.1366924736641706, - "grad_norm": 0.7693747878074646, - "learning_rate": 1.9088716842238862e-05, - "loss": 0.1695, + "epoch": 0.2733641301588913, + "grad_norm": 0.5925761461257935, + "learning_rate": 1.8177572465607394e-05, + "loss": 0.1406, "step": 5385 }, { - "epoch": 0.1368193933240259, - "grad_norm": 0.7963986396789551, - "learning_rate": 1.908787071117316e-05, - "loss": 0.1831, + "epoch": 0.2736179501497538, + "grad_norm": 0.6521849632263184, + "learning_rate": 1.8175880332334978e-05, + "loss": 0.1402, "step": 5390 }, { - "epoch": 0.1369463129838812, - "grad_norm": 0.7483286261558533, - "learning_rate": 1.908702458010746e-05, - "loss": 0.1749, + "epoch": 0.27387177014061626, + "grad_norm": 0.4725498557090759, + "learning_rate": 1.8174188199062558e-05, + "loss": 0.1494, "step": 5395 }, { - "epoch": 0.13707323264373653, - "grad_norm": 0.7271854877471924, - "learning_rate": 1.9086178449041757e-05, - "loss": 0.186, + "epoch": 0.27412559013147875, + "grad_norm": 0.5570566058158875, + "learning_rate": 1.8172496065790145e-05, + "loss": 0.1463, "step": 5400 }, { - "epoch": 0.13720015230359184, - "grad_norm": 1.2616287469863892, - "learning_rate": 1.9085332317976056e-05, - "loss": 0.1915, + "epoch": 0.27437941012234124, + "grad_norm": 0.6081807613372803, + "learning_rate": 1.8170803932517725e-05, + "loss": 0.1356, "step": 5405 }, { - "epoch": 0.13732707196344715, - "grad_norm": 0.8520820736885071, - "learning_rate": 1.9084486186910354e-05, - "loss": 0.1941, + "epoch": 0.27463323011320373, + "grad_norm": 0.5569767355918884, + "learning_rate": 1.816911179924531e-05, + "loss": 0.131, "step": 5410 }, { - "epoch": 0.13745399162330246, - "grad_norm": 0.7394649982452393, - "learning_rate": 1.9083640055844653e-05, - "loss": 0.1851, + "epoch": 0.2748870501040662, + "grad_norm": 0.6330967545509338, + "learning_rate": 1.8167419665972896e-05, + "loss": 0.1529, "step": 5415 }, { - "epoch": 0.13758091128315775, - "grad_norm": 1.0292296409606934, - "learning_rate": 1.908279392477895e-05, - "loss": 0.1978, + "epoch": 0.27514087009492866, + "grad_norm": 0.5000077486038208, + "learning_rate": 1.8165727532700476e-05, + "loss": 0.1352, "step": 5420 }, { - "epoch": 0.13770783094301306, - "grad_norm": 0.8078890442848206, - "learning_rate": 1.9081947793713246e-05, - "loss": 0.1807, + "epoch": 0.27539469008579115, + "grad_norm": 0.6218600273132324, + "learning_rate": 1.816403539942806e-05, + "loss": 0.1528, "step": 5425 }, { - "epoch": 0.13783475060286837, - "grad_norm": 1.1172857284545898, - "learning_rate": 1.9081101662647544e-05, - "loss": 0.1996, + "epoch": 0.27564851007665364, + "grad_norm": 0.5702294707298279, + "learning_rate": 1.8162343266155643e-05, + "loss": 0.1399, "step": 5430 }, { - "epoch": 0.13796167026272368, - "grad_norm": 0.8366782665252686, - "learning_rate": 1.9080255531581843e-05, - "loss": 0.1826, + "epoch": 0.27590233006751613, + "grad_norm": 0.9165341258049011, + "learning_rate": 1.8160651132883226e-05, + "loss": 0.1471, "step": 5435 }, { - "epoch": 0.138088589922579, - "grad_norm": 0.9700227975845337, - "learning_rate": 1.907940940051614e-05, - "loss": 0.1891, + "epoch": 0.2761561500583786, + "grad_norm": 0.5732564926147461, + "learning_rate": 1.8158958999610813e-05, + "loss": 0.1554, "step": 5440 }, { - "epoch": 0.1382155095824343, - "grad_norm": 1.078025460243225, - "learning_rate": 1.907856326945044e-05, - "loss": 0.1611, + "epoch": 0.27640997004924106, + "grad_norm": 0.5652003288269043, + "learning_rate": 1.8157266866338394e-05, + "loss": 0.1491, "step": 5445 }, { - "epoch": 0.13834242924228962, - "grad_norm": 1.2530544996261597, - "learning_rate": 1.9077717138384738e-05, - "loss": 0.1844, + "epoch": 0.27666379004010355, + "grad_norm": 0.5051900148391724, + "learning_rate": 1.8155574733065977e-05, + "loss": 0.1511, "step": 5450 }, { - "epoch": 0.13846934890214493, - "grad_norm": 0.9885790944099426, - "learning_rate": 1.9076871007319036e-05, - "loss": 0.1846, + "epoch": 0.27691761003096604, + "grad_norm": 0.4586610794067383, + "learning_rate": 1.815388259979356e-05, + "loss": 0.1433, "step": 5455 }, { - "epoch": 0.13859626856200025, - "grad_norm": 1.326894760131836, - "learning_rate": 1.9076024876253335e-05, - "loss": 0.1787, + "epoch": 0.27717143002182854, + "grad_norm": 0.6589603424072266, + "learning_rate": 1.8152190466521144e-05, + "loss": 0.1451, "step": 5460 }, { - "epoch": 0.13872318822185556, - "grad_norm": 0.8343599438667297, - "learning_rate": 1.907517874518763e-05, - "loss": 0.2145, + "epoch": 0.277425250012691, + "grad_norm": 0.6678264737129211, + "learning_rate": 1.8150498333248728e-05, + "loss": 0.1481, "step": 5465 }, { - "epoch": 0.13885010788171087, - "grad_norm": 1.3647360801696777, - "learning_rate": 1.9074332614121928e-05, - "loss": 0.1715, + "epoch": 0.27767907000355346, + "grad_norm": 0.5780376195907593, + "learning_rate": 1.814880619997631e-05, + "loss": 0.159, "step": 5470 }, { - "epoch": 0.13897702754156618, - "grad_norm": 0.6957979202270508, - "learning_rate": 1.9073486483056227e-05, - "loss": 0.187, + "epoch": 0.27793288999441595, + "grad_norm": 0.72762531042099, + "learning_rate": 1.8147114066703895e-05, + "loss": 0.1429, "step": 5475 }, { - "epoch": 0.1391039472014215, - "grad_norm": 0.8027459979057312, - "learning_rate": 1.9072640351990525e-05, - "loss": 0.1514, + "epoch": 0.27818670998527845, + "grad_norm": 0.5006431341171265, + "learning_rate": 1.814542193343148e-05, + "loss": 0.1635, "step": 5480 }, { - "epoch": 0.1392308668612768, - "grad_norm": 1.0036553144454956, - "learning_rate": 1.9071794220924823e-05, - "loss": 0.1747, + "epoch": 0.27844052997614094, + "grad_norm": 0.5482341647148132, + "learning_rate": 1.8143729800159062e-05, + "loss": 0.1551, "step": 5485 }, { - "epoch": 0.13935778652113212, - "grad_norm": 0.9628024101257324, - "learning_rate": 1.9070948089859122e-05, - "loss": 0.1681, + "epoch": 0.2786943499670034, + "grad_norm": 0.4888313114643097, + "learning_rate": 1.8142037666886645e-05, + "loss": 0.1414, "step": 5490 }, { - "epoch": 0.13948470618098743, - "grad_norm": 0.8277355432510376, - "learning_rate": 1.907010195879342e-05, - "loss": 0.2074, + "epoch": 0.27894816995786587, + "grad_norm": 0.49801504611968994, + "learning_rate": 1.814034553361423e-05, + "loss": 0.1561, "step": 5495 }, { - "epoch": 0.13961162584084275, - "grad_norm": 0.7739232182502747, - "learning_rate": 1.906925582772772e-05, - "loss": 0.1964, + "epoch": 0.27920198994872836, + "grad_norm": 0.42549580335617065, + "learning_rate": 1.8138653400341813e-05, + "loss": 0.1265, "step": 5500 }, { - "epoch": 0.13973854550069806, - "grad_norm": 0.5800849795341492, - "learning_rate": 1.9068409696662014e-05, - "loss": 0.1805, + "epoch": 0.27945580993959085, + "grad_norm": 0.582340657711029, + "learning_rate": 1.8136961267069396e-05, + "loss": 0.1468, "step": 5505 }, { - "epoch": 0.13986546516055337, - "grad_norm": 1.134594440460205, - "learning_rate": 1.9067563565596312e-05, - "loss": 0.1984, + "epoch": 0.27970962993045334, + "grad_norm": 0.46748143434524536, + "learning_rate": 1.813526913379698e-05, + "loss": 0.143, "step": 5510 }, { - "epoch": 0.13999238482040868, - "grad_norm": 0.6453481912612915, - "learning_rate": 1.906671743453061e-05, - "loss": 0.1785, + "epoch": 0.2799634499213158, + "grad_norm": 0.49724528193473816, + "learning_rate": 1.8133577000524563e-05, + "loss": 0.1523, "step": 5515 }, { - "epoch": 0.140119304480264, - "grad_norm": 0.8900023102760315, - "learning_rate": 1.906587130346491e-05, - "loss": 0.1703, + "epoch": 0.28021726991217827, + "grad_norm": 0.49215927720069885, + "learning_rate": 1.8131884867252147e-05, + "loss": 0.1504, "step": 5520 }, { - "epoch": 0.1402462241401193, - "grad_norm": 0.5553280115127563, - "learning_rate": 1.9065025172399204e-05, - "loss": 0.1836, + "epoch": 0.28047108990304076, + "grad_norm": 0.5257487297058105, + "learning_rate": 1.813019273397973e-05, + "loss": 0.1279, "step": 5525 }, { - "epoch": 0.14037314379997462, - "grad_norm": 2.055643320083618, - "learning_rate": 1.9064179041333502e-05, - "loss": 0.2108, + "epoch": 0.28072490989390325, + "grad_norm": 0.6766299605369568, + "learning_rate": 1.8128500600707314e-05, + "loss": 0.1515, "step": 5530 }, { - "epoch": 0.14050006345982993, - "grad_norm": 1.3215564489364624, - "learning_rate": 1.90633329102678e-05, - "loss": 0.2127, + "epoch": 0.28097872988476574, + "grad_norm": 0.531494140625, + "learning_rate": 1.8126808467434897e-05, + "loss": 0.1464, "step": 5535 }, { - "epoch": 0.14062698311968524, - "grad_norm": 0.7487951517105103, - "learning_rate": 1.90624867792021e-05, - "loss": 0.1944, + "epoch": 0.2812325498756282, + "grad_norm": 0.4178050458431244, + "learning_rate": 1.812511633416248e-05, + "loss": 0.1271, "step": 5540 }, { - "epoch": 0.14075390277954056, - "grad_norm": 0.8048514127731323, - "learning_rate": 1.9061640648136398e-05, - "loss": 0.1916, + "epoch": 0.28148636986649067, + "grad_norm": 0.576151967048645, + "learning_rate": 1.8123424200890064e-05, + "loss": 0.1328, "step": 5545 }, { - "epoch": 0.14088082243939587, - "grad_norm": 0.734968900680542, - "learning_rate": 1.9060794517070696e-05, - "loss": 0.184, + "epoch": 0.28174018985735316, + "grad_norm": 0.6306776404380798, + "learning_rate": 1.8121732067617648e-05, + "loss": 0.1511, "step": 5550 }, { - "epoch": 0.14100774209925118, - "grad_norm": 0.7723875641822815, - "learning_rate": 1.9059948386004994e-05, - "loss": 0.1946, + "epoch": 0.28199400984821565, + "grad_norm": 0.634989321231842, + "learning_rate": 1.812003993434523e-05, + "loss": 0.1549, "step": 5555 }, { - "epoch": 0.1411346617591065, - "grad_norm": 1.1328619718551636, - "learning_rate": 1.9059102254939293e-05, - "loss": 0.1998, + "epoch": 0.28224782983907815, + "grad_norm": 0.7044651508331299, + "learning_rate": 1.811834780107281e-05, + "loss": 0.1609, "step": 5560 }, { - "epoch": 0.1412615814189618, - "grad_norm": 0.7094976305961609, - "learning_rate": 1.9058256123873588e-05, - "loss": 0.1804, + "epoch": 0.2825016498299406, + "grad_norm": 0.5213934183120728, + "learning_rate": 1.81166556678004e-05, + "loss": 0.1359, "step": 5565 }, { - "epoch": 0.14138850107881712, - "grad_norm": 0.6979081034660339, - "learning_rate": 1.9057409992807886e-05, - "loss": 0.1729, + "epoch": 0.2827554698208031, + "grad_norm": 0.5297014117240906, + "learning_rate": 1.8114963534527982e-05, + "loss": 0.1493, "step": 5570 }, { - "epoch": 0.14151542073867243, - "grad_norm": 0.8068703413009644, - "learning_rate": 1.9056563861742185e-05, - "loss": 0.1829, + "epoch": 0.28300928981166557, + "grad_norm": 0.4303801953792572, + "learning_rate": 1.8113271401255562e-05, + "loss": 0.1408, "step": 5575 }, { - "epoch": 0.14164234039852774, - "grad_norm": 1.192426085472107, - "learning_rate": 1.9055717730676483e-05, - "loss": 0.2029, + "epoch": 0.28326310980252806, + "grad_norm": 0.7388393878936768, + "learning_rate": 1.811157926798315e-05, + "loss": 0.1492, "step": 5580 }, { - "epoch": 0.14176926005838306, - "grad_norm": 0.8921623229980469, - "learning_rate": 1.905487159961078e-05, - "loss": 0.1926, + "epoch": 0.28351692979339055, + "grad_norm": 0.6850863695144653, + "learning_rate": 1.810988713471073e-05, + "loss": 0.1631, "step": 5585 }, { - "epoch": 0.14189617971823837, - "grad_norm": 0.9586236476898193, - "learning_rate": 1.905402546854508e-05, - "loss": 0.178, + "epoch": 0.283770749784253, + "grad_norm": 0.7044445872306824, + "learning_rate": 1.8108195001438316e-05, + "loss": 0.1582, "step": 5590 }, { - "epoch": 0.14202309937809365, - "grad_norm": 0.8791643977165222, - "learning_rate": 1.9053179337479378e-05, - "loss": 0.185, + "epoch": 0.2840245697751155, + "grad_norm": 0.4703899621963501, + "learning_rate": 1.81065028681659e-05, + "loss": 0.1369, "step": 5595 }, { - "epoch": 0.14215001903794897, - "grad_norm": 0.7843058705329895, - "learning_rate": 1.9052333206413677e-05, - "loss": 0.1838, + "epoch": 0.28427838976597797, + "grad_norm": 0.5529588460922241, + "learning_rate": 1.810481073489348e-05, + "loss": 0.1618, "step": 5600 }, { - "epoch": 0.14227693869780428, - "grad_norm": 0.995017945766449, - "learning_rate": 1.905148707534797e-05, - "loss": 0.1977, + "epoch": 0.28453220975684046, + "grad_norm": 0.6327471733093262, + "learning_rate": 1.8103118601621067e-05, + "loss": 0.1339, "step": 5605 }, { - "epoch": 0.1424038583576596, - "grad_norm": 0.7489838600158691, - "learning_rate": 1.905064094428227e-05, - "loss": 0.1926, + "epoch": 0.28478602974770295, + "grad_norm": 0.5310539603233337, + "learning_rate": 1.8101426468348647e-05, + "loss": 0.1511, "step": 5610 }, { - "epoch": 0.1425307780175149, - "grad_norm": 0.7439056038856506, - "learning_rate": 1.904979481321657e-05, - "loss": 0.1821, + "epoch": 0.2850398497385654, + "grad_norm": 0.5089000463485718, + "learning_rate": 1.809973433507623e-05, + "loss": 0.1491, "step": 5615 }, { - "epoch": 0.14265769767737022, - "grad_norm": 1.0915043354034424, - "learning_rate": 1.9048948682150867e-05, - "loss": 0.2255, + "epoch": 0.2852936697294279, + "grad_norm": 0.4953418970108032, + "learning_rate": 1.8098042201803818e-05, + "loss": 0.1468, "step": 5620 }, { - "epoch": 0.14278461733722553, - "grad_norm": 0.6900913715362549, - "learning_rate": 1.9048102551085165e-05, - "loss": 0.1658, + "epoch": 0.28554748972029037, + "grad_norm": 0.6269605159759521, + "learning_rate": 1.8096350068531398e-05, + "loss": 0.1411, "step": 5625 }, { - "epoch": 0.14291153699708084, - "grad_norm": 1.2645331621170044, - "learning_rate": 1.9047256420019464e-05, - "loss": 0.1931, + "epoch": 0.28580130971115286, + "grad_norm": 0.5403065085411072, + "learning_rate": 1.8094657935258985e-05, + "loss": 0.139, "step": 5630 }, { - "epoch": 0.14303845665693615, - "grad_norm": 0.7588236927986145, - "learning_rate": 1.9046410288953762e-05, - "loss": 0.1958, + "epoch": 0.28605512970201535, + "grad_norm": 0.9192875623703003, + "learning_rate": 1.8092965801986565e-05, + "loss": 0.133, "step": 5635 }, { - "epoch": 0.14316537631679147, - "grad_norm": 0.6471638679504395, - "learning_rate": 1.904556415788806e-05, - "loss": 0.1737, + "epoch": 0.2863089496928778, + "grad_norm": 0.5615043044090271, + "learning_rate": 1.809127366871415e-05, + "loss": 0.1354, "step": 5640 }, { - "epoch": 0.14329229597664678, - "grad_norm": 1.3450305461883545, - "learning_rate": 1.9044718026822355e-05, - "loss": 0.1703, + "epoch": 0.2865627696837403, + "grad_norm": 2.930060863494873, + "learning_rate": 1.8089581535441735e-05, + "loss": 0.1422, "step": 5645 }, { - "epoch": 0.1434192156365021, - "grad_norm": 1.083116054534912, - "learning_rate": 1.9043871895756654e-05, - "loss": 0.1954, + "epoch": 0.2868165896746028, + "grad_norm": 0.42745351791381836, + "learning_rate": 1.8087889402169315e-05, + "loss": 0.1428, "step": 5650 }, { - "epoch": 0.1435461352963574, - "grad_norm": 0.8281340599060059, - "learning_rate": 1.9043025764690952e-05, - "loss": 0.1688, + "epoch": 0.28707040966546526, + "grad_norm": 0.5274901390075684, + "learning_rate": 1.80861972688969e-05, + "loss": 0.164, "step": 5655 }, { - "epoch": 0.14367305495621271, - "grad_norm": 3.3304848670959473, - "learning_rate": 1.904217963362525e-05, - "loss": 0.1837, + "epoch": 0.28732422965632776, + "grad_norm": 1.1042112112045288, + "learning_rate": 1.8084505135624483e-05, + "loss": 0.142, "step": 5660 }, { - "epoch": 0.14379997461606803, - "grad_norm": 0.9351780414581299, - "learning_rate": 1.9041333502559546e-05, - "loss": 0.2103, + "epoch": 0.2875780496471902, + "grad_norm": 0.6478941440582275, + "learning_rate": 1.8082813002352066e-05, + "loss": 0.1353, "step": 5665 }, { - "epoch": 0.14392689427592334, - "grad_norm": 0.903019905090332, - "learning_rate": 1.9040487371493844e-05, - "loss": 0.1807, + "epoch": 0.2878318696380527, + "grad_norm": 0.6474268436431885, + "learning_rate": 1.808112086907965e-05, + "loss": 0.1537, "step": 5670 }, { - "epoch": 0.14405381393577865, - "grad_norm": 0.7403823733329773, - "learning_rate": 1.9039641240428142e-05, - "loss": 0.191, + "epoch": 0.2880856896289152, + "grad_norm": 0.46995773911476135, + "learning_rate": 1.8079428735807233e-05, + "loss": 0.1338, "step": 5675 }, { - "epoch": 0.14418073359563396, - "grad_norm": 0.6442487835884094, - "learning_rate": 1.903879510936244e-05, - "loss": 0.1889, + "epoch": 0.28833950961977767, + "grad_norm": 0.652370274066925, + "learning_rate": 1.8077736602534817e-05, + "loss": 0.1532, "step": 5680 }, { - "epoch": 0.14430765325548928, - "grad_norm": 0.8075422644615173, - "learning_rate": 1.903794897829674e-05, - "loss": 0.2082, + "epoch": 0.28859332961064016, + "grad_norm": 0.5019606351852417, + "learning_rate": 1.80760444692624e-05, + "loss": 0.1606, "step": 5685 }, { - "epoch": 0.1444345729153446, - "grad_norm": 2.0602900981903076, - "learning_rate": 1.9037102847231038e-05, - "loss": 0.1671, + "epoch": 0.2888471496015026, + "grad_norm": 0.44102251529693604, + "learning_rate": 1.8074352335989984e-05, + "loss": 0.1462, "step": 5690 }, { - "epoch": 0.1445614925751999, - "grad_norm": 0.8565961718559265, - "learning_rate": 1.9036256716165336e-05, - "loss": 0.1813, + "epoch": 0.2891009695923651, + "grad_norm": 0.4564070701599121, + "learning_rate": 1.8072660202717567e-05, + "loss": 0.1537, "step": 5695 }, { - "epoch": 0.1446884122350552, - "grad_norm": 1.3125139474868774, - "learning_rate": 1.9035410585099634e-05, - "loss": 0.1838, + "epoch": 0.2893547895832276, + "grad_norm": 0.42364734411239624, + "learning_rate": 1.807096806944515e-05, + "loss": 0.1347, "step": 5700 }, { - "epoch": 0.14481533189491053, - "grad_norm": 0.5465439558029175, - "learning_rate": 1.9034564454033933e-05, - "loss": 0.1707, + "epoch": 0.28960860957409007, + "grad_norm": 0.5539987683296204, + "learning_rate": 1.8069275936172734e-05, + "loss": 0.1375, "step": 5705 }, { - "epoch": 0.14494225155476584, - "grad_norm": 0.8434939980506897, - "learning_rate": 1.9033718322968228e-05, - "loss": 0.1819, + "epoch": 0.28986242956495256, + "grad_norm": 0.4797275960445404, + "learning_rate": 1.8067583802900318e-05, + "loss": 0.1473, "step": 5710 }, { - "epoch": 0.14506917121462115, - "grad_norm": 0.8216573596000671, - "learning_rate": 1.9032872191902526e-05, - "loss": 0.1884, + "epoch": 0.290116249555815, + "grad_norm": 0.6089901328086853, + "learning_rate": 1.80658916696279e-05, + "loss": 0.1346, "step": 5715 }, { - "epoch": 0.14519609087447646, - "grad_norm": 0.8753653764724731, - "learning_rate": 1.9032026060836825e-05, - "loss": 0.1927, + "epoch": 0.2903700695466775, + "grad_norm": 0.49910488724708557, + "learning_rate": 1.8064199536355485e-05, + "loss": 0.1322, "step": 5720 }, { - "epoch": 0.14532301053433178, - "grad_norm": 0.7406740784645081, - "learning_rate": 1.9031179929771123e-05, - "loss": 0.1631, + "epoch": 0.29062388953754, + "grad_norm": 0.45843714475631714, + "learning_rate": 1.806250740308307e-05, + "loss": 0.1367, "step": 5725 }, { - "epoch": 0.1454499301941871, - "grad_norm": 0.6242668628692627, - "learning_rate": 1.903033379870542e-05, - "loss": 0.1605, + "epoch": 0.2908777095284025, + "grad_norm": 1.3286268711090088, + "learning_rate": 1.8060815269810652e-05, + "loss": 0.1337, "step": 5730 }, { - "epoch": 0.1455768498540424, - "grad_norm": 1.2106947898864746, - "learning_rate": 1.902948766763972e-05, - "loss": 0.1771, + "epoch": 0.29113152951926496, + "grad_norm": 0.5683203339576721, + "learning_rate": 1.8059123136538236e-05, + "loss": 0.1415, "step": 5735 }, { - "epoch": 0.1457037695138977, - "grad_norm": 1.0504047870635986, - "learning_rate": 1.9028641536574018e-05, - "loss": 0.1868, + "epoch": 0.2913853495101274, + "grad_norm": 0.6591338515281677, + "learning_rate": 1.805743100326582e-05, + "loss": 0.1362, "step": 5740 }, { - "epoch": 0.14583068917375303, - "grad_norm": 1.1359809637069702, - "learning_rate": 1.9027795405508317e-05, - "loss": 0.177, + "epoch": 0.2916391695009899, + "grad_norm": 0.6420497298240662, + "learning_rate": 1.8055738869993403e-05, + "loss": 0.1575, "step": 5745 }, { - "epoch": 0.14595760883360834, - "grad_norm": 0.9788577556610107, - "learning_rate": 1.902694927444261e-05, - "loss": 0.1691, + "epoch": 0.2918929894918524, + "grad_norm": 0.4309554100036621, + "learning_rate": 1.8054046736720986e-05, + "loss": 0.1502, "step": 5750 }, { - "epoch": 0.14608452849346365, - "grad_norm": 0.7610688209533691, - "learning_rate": 1.902610314337691e-05, - "loss": 0.1797, + "epoch": 0.2921468094827149, + "grad_norm": 1.4654325246810913, + "learning_rate": 1.805235460344857e-05, + "loss": 0.1285, "step": 5755 }, { - "epoch": 0.14621144815331896, - "grad_norm": 0.707621693611145, - "learning_rate": 1.902525701231121e-05, - "loss": 0.2172, + "epoch": 0.2924006294735773, + "grad_norm": 0.667293906211853, + "learning_rate": 1.8050662470176153e-05, + "loss": 0.1473, "step": 5760 }, { - "epoch": 0.14633836781317425, - "grad_norm": 1.508949637413025, - "learning_rate": 1.9024410881245507e-05, - "loss": 0.1804, + "epoch": 0.2926544494644398, + "grad_norm": 0.4698887765407562, + "learning_rate": 1.8048970336903734e-05, + "loss": 0.1396, "step": 5765 }, { - "epoch": 0.14646528747302956, - "grad_norm": 0.66214519739151, - "learning_rate": 1.9023564750179805e-05, - "loss": 0.1602, + "epoch": 0.2929082694553023, + "grad_norm": 0.5942188501358032, + "learning_rate": 1.804727820363132e-05, + "loss": 0.1436, "step": 5770 }, { - "epoch": 0.14659220713288487, - "grad_norm": 0.869759738445282, - "learning_rate": 1.9022718619114104e-05, - "loss": 0.1806, + "epoch": 0.2931620894461648, + "grad_norm": 0.34591802954673767, + "learning_rate": 1.8045586070358904e-05, + "loss": 0.1373, "step": 5775 }, { - "epoch": 0.14671912679274018, - "grad_norm": 0.7928999662399292, - "learning_rate": 1.9021872488048402e-05, - "loss": 0.2263, + "epoch": 0.2934159094370273, + "grad_norm": 0.4438014030456543, + "learning_rate": 1.8043893937086488e-05, + "loss": 0.1468, "step": 5780 }, { - "epoch": 0.1468460464525955, - "grad_norm": 3.7979824542999268, - "learning_rate": 1.90210263569827e-05, - "loss": 0.2298, + "epoch": 0.2936697294278897, + "grad_norm": 0.5076015591621399, + "learning_rate": 1.804220180381407e-05, + "loss": 0.1318, "step": 5785 }, { - "epoch": 0.1469729661124508, - "grad_norm": 0.810461699962616, - "learning_rate": 1.9020180225916996e-05, - "loss": 0.1921, + "epoch": 0.2939235494187522, + "grad_norm": 0.5563536882400513, + "learning_rate": 1.804050967054165e-05, + "loss": 0.1402, "step": 5790 }, { - "epoch": 0.14709988577230612, - "grad_norm": 0.6921939849853516, - "learning_rate": 1.9019334094851294e-05, - "loss": 0.1973, + "epoch": 0.2941773694096147, + "grad_norm": 14.195382118225098, + "learning_rate": 1.8038817537269238e-05, + "loss": 0.1376, "step": 5795 }, { - "epoch": 0.14722680543216143, - "grad_norm": 0.8335049748420715, - "learning_rate": 1.9018487963785592e-05, - "loss": 0.1832, + "epoch": 0.2944311894004772, + "grad_norm": 0.40444865822792053, + "learning_rate": 1.8037125403996822e-05, + "loss": 0.1234, "step": 5800 }, { - "epoch": 0.14735372509201675, - "grad_norm": 0.9456361532211304, - "learning_rate": 1.901764183271989e-05, - "loss": 0.1869, + "epoch": 0.2946850093913397, + "grad_norm": 0.5746991038322449, + "learning_rate": 1.8035433270724402e-05, + "loss": 0.1297, "step": 5805 }, { - "epoch": 0.14748064475187206, - "grad_norm": 1.0116539001464844, - "learning_rate": 1.9016795701654186e-05, - "loss": 0.1715, + "epoch": 0.2949388293822021, + "grad_norm": 0.31169363856315613, + "learning_rate": 1.803374113745199e-05, + "loss": 0.136, "step": 5810 }, { - "epoch": 0.14760756441172737, - "grad_norm": 0.7165481448173523, - "learning_rate": 1.9015949570588484e-05, - "loss": 0.1925, + "epoch": 0.2951926493730646, + "grad_norm": 0.4355963170528412, + "learning_rate": 1.803204900417957e-05, + "loss": 0.1352, "step": 5815 }, { - "epoch": 0.14773448407158268, - "grad_norm": 0.6696757674217224, - "learning_rate": 1.9015103439522783e-05, - "loss": 0.193, + "epoch": 0.2954464693639271, + "grad_norm": 0.5133848786354065, + "learning_rate": 1.8030356870907153e-05, + "loss": 0.149, "step": 5820 }, { - "epoch": 0.147861403731438, - "grad_norm": 0.9747999906539917, - "learning_rate": 1.901425730845708e-05, - "loss": 0.2018, + "epoch": 0.2957002893547896, + "grad_norm": 0.5429642200469971, + "learning_rate": 1.802866473763474e-05, + "loss": 0.148, "step": 5825 }, { - "epoch": 0.1479883233912933, - "grad_norm": 0.6975069642066956, - "learning_rate": 1.901341117739138e-05, - "loss": 0.1762, + "epoch": 0.2959541093456521, + "grad_norm": 0.777310848236084, + "learning_rate": 1.802697260436232e-05, + "loss": 0.1203, "step": 5830 }, { - "epoch": 0.14811524305114862, - "grad_norm": 0.9031946659088135, - "learning_rate": 1.9012565046325678e-05, - "loss": 0.1882, + "epoch": 0.2962079293365145, + "grad_norm": 0.5586498975753784, + "learning_rate": 1.8025280471089907e-05, + "loss": 0.1489, "step": 5835 }, { - "epoch": 0.14824216271100393, - "grad_norm": 0.7521499991416931, - "learning_rate": 1.9011718915259976e-05, - "loss": 0.2122, + "epoch": 0.296461749327377, + "grad_norm": 0.4667200446128845, + "learning_rate": 1.8023588337817487e-05, + "loss": 0.1203, "step": 5840 }, { - "epoch": 0.14836908237085925, - "grad_norm": 0.6870396733283997, - "learning_rate": 1.9010872784194275e-05, - "loss": 0.2014, + "epoch": 0.2967155693182395, + "grad_norm": 0.7403630614280701, + "learning_rate": 1.802189620454507e-05, + "loss": 0.1111, "step": 5845 }, { - "epoch": 0.14849600203071456, - "grad_norm": 0.9543684124946594, - "learning_rate": 1.901002665312857e-05, - "loss": 0.1616, + "epoch": 0.296969389309102, + "grad_norm": 1.7740875482559204, + "learning_rate": 1.8020204071272657e-05, + "loss": 0.1303, "step": 5850 }, { - "epoch": 0.14862292169056987, - "grad_norm": 0.7832803130149841, - "learning_rate": 1.9009180522062868e-05, - "loss": 0.1945, + "epoch": 0.2972232092999645, + "grad_norm": 0.44264400005340576, + "learning_rate": 1.8018511938000237e-05, + "loss": 0.141, "step": 5855 }, { - "epoch": 0.14874984135042518, - "grad_norm": 0.6287165880203247, - "learning_rate": 1.9008334390997166e-05, - "loss": 0.2043, + "epoch": 0.2974770292908269, + "grad_norm": 0.6758630275726318, + "learning_rate": 1.801681980472782e-05, + "loss": 0.1291, "step": 5860 }, { - "epoch": 0.1488767610102805, - "grad_norm": 0.5846396088600159, - "learning_rate": 1.9007488259931465e-05, - "loss": 0.1762, + "epoch": 0.2977308492816894, + "grad_norm": 0.6458776593208313, + "learning_rate": 1.8015127671455405e-05, + "loss": 0.1398, "step": 5865 }, { - "epoch": 0.1490036806701358, - "grad_norm": 0.7150173187255859, - "learning_rate": 1.9006642128865763e-05, - "loss": 0.1745, + "epoch": 0.2979846692725519, + "grad_norm": 0.6976125836372375, + "learning_rate": 1.8013435538182988e-05, + "loss": 0.1499, "step": 5870 }, { - "epoch": 0.14913060032999112, - "grad_norm": 1.1218910217285156, - "learning_rate": 1.900579599780006e-05, - "loss": 0.1731, + "epoch": 0.2982384892634144, + "grad_norm": 0.7639079689979553, + "learning_rate": 1.801174340491057e-05, + "loss": 0.1473, "step": 5875 }, { - "epoch": 0.14925751998984643, - "grad_norm": 1.0046144723892212, - "learning_rate": 1.900494986673436e-05, - "loss": 0.195, + "epoch": 0.2984923092542769, + "grad_norm": 0.5145253539085388, + "learning_rate": 1.8010051271638155e-05, + "loss": 0.127, "step": 5880 }, { - "epoch": 0.14938443964970174, - "grad_norm": 0.7430806756019592, - "learning_rate": 1.900410373566866e-05, - "loss": 0.1557, + "epoch": 0.2987461292451393, + "grad_norm": 0.5991470217704773, + "learning_rate": 1.800835913836574e-05, + "loss": 0.1434, "step": 5885 }, { - "epoch": 0.14951135930955706, - "grad_norm": 0.7575390338897705, - "learning_rate": 1.9003257604602953e-05, - "loss": 0.1926, + "epoch": 0.2989999492360018, + "grad_norm": 0.5576327443122864, + "learning_rate": 1.8006667005093322e-05, + "loss": 0.1258, "step": 5890 }, { - "epoch": 0.14963827896941237, - "grad_norm": 0.6205933094024658, - "learning_rate": 1.9002411473537252e-05, - "loss": 0.1527, + "epoch": 0.2992537692268643, + "grad_norm": 0.8021837472915649, + "learning_rate": 1.8004974871820906e-05, + "loss": 0.1543, "step": 5895 }, { - "epoch": 0.14976519862926768, - "grad_norm": 1.6096601486206055, - "learning_rate": 1.900156534247155e-05, - "loss": 0.1881, + "epoch": 0.2995075892177268, + "grad_norm": 0.6771152019500732, + "learning_rate": 1.800328273854849e-05, + "loss": 0.1559, "step": 5900 }, { - "epoch": 0.149892118289123, - "grad_norm": 0.930462121963501, - "learning_rate": 1.900071921140585e-05, - "loss": 0.1644, + "epoch": 0.2997614092085893, + "grad_norm": 0.6033034324645996, + "learning_rate": 1.8001590605276073e-05, + "loss": 0.1469, "step": 5905 }, { - "epoch": 0.1500190379489783, - "grad_norm": 0.6915737986564636, - "learning_rate": 1.8999873080340147e-05, - "loss": 0.1745, + "epoch": 0.3000152291994517, + "grad_norm": 0.4367592930793762, + "learning_rate": 1.7999898472003656e-05, + "loss": 0.1241, "step": 5910 }, { - "epoch": 0.15014595760883362, - "grad_norm": 0.8031389713287354, - "learning_rate": 1.8999026949274445e-05, - "loss": 0.2117, + "epoch": 0.3002690491903142, + "grad_norm": 0.8468542098999023, + "learning_rate": 1.799820633873124e-05, + "loss": 0.1336, "step": 5915 }, { - "epoch": 0.15027287726868893, - "grad_norm": 1.0050603151321411, - "learning_rate": 1.8998180818208744e-05, - "loss": 0.1815, + "epoch": 0.3005228691811767, + "grad_norm": 0.5358647108078003, + "learning_rate": 1.7996514205458824e-05, + "loss": 0.1435, "step": 5920 }, { - "epoch": 0.15039979692854424, - "grad_norm": 0.7323532700538635, - "learning_rate": 1.8997334687143042e-05, - "loss": 0.1904, + "epoch": 0.3007766891720392, + "grad_norm": 0.35355257987976074, + "learning_rate": 1.7994822072186407e-05, + "loss": 0.1468, "step": 5925 }, { - "epoch": 0.15052671658839956, - "grad_norm": 1.2183815240859985, - "learning_rate": 1.8996488556077337e-05, - "loss": 0.1912, + "epoch": 0.3010305091629017, + "grad_norm": 0.49814465641975403, + "learning_rate": 1.799312993891399e-05, + "loss": 0.133, "step": 5930 }, { - "epoch": 0.15065363624825487, - "grad_norm": 0.763821005821228, - "learning_rate": 1.8995642425011636e-05, - "loss": 0.1561, + "epoch": 0.30128432915376413, + "grad_norm": 0.505181074142456, + "learning_rate": 1.7991437805641574e-05, + "loss": 0.1573, "step": 5935 }, { - "epoch": 0.15078055590811015, - "grad_norm": 1.13330078125, - "learning_rate": 1.8994796293945934e-05, - "loss": 0.1649, + "epoch": 0.3015381491446266, + "grad_norm": 0.5839815139770508, + "learning_rate": 1.7989745672369158e-05, + "loss": 0.1351, "step": 5940 }, { - "epoch": 0.15090747556796547, - "grad_norm": 0.7391687035560608, - "learning_rate": 1.8993950162880232e-05, - "loss": 0.1719, + "epoch": 0.3017919691354891, + "grad_norm": 0.7596808075904846, + "learning_rate": 1.798805353909674e-05, + "loss": 0.1488, "step": 5945 }, { - "epoch": 0.15103439522782078, - "grad_norm": 0.6476704478263855, - "learning_rate": 1.8993104031814527e-05, - "loss": 0.1903, + "epoch": 0.3020457891263516, + "grad_norm": 1.0872101783752441, + "learning_rate": 1.7986361405824325e-05, + "loss": 0.1295, "step": 5950 }, { - "epoch": 0.1511613148876761, - "grad_norm": 0.6410671472549438, - "learning_rate": 1.8992257900748826e-05, - "loss": 0.1699, + "epoch": 0.3022996091172141, + "grad_norm": 0.4910557270050049, + "learning_rate": 1.798466927255191e-05, + "loss": 0.1453, "step": 5955 }, { - "epoch": 0.1512882345475314, - "grad_norm": 0.8846388459205627, - "learning_rate": 1.8991411769683124e-05, - "loss": 0.1614, + "epoch": 0.30255342910807653, + "grad_norm": 0.46220898628234863, + "learning_rate": 1.7982977139279492e-05, + "loss": 0.1402, "step": 5960 }, { - "epoch": 0.15141515420738672, - "grad_norm": 1.02809476852417, - "learning_rate": 1.8990565638617423e-05, - "loss": 0.1812, + "epoch": 0.302807249098939, + "grad_norm": 0.6212031245231628, + "learning_rate": 1.7981285006007075e-05, + "loss": 0.1473, "step": 5965 }, { - "epoch": 0.15154207386724203, - "grad_norm": 1.0188618898391724, - "learning_rate": 1.898971950755172e-05, - "loss": 0.2332, + "epoch": 0.3030610690898015, + "grad_norm": 1.0893614292144775, + "learning_rate": 1.797959287273466e-05, + "loss": 0.1528, "step": 5970 }, { - "epoch": 0.15166899352709734, - "grad_norm": 0.8200662136077881, - "learning_rate": 1.898887337648602e-05, - "loss": 0.1792, + "epoch": 0.303314889080664, + "grad_norm": 0.6324992775917053, + "learning_rate": 1.7977900739462242e-05, + "loss": 0.1384, "step": 5975 }, { - "epoch": 0.15179591318695265, - "grad_norm": 0.7407543659210205, - "learning_rate": 1.8988027245420318e-05, - "loss": 0.1834, + "epoch": 0.3035687090715265, + "grad_norm": 0.4653703272342682, + "learning_rate": 1.7976208606189826e-05, + "loss": 0.1314, "step": 5980 }, { - "epoch": 0.15192283284680796, - "grad_norm": 0.7434132695198059, - "learning_rate": 1.8987181114354616e-05, - "loss": 0.1917, + "epoch": 0.30382252906238894, + "grad_norm": 0.901879608631134, + "learning_rate": 1.797451647291741e-05, + "loss": 0.1424, "step": 5985 }, { - "epoch": 0.15204975250666328, - "grad_norm": 0.645500659942627, - "learning_rate": 1.898633498328891e-05, - "loss": 0.17, + "epoch": 0.3040763490532514, + "grad_norm": 0.7357150316238403, + "learning_rate": 1.7972824339644993e-05, + "loss": 0.1383, "step": 5990 }, { - "epoch": 0.1521766721665186, - "grad_norm": 0.8983036279678345, - "learning_rate": 1.898548885222321e-05, - "loss": 0.1787, + "epoch": 0.3043301690441139, + "grad_norm": 0.4859638810157776, + "learning_rate": 1.7971132206372573e-05, + "loss": 0.1473, "step": 5995 }, { - "epoch": 0.1523035918263739, - "grad_norm": 0.5680031180381775, - "learning_rate": 1.8984642721157508e-05, - "loss": 0.1923, + "epoch": 0.3045839890349764, + "grad_norm": 0.5311628580093384, + "learning_rate": 1.796944007310016e-05, + "loss": 0.1479, "step": 6000 }, { - "epoch": 0.15243051148622921, - "grad_norm": 1.317458152770996, - "learning_rate": 1.8983796590091806e-05, - "loss": 0.1853, + "epoch": 0.3048378090258389, + "grad_norm": 0.48468127846717834, + "learning_rate": 1.7967747939827744e-05, + "loss": 0.1397, "step": 6005 }, { - "epoch": 0.15255743114608453, - "grad_norm": 0.9032878875732422, - "learning_rate": 1.8982950459026105e-05, - "loss": 0.1761, + "epoch": 0.30509162901670134, + "grad_norm": 0.5624568462371826, + "learning_rate": 1.7966055806555324e-05, + "loss": 0.1426, "step": 6010 }, { - "epoch": 0.15268435080593984, - "grad_norm": 0.9521228075027466, - "learning_rate": 1.8982104327960403e-05, - "loss": 0.1544, + "epoch": 0.30534544900756383, + "grad_norm": 0.6200907826423645, + "learning_rate": 1.796436367328291e-05, + "loss": 0.1372, "step": 6015 }, { - "epoch": 0.15281127046579515, - "grad_norm": 0.7223473787307739, - "learning_rate": 1.89812581968947e-05, - "loss": 0.1488, + "epoch": 0.3055992689984263, + "grad_norm": 0.8948644995689392, + "learning_rate": 1.796267154001049e-05, + "loss": 0.1448, "step": 6020 }, { - "epoch": 0.15293819012565046, - "grad_norm": 0.9519270658493042, - "learning_rate": 1.8980412065829e-05, - "loss": 0.1568, + "epoch": 0.3058530889892888, + "grad_norm": 0.6271385550498962, + "learning_rate": 1.7960979406738078e-05, + "loss": 0.1432, "step": 6025 }, { - "epoch": 0.15306510978550578, - "grad_norm": 1.1192741394042969, - "learning_rate": 1.8979565934763295e-05, - "loss": 0.1894, + "epoch": 0.3061069089801513, + "grad_norm": 0.6979460120201111, + "learning_rate": 1.795928727346566e-05, + "loss": 0.1276, "step": 6030 }, { - "epoch": 0.1531920294453611, - "grad_norm": 0.9851511716842651, - "learning_rate": 1.8978719803697593e-05, - "loss": 0.2149, + "epoch": 0.30636072897101374, + "grad_norm": 1.2904983758926392, + "learning_rate": 1.795759514019324e-05, + "loss": 0.1406, "step": 6035 }, { - "epoch": 0.1533189491052164, - "grad_norm": 0.7892588973045349, - "learning_rate": 1.8977873672631892e-05, - "loss": 0.1842, + "epoch": 0.30661454896187623, + "grad_norm": 0.6335322856903076, + "learning_rate": 1.795590300692083e-05, + "loss": 0.1433, "step": 6040 }, { - "epoch": 0.1534458687650717, - "grad_norm": 0.7000312805175781, - "learning_rate": 1.897702754156619e-05, - "loss": 0.1495, + "epoch": 0.3068683689527387, + "grad_norm": 0.4658793807029724, + "learning_rate": 1.795421087364841e-05, + "loss": 0.1318, "step": 6045 }, { - "epoch": 0.15357278842492703, - "grad_norm": 0.7631243467330933, - "learning_rate": 1.897618141050049e-05, - "loss": 0.1837, + "epoch": 0.3071221889436012, + "grad_norm": 0.5421667098999023, + "learning_rate": 1.7952518740375992e-05, + "loss": 0.142, "step": 6050 }, { - "epoch": 0.15369970808478234, - "grad_norm": 0.5179776549339294, - "learning_rate": 1.8975335279434787e-05, - "loss": 0.2, + "epoch": 0.30737600893446365, + "grad_norm": 0.6543579697608948, + "learning_rate": 1.7950826607103576e-05, + "loss": 0.1387, "step": 6055 }, { - "epoch": 0.15382662774463765, - "grad_norm": 0.9721294045448303, - "learning_rate": 1.8974489148369085e-05, - "loss": 0.229, + "epoch": 0.30762982892532614, + "grad_norm": 0.4661172330379486, + "learning_rate": 1.794913447383116e-05, + "loss": 0.142, "step": 6060 }, { - "epoch": 0.15395354740449296, - "grad_norm": 1.1289331912994385, - "learning_rate": 1.8973643017303384e-05, - "loss": 0.1602, + "epoch": 0.30788364891618863, + "grad_norm": 0.4072614014148712, + "learning_rate": 1.7947442340558743e-05, + "loss": 0.133, "step": 6065 }, { - "epoch": 0.15408046706434828, - "grad_norm": 0.7342250347137451, - "learning_rate": 1.897279688623768e-05, - "loss": 0.1674, + "epoch": 0.3081374689070511, + "grad_norm": 0.46051713824272156, + "learning_rate": 1.7945750207286326e-05, + "loss": 0.1249, "step": 6070 }, { - "epoch": 0.1542073867242036, - "grad_norm": 1.94434654712677, - "learning_rate": 1.8971950755171977e-05, - "loss": 0.1798, + "epoch": 0.3083912888979136, + "grad_norm": 0.5725454092025757, + "learning_rate": 1.794405807401391e-05, + "loss": 0.1221, "step": 6075 }, { - "epoch": 0.1543343063840589, - "grad_norm": 1.1544111967086792, - "learning_rate": 1.8971104624106276e-05, - "loss": 0.1822, + "epoch": 0.30864510888877605, + "grad_norm": 0.5743607878684998, + "learning_rate": 1.7942365940741494e-05, + "loss": 0.1223, "step": 6080 }, { - "epoch": 0.1544612260439142, - "grad_norm": 0.8727169036865234, - "learning_rate": 1.8970258493040574e-05, - "loss": 0.2229, + "epoch": 0.30889892887963855, + "grad_norm": 0.7320393919944763, + "learning_rate": 1.7940673807469077e-05, + "loss": 0.1453, "step": 6085 }, { - "epoch": 0.15458814570376953, - "grad_norm": 0.9148867130279541, - "learning_rate": 1.896941236197487e-05, - "loss": 0.187, + "epoch": 0.30915274887050104, + "grad_norm": 0.4180808365345001, + "learning_rate": 1.793898167419666e-05, + "loss": 0.1409, "step": 6090 }, { - "epoch": 0.15471506536362484, - "grad_norm": 0.916836678981781, - "learning_rate": 1.8968566230909168e-05, - "loss": 0.1875, + "epoch": 0.30940656886136353, + "grad_norm": 0.5129685997962952, + "learning_rate": 1.7937289540924244e-05, + "loss": 0.1372, "step": 6095 }, { - "epoch": 0.15484198502348015, - "grad_norm": 0.6603026986122131, - "learning_rate": 1.8967720099843466e-05, - "loss": 0.1399, + "epoch": 0.309660388852226, + "grad_norm": 0.630707859992981, + "learning_rate": 1.7935597407651828e-05, + "loss": 0.1319, "step": 6100 }, { - "epoch": 0.15496890468333546, - "grad_norm": 0.7205612659454346, - "learning_rate": 1.8966873968777764e-05, - "loss": 0.1468, + "epoch": 0.30991420884308846, + "grad_norm": 0.5199107527732849, + "learning_rate": 1.793390527437941e-05, + "loss": 0.1371, "step": 6105 }, { - "epoch": 0.15509582434319075, - "grad_norm": 0.8641361594200134, - "learning_rate": 1.8966027837712063e-05, - "loss": 0.1751, + "epoch": 0.31016802883395095, + "grad_norm": 0.5185748338699341, + "learning_rate": 1.7932213141106995e-05, + "loss": 0.1467, "step": 6110 }, { - "epoch": 0.15522274400304606, - "grad_norm": 0.838188111782074, - "learning_rate": 1.896518170664636e-05, - "loss": 0.1932, + "epoch": 0.31042184882481344, + "grad_norm": 0.7415216565132141, + "learning_rate": 1.793052100783458e-05, + "loss": 0.1482, "step": 6115 }, { - "epoch": 0.15534966366290137, - "grad_norm": 0.6686227917671204, - "learning_rate": 1.896433557558066e-05, - "loss": 0.1677, + "epoch": 0.31067566881567593, + "grad_norm": 0.5409974455833435, + "learning_rate": 1.7928828874562162e-05, + "loss": 0.1218, "step": 6120 }, { - "epoch": 0.15547658332275668, - "grad_norm": 0.7871036529541016, - "learning_rate": 1.8963489444514958e-05, - "loss": 0.1933, + "epoch": 0.3109294888065384, + "grad_norm": 0.47021251916885376, + "learning_rate": 1.7927136741289745e-05, + "loss": 0.137, "step": 6125 }, { - "epoch": 0.155603502982612, - "grad_norm": 1.1935811042785645, - "learning_rate": 1.8962643313449253e-05, - "loss": 0.1592, + "epoch": 0.31118330879740086, + "grad_norm": 0.36402925848960876, + "learning_rate": 1.792544460801733e-05, + "loss": 0.1197, "step": 6130 }, { - "epoch": 0.1557304226424673, - "grad_norm": 0.7604871392250061, - "learning_rate": 1.896179718238355e-05, - "loss": 0.1921, + "epoch": 0.31143712878826335, + "grad_norm": 0.485312819480896, + "learning_rate": 1.7923752474744913e-05, + "loss": 0.1321, "step": 6135 }, { - "epoch": 0.15585734230232262, - "grad_norm": 0.7212712168693542, - "learning_rate": 1.896095105131785e-05, - "loss": 0.1538, + "epoch": 0.31169094877912584, + "grad_norm": 0.7221769690513611, + "learning_rate": 1.7922060341472496e-05, + "loss": 0.1479, "step": 6140 }, { - "epoch": 0.15598426196217793, - "grad_norm": 0.7861191630363464, - "learning_rate": 1.8960104920252148e-05, - "loss": 0.172, + "epoch": 0.31194476876998833, + "grad_norm": 0.722754180431366, + "learning_rate": 1.792036820820008e-05, + "loss": 0.136, "step": 6145 }, { - "epoch": 0.15611118162203325, - "grad_norm": 0.7203767895698547, - "learning_rate": 1.8959258789186447e-05, - "loss": 0.1613, + "epoch": 0.3121985887608508, + "grad_norm": 0.43338173627853394, + "learning_rate": 1.7918676074927663e-05, + "loss": 0.135, "step": 6150 }, { - "epoch": 0.15623810128188856, - "grad_norm": 0.7216160297393799, - "learning_rate": 1.8958412658120745e-05, - "loss": 0.1567, + "epoch": 0.31245240875171326, + "grad_norm": 0.7877047061920166, + "learning_rate": 1.7916983941655247e-05, + "loss": 0.1345, "step": 6155 }, { - "epoch": 0.15636502094174387, - "grad_norm": 0.8903867602348328, - "learning_rate": 1.8957566527055043e-05, - "loss": 0.1736, + "epoch": 0.31270622874257575, + "grad_norm": 0.8918716907501221, + "learning_rate": 1.791529180838283e-05, + "loss": 0.1506, "step": 6160 }, { - "epoch": 0.15649194060159918, - "grad_norm": 0.9110920429229736, - "learning_rate": 1.8956720395989342e-05, - "loss": 0.1823, + "epoch": 0.31296004873343825, + "grad_norm": 0.4334893524646759, + "learning_rate": 1.7913599675110414e-05, + "loss": 0.1468, "step": 6165 }, { - "epoch": 0.1566188602614545, - "grad_norm": 0.6396601796150208, - "learning_rate": 1.8955874264923637e-05, - "loss": 0.2043, + "epoch": 0.31321386872430074, + "grad_norm": 0.6378242373466492, + "learning_rate": 1.7911907541837997e-05, + "loss": 0.1381, "step": 6170 }, { - "epoch": 0.1567457799213098, - "grad_norm": 1.394593596458435, - "learning_rate": 1.8955028133857935e-05, - "loss": 0.2142, + "epoch": 0.31346768871516323, + "grad_norm": 0.6442030072212219, + "learning_rate": 1.791021540856558e-05, + "loss": 0.1546, "step": 6175 }, { - "epoch": 0.15687269958116512, - "grad_norm": 1.4538708925247192, - "learning_rate": 1.8954182002792234e-05, - "loss": 0.1762, + "epoch": 0.31372150870602566, + "grad_norm": 0.5167520046234131, + "learning_rate": 1.7908523275293164e-05, + "loss": 0.1357, "step": 6180 }, { - "epoch": 0.15699961924102043, - "grad_norm": 1.5693036317825317, - "learning_rate": 1.8953335871726532e-05, - "loss": 0.2055, + "epoch": 0.31397532869688816, + "grad_norm": 0.5824690461158752, + "learning_rate": 1.7906831142020748e-05, + "loss": 0.1268, "step": 6185 }, { - "epoch": 0.15712653890087575, - "grad_norm": 0.7161654829978943, - "learning_rate": 1.895248974066083e-05, - "loss": 0.1785, + "epoch": 0.31422914868775065, + "grad_norm": 0.7088459134101868, + "learning_rate": 1.790513900874833e-05, + "loss": 0.1586, "step": 6190 }, { - "epoch": 0.15725345856073106, - "grad_norm": 1.031315803527832, - "learning_rate": 1.895164360959513e-05, - "loss": 0.1844, + "epoch": 0.31448296867861314, + "grad_norm": 0.6302781701087952, + "learning_rate": 1.7903446875475915e-05, + "loss": 0.1298, "step": 6195 }, { - "epoch": 0.15738037822058637, - "grad_norm": 0.8810437321662903, - "learning_rate": 1.8950797478529427e-05, - "loss": 0.1701, + "epoch": 0.31473678866947563, + "grad_norm": 0.5944651365280151, + "learning_rate": 1.7901754742203495e-05, + "loss": 0.1448, "step": 6200 }, { - "epoch": 0.15750729788044168, - "grad_norm": 0.7988215088844299, - "learning_rate": 1.8949951347463726e-05, - "loss": 0.1649, + "epoch": 0.31499060866033807, + "grad_norm": 0.679571270942688, + "learning_rate": 1.7900062608931082e-05, + "loss": 0.1469, "step": 6205 }, { - "epoch": 0.157634217540297, - "grad_norm": 0.9385743141174316, - "learning_rate": 1.8949105216398024e-05, - "loss": 0.1951, + "epoch": 0.31524442865120056, + "grad_norm": 0.522102952003479, + "learning_rate": 1.7898370475658666e-05, + "loss": 0.1414, "step": 6210 }, { - "epoch": 0.1577611372001523, - "grad_norm": 0.8391952514648438, - "learning_rate": 1.894825908533232e-05, - "loss": 0.2017, + "epoch": 0.31549824864206305, + "grad_norm": 0.5716415047645569, + "learning_rate": 1.789667834238625e-05, + "loss": 0.1409, "step": 6215 }, { - "epoch": 0.15788805686000762, - "grad_norm": 0.7573888897895813, - "learning_rate": 1.8947412954266617e-05, - "loss": 0.178, + "epoch": 0.31575206863292554, + "grad_norm": 0.901875913143158, + "learning_rate": 1.7894986209113833e-05, + "loss": 0.1292, "step": 6220 }, { - "epoch": 0.15801497651986293, - "grad_norm": 0.7835760116577148, - "learning_rate": 1.8946566823200916e-05, - "loss": 0.1954, + "epoch": 0.31600588862378803, + "grad_norm": 0.7464191317558289, + "learning_rate": 1.7893294075841413e-05, + "loss": 0.1411, "step": 6225 }, { - "epoch": 0.15814189617971824, - "grad_norm": 0.9245469570159912, - "learning_rate": 1.8945720692135214e-05, - "loss": 0.1995, + "epoch": 0.31625970861465047, + "grad_norm": 0.451816201210022, + "learning_rate": 1.7891601942569e-05, + "loss": 0.1368, "step": 6230 }, { - "epoch": 0.15826881583957356, - "grad_norm": 0.6028946042060852, - "learning_rate": 1.894487456106951e-05, - "loss": 0.1975, + "epoch": 0.31651352860551296, + "grad_norm": 0.6408305168151855, + "learning_rate": 1.788990980929658e-05, + "loss": 0.1352, "step": 6235 }, { - "epoch": 0.15839573549942887, - "grad_norm": 1.001177430152893, - "learning_rate": 1.8944028430003808e-05, - "loss": 0.1998, + "epoch": 0.31676734859637545, + "grad_norm": 0.4555467367172241, + "learning_rate": 1.7888217676024164e-05, + "loss": 0.1309, "step": 6240 }, { - "epoch": 0.15852265515928418, - "grad_norm": 0.7765277028083801, - "learning_rate": 1.8943182298938106e-05, - "loss": 0.1972, + "epoch": 0.31702116858723794, + "grad_norm": 0.46165990829467773, + "learning_rate": 1.788652554275175e-05, + "loss": 0.1435, "step": 6245 }, { - "epoch": 0.1586495748191395, - "grad_norm": 1.9572219848632812, - "learning_rate": 1.8942336167872404e-05, - "loss": 0.1693, + "epoch": 0.31727498857810044, + "grad_norm": 0.40598687529563904, + "learning_rate": 1.788483340947933e-05, + "loss": 0.1415, "step": 6250 }, { - "epoch": 0.1587764944789948, - "grad_norm": 0.9383246898651123, - "learning_rate": 1.8941490036806703e-05, - "loss": 0.1855, + "epoch": 0.3175288085689629, + "grad_norm": 0.6253679990768433, + "learning_rate": 1.7883141276206914e-05, + "loss": 0.125, "step": 6255 }, { - "epoch": 0.15890341413885012, - "grad_norm": 1.1316033601760864, - "learning_rate": 1.8940643905741e-05, - "loss": 0.2005, + "epoch": 0.31778262855982536, + "grad_norm": 0.46861565113067627, + "learning_rate": 1.7881449142934498e-05, + "loss": 0.1302, "step": 6260 }, { - "epoch": 0.15903033379870543, - "grad_norm": 0.657035231590271, - "learning_rate": 1.89397977746753e-05, - "loss": 0.1636, + "epoch": 0.31803644855068786, + "grad_norm": 0.6676781177520752, + "learning_rate": 1.787975700966208e-05, + "loss": 0.1358, "step": 6265 }, { - "epoch": 0.15915725345856074, - "grad_norm": 1.0564684867858887, - "learning_rate": 1.8938951643609598e-05, - "loss": 0.1989, + "epoch": 0.31829026854155035, + "grad_norm": 0.6015679240226746, + "learning_rate": 1.7878064876389668e-05, + "loss": 0.1259, "step": 6270 }, { - "epoch": 0.15928417311841606, - "grad_norm": 1.1770910024642944, - "learning_rate": 1.8938105512543893e-05, - "loss": 0.1636, + "epoch": 0.31854408853241284, + "grad_norm": 0.5872548818588257, + "learning_rate": 1.787637274311725e-05, + "loss": 0.138, "step": 6275 }, { - "epoch": 0.15941109277827134, - "grad_norm": 0.7809187173843384, - "learning_rate": 1.893725938147819e-05, - "loss": 0.1563, + "epoch": 0.3187979085232753, + "grad_norm": 0.4974575936794281, + "learning_rate": 1.7874680609844832e-05, + "loss": 0.1137, "step": 6280 }, { - "epoch": 0.15953801243812665, - "grad_norm": 0.8313079476356506, - "learning_rate": 1.893641325041249e-05, - "loss": 0.1442, + "epoch": 0.31905172851413777, + "grad_norm": 0.5124915838241577, + "learning_rate": 1.7872988476572415e-05, + "loss": 0.1423, "step": 6285 }, { - "epoch": 0.15966493209798197, - "grad_norm": 1.113350510597229, - "learning_rate": 1.8935567119346788e-05, - "loss": 0.1934, + "epoch": 0.31930554850500026, + "grad_norm": 0.5141370892524719, + "learning_rate": 1.78712963433e-05, + "loss": 0.1341, "step": 6290 }, { - "epoch": 0.15979185175783728, - "grad_norm": 0.8417625427246094, - "learning_rate": 1.8934720988281087e-05, - "loss": 0.1696, + "epoch": 0.31955936849586275, + "grad_norm": 0.621908962726593, + "learning_rate": 1.7869604210027583e-05, + "loss": 0.1241, "step": 6295 }, { - "epoch": 0.1599187714176926, - "grad_norm": 1.0500043630599976, - "learning_rate": 1.8933874857215385e-05, - "loss": 0.2163, + "epoch": 0.31981318848672524, + "grad_norm": 0.8363323211669922, + "learning_rate": 1.7867912076755166e-05, + "loss": 0.1406, "step": 6300 }, { - "epoch": 0.1600456910775479, - "grad_norm": 0.9974502921104431, - "learning_rate": 1.8933028726149683e-05, - "loss": 0.1724, + "epoch": 0.3200670084775877, + "grad_norm": 0.48739150166511536, + "learning_rate": 1.786621994348275e-05, + "loss": 0.1296, "step": 6305 }, { - "epoch": 0.16017261073740321, - "grad_norm": 0.7254921793937683, - "learning_rate": 1.8932182595083982e-05, - "loss": 0.1377, + "epoch": 0.32032082846845017, + "grad_norm": 0.629400908946991, + "learning_rate": 1.7864527810210333e-05, + "loss": 0.1317, "step": 6310 }, { - "epoch": 0.16029953039725853, - "grad_norm": 1.007614254951477, - "learning_rate": 1.8931336464018277e-05, - "loss": 0.2237, + "epoch": 0.32057464845931266, + "grad_norm": 0.4386022090911865, + "learning_rate": 1.7862835676937917e-05, + "loss": 0.1283, "step": 6315 }, { - "epoch": 0.16042645005711384, - "grad_norm": 0.6485284566879272, - "learning_rate": 1.8930490332952575e-05, - "loss": 0.1625, + "epoch": 0.32082846845017515, + "grad_norm": 0.44304966926574707, + "learning_rate": 1.78611435436655e-05, + "loss": 0.129, "step": 6320 }, { - "epoch": 0.16055336971696915, - "grad_norm": 0.7313831448554993, - "learning_rate": 1.8929644201886874e-05, - "loss": 0.1641, + "epoch": 0.3210822884410376, + "grad_norm": 0.8448670506477356, + "learning_rate": 1.7859451410393084e-05, + "loss": 0.1452, "step": 6325 }, { - "epoch": 0.16068028937682446, - "grad_norm": 0.8221515417098999, - "learning_rate": 1.8928798070821172e-05, - "loss": 0.1772, + "epoch": 0.3213361084319001, + "grad_norm": 0.4689197838306427, + "learning_rate": 1.7857759277120667e-05, + "loss": 0.1127, "step": 6330 }, { - "epoch": 0.16080720903667978, - "grad_norm": 1.1429940462112427, - "learning_rate": 1.892795193975547e-05, - "loss": 0.2182, + "epoch": 0.32158992842276257, + "grad_norm": 0.6370121240615845, + "learning_rate": 1.785606714384825e-05, + "loss": 0.1357, "step": 6335 }, { - "epoch": 0.1609341286965351, - "grad_norm": 0.6285759806632996, - "learning_rate": 1.892710580868977e-05, - "loss": 0.1758, + "epoch": 0.32184374841362506, + "grad_norm": 0.5023421049118042, + "learning_rate": 1.7854375010575834e-05, + "loss": 0.1465, "step": 6340 }, { - "epoch": 0.1610610483563904, - "grad_norm": 0.799536406993866, - "learning_rate": 1.8926259677624067e-05, - "loss": 0.1351, + "epoch": 0.32209756840448756, + "grad_norm": 0.6802017688751221, + "learning_rate": 1.7852682877303418e-05, + "loss": 0.1416, "step": 6345 }, { - "epoch": 0.16118796801624571, - "grad_norm": 1.9492549896240234, - "learning_rate": 1.8925413546558366e-05, - "loss": 0.1877, + "epoch": 0.32235138839535, + "grad_norm": 0.48663991689682007, + "learning_rate": 1.7850990744031e-05, + "loss": 0.1262, "step": 6350 }, { - "epoch": 0.16131488767610103, - "grad_norm": 5.557721138000488, - "learning_rate": 1.892456741549266e-05, - "loss": 0.1623, + "epoch": 0.3226052083862125, + "grad_norm": 0.43129396438598633, + "learning_rate": 1.7849298610758585e-05, + "loss": 0.1402, "step": 6355 }, { - "epoch": 0.16144180733595634, - "grad_norm": 1.4518221616744995, - "learning_rate": 1.892372128442696e-05, - "loss": 0.1403, + "epoch": 0.322859028377075, + "grad_norm": 0.7644272446632385, + "learning_rate": 1.784760647748617e-05, + "loss": 0.1396, "step": 6360 }, { - "epoch": 0.16156872699581165, - "grad_norm": 0.6306847333908081, - "learning_rate": 1.8922875153361258e-05, - "loss": 0.1895, + "epoch": 0.32311284836793747, + "grad_norm": 0.6173144578933716, + "learning_rate": 1.7845914344213752e-05, + "loss": 0.1371, "step": 6365 }, { - "epoch": 0.16169564665566696, - "grad_norm": 0.6823400259017944, - "learning_rate": 1.8922029022295556e-05, - "loss": 0.1613, + "epoch": 0.32336666835879996, + "grad_norm": 0.6148359179496765, + "learning_rate": 1.7844222210941336e-05, + "loss": 0.1211, "step": 6370 }, { - "epoch": 0.16182256631552228, - "grad_norm": 1.1895471811294556, - "learning_rate": 1.892118289122985e-05, - "loss": 0.2092, + "epoch": 0.3236204883496624, + "grad_norm": 0.5884629487991333, + "learning_rate": 1.784253007766892e-05, + "loss": 0.126, "step": 6375 }, { - "epoch": 0.1619494859753776, - "grad_norm": 0.8734690546989441, - "learning_rate": 1.892033676016415e-05, - "loss": 0.1976, + "epoch": 0.3238743083405249, + "grad_norm": 0.6891257166862488, + "learning_rate": 1.7840837944396503e-05, + "loss": 0.1325, "step": 6380 }, { - "epoch": 0.1620764056352329, - "grad_norm": 1.2854211330413818, - "learning_rate": 1.8919490629098448e-05, - "loss": 0.1943, + "epoch": 0.3241281283313874, + "grad_norm": 0.5167293548583984, + "learning_rate": 1.7839145811124086e-05, + "loss": 0.1283, "step": 6385 }, { - "epoch": 0.1622033252950882, - "grad_norm": 0.692595899105072, - "learning_rate": 1.8918644498032746e-05, - "loss": 0.1873, + "epoch": 0.32438194832224987, + "grad_norm": 0.42851531505584717, + "learning_rate": 1.783745367785167e-05, + "loss": 0.1353, "step": 6390 }, { - "epoch": 0.16233024495494353, - "grad_norm": 0.6211111545562744, - "learning_rate": 1.8917798366967045e-05, - "loss": 0.2008, + "epoch": 0.32463576831311236, + "grad_norm": 0.4503128230571747, + "learning_rate": 1.7835761544579253e-05, + "loss": 0.1259, "step": 6395 }, { - "epoch": 0.16245716461479884, - "grad_norm": 0.6592433452606201, - "learning_rate": 1.8916952235901343e-05, - "loss": 0.154, + "epoch": 0.3248895883039748, + "grad_norm": 0.8484407067298889, + "learning_rate": 1.7834069411306837e-05, + "loss": 0.1467, "step": 6400 }, { - "epoch": 0.16258408427465415, - "grad_norm": 0.9100998640060425, - "learning_rate": 1.891610610483564e-05, - "loss": 0.2066, + "epoch": 0.3251434082948373, + "grad_norm": 0.5505496263504028, + "learning_rate": 1.7832377278034417e-05, + "loss": 0.1503, "step": 6405 }, { - "epoch": 0.16271100393450946, - "grad_norm": 0.9442713856697083, - "learning_rate": 1.891525997376994e-05, - "loss": 0.1669, + "epoch": 0.3253972282856998, + "grad_norm": 0.5813527703285217, + "learning_rate": 1.7830685144762004e-05, + "loss": 0.1375, "step": 6410 }, { - "epoch": 0.16283792359436478, - "grad_norm": 1.1889876127243042, - "learning_rate": 1.8914413842704235e-05, - "loss": 0.1736, + "epoch": 0.32565104827656227, + "grad_norm": 0.4346156120300293, + "learning_rate": 1.7828993011489584e-05, + "loss": 0.1548, "step": 6415 }, { - "epoch": 0.1629648432542201, - "grad_norm": 0.8376091122627258, - "learning_rate": 1.8913567711638533e-05, - "loss": 0.1332, + "epoch": 0.32590486826742476, + "grad_norm": 0.8428875207901001, + "learning_rate": 1.782730087821717e-05, + "loss": 0.1355, "step": 6420 }, { - "epoch": 0.1630917629140754, - "grad_norm": 0.8283790349960327, - "learning_rate": 1.891272158057283e-05, - "loss": 0.1745, + "epoch": 0.3261586882582872, + "grad_norm": 0.5086374878883362, + "learning_rate": 1.7825608744944755e-05, + "loss": 0.1477, "step": 6425 }, { - "epoch": 0.1632186825739307, - "grad_norm": 1.3841686248779297, - "learning_rate": 1.891187544950713e-05, - "loss": 0.1835, + "epoch": 0.3264125082491497, + "grad_norm": 0.49893704056739807, + "learning_rate": 1.7823916611672335e-05, + "loss": 0.1396, "step": 6430 }, { - "epoch": 0.16334560223378602, - "grad_norm": 0.6143671870231628, - "learning_rate": 1.891102931844143e-05, - "loss": 0.1613, + "epoch": 0.3266663282400122, + "grad_norm": 0.4795598089694977, + "learning_rate": 1.7822224478399922e-05, + "loss": 0.1382, "step": 6435 }, { - "epoch": 0.16347252189364134, - "grad_norm": 0.9216020107269287, - "learning_rate": 1.8910183187375727e-05, - "loss": 0.1758, + "epoch": 0.3269201482308747, + "grad_norm": 0.5606343150138855, + "learning_rate": 1.7820532345127502e-05, + "loss": 0.1386, "step": 6440 }, { - "epoch": 0.16359944155349665, - "grad_norm": 0.6763547658920288, - "learning_rate": 1.8909337056310025e-05, - "loss": 0.2026, + "epoch": 0.32717396822173717, + "grad_norm": 0.48243793845176697, + "learning_rate": 1.7818840211855086e-05, + "loss": 0.1241, "step": 6445 }, { - "epoch": 0.16372636121335196, - "grad_norm": 0.5535255670547485, - "learning_rate": 1.8908490925244324e-05, - "loss": 0.1895, + "epoch": 0.3274277882125996, + "grad_norm": 0.5099393725395203, + "learning_rate": 1.7817148078582672e-05, + "loss": 0.1387, "step": 6450 }, { - "epoch": 0.16385328087320725, - "grad_norm": 0.8237041234970093, - "learning_rate": 1.890764479417862e-05, - "loss": 0.1802, + "epoch": 0.3276816082034621, + "grad_norm": 0.3960450291633606, + "learning_rate": 1.7815455945310253e-05, + "loss": 0.1212, "step": 6455 }, { - "epoch": 0.16398020053306256, - "grad_norm": 0.6463406682014465, - "learning_rate": 1.8906798663112917e-05, - "loss": 0.1746, + "epoch": 0.3279354281943246, + "grad_norm": 0.7666064500808716, + "learning_rate": 1.781376381203784e-05, + "loss": 0.1406, "step": 6460 }, { - "epoch": 0.16410712019291787, - "grad_norm": 0.7955700755119324, - "learning_rate": 1.8905952532047215e-05, - "loss": 0.1766, + "epoch": 0.3281892481851871, + "grad_norm": 0.6125515699386597, + "learning_rate": 1.781207167876542e-05, + "loss": 0.1341, "step": 6465 }, { - "epoch": 0.16423403985277318, - "grad_norm": 0.819997251033783, - "learning_rate": 1.8905106400981514e-05, - "loss": 0.1807, + "epoch": 0.32844306817604957, + "grad_norm": 0.5433800220489502, + "learning_rate": 1.7810379545493003e-05, + "loss": 0.1335, "step": 6470 }, { - "epoch": 0.1643609595126285, - "grad_norm": 1.09817373752594, - "learning_rate": 1.8904260269915812e-05, - "loss": 0.1589, + "epoch": 0.328696888166912, + "grad_norm": 0.5496737957000732, + "learning_rate": 1.780868741222059e-05, + "loss": 0.1399, "step": 6475 }, { - "epoch": 0.1644878791724838, - "grad_norm": 0.765940248966217, - "learning_rate": 1.890341413885011e-05, - "loss": 0.2041, + "epoch": 0.3289507081577745, + "grad_norm": 0.549060583114624, + "learning_rate": 1.780699527894817e-05, + "loss": 0.1251, "step": 6480 }, { - "epoch": 0.16461479883233912, - "grad_norm": 0.7870344519615173, - "learning_rate": 1.890256800778441e-05, - "loss": 0.1878, + "epoch": 0.329204528148637, + "grad_norm": 0.4195408821105957, + "learning_rate": 1.7805303145675754e-05, + "loss": 0.1245, "step": 6485 }, { - "epoch": 0.16474171849219443, - "grad_norm": 0.6039844751358032, - "learning_rate": 1.8901721876718707e-05, - "loss": 0.1726, + "epoch": 0.3294583481394995, + "grad_norm": 0.48527300357818604, + "learning_rate": 1.7803611012403337e-05, + "loss": 0.1325, "step": 6490 }, { - "epoch": 0.16486863815204975, - "grad_norm": 2.104506492614746, - "learning_rate": 1.8900875745653002e-05, - "loss": 0.1803, + "epoch": 0.32971216813036197, + "grad_norm": 0.4478241801261902, + "learning_rate": 1.780191887913092e-05, + "loss": 0.1227, "step": 6495 }, { - "epoch": 0.16499555781190506, - "grad_norm": 0.44605669379234314, - "learning_rate": 1.89000296145873e-05, - "loss": 0.1959, + "epoch": 0.3299659881212244, + "grad_norm": 0.3931959569454193, + "learning_rate": 1.7800226745858505e-05, + "loss": 0.1245, "step": 6500 }, { - "epoch": 0.16512247747176037, - "grad_norm": 0.7596286535263062, - "learning_rate": 1.88991834835216e-05, - "loss": 0.1739, + "epoch": 0.3302198081120869, + "grad_norm": 0.33147504925727844, + "learning_rate": 1.7798534612586088e-05, + "loss": 0.1131, "step": 6505 }, { - "epoch": 0.16524939713161568, - "grad_norm": 0.5393249988555908, - "learning_rate": 1.8898337352455898e-05, - "loss": 0.1735, + "epoch": 0.3304736281029494, + "grad_norm": 0.6583611965179443, + "learning_rate": 1.779684247931367e-05, + "loss": 0.1493, "step": 6510 }, { - "epoch": 0.165376316791471, - "grad_norm": 0.7619224190711975, - "learning_rate": 1.8897491221390193e-05, - "loss": 0.1708, + "epoch": 0.3307274480938119, + "grad_norm": 0.5396264791488647, + "learning_rate": 1.7795150346041255e-05, + "loss": 0.1239, "step": 6515 }, { - "epoch": 0.1655032364513263, - "grad_norm": 0.7943680286407471, - "learning_rate": 1.889664509032449e-05, - "loss": 0.1854, + "epoch": 0.3309812680846744, + "grad_norm": 0.5870803594589233, + "learning_rate": 1.779345821276884e-05, + "loss": 0.1464, "step": 6520 }, { - "epoch": 0.16563015611118162, - "grad_norm": 0.6978405714035034, - "learning_rate": 1.889579895925879e-05, - "loss": 0.186, + "epoch": 0.3312350880755368, + "grad_norm": 0.45324239134788513, + "learning_rate": 1.7791766079496422e-05, + "loss": 0.1359, "step": 6525 }, { - "epoch": 0.16575707577103693, - "grad_norm": 0.8244835138320923, - "learning_rate": 1.8894952828193088e-05, - "loss": 0.1894, + "epoch": 0.3314889080663993, + "grad_norm": 0.5347620248794556, + "learning_rate": 1.7790073946224006e-05, + "loss": 0.1249, "step": 6530 }, { - "epoch": 0.16588399543089224, - "grad_norm": 0.8973921537399292, - "learning_rate": 1.8894106697127386e-05, - "loss": 0.1835, + "epoch": 0.3317427280572618, + "grad_norm": 0.5685572624206543, + "learning_rate": 1.778838181295159e-05, + "loss": 0.1421, "step": 6535 }, { - "epoch": 0.16601091509074756, - "grad_norm": 0.7801916003227234, - "learning_rate": 1.8893260566061685e-05, - "loss": 0.1713, + "epoch": 0.3319965480481243, + "grad_norm": 0.5411074161529541, + "learning_rate": 1.7786689679679173e-05, + "loss": 0.1233, "step": 6540 }, { - "epoch": 0.16613783475060287, - "grad_norm": 1.2481701374053955, - "learning_rate": 1.8892414434995983e-05, - "loss": 0.1871, + "epoch": 0.3322503680389868, + "grad_norm": 0.42632856965065, + "learning_rate": 1.7784997546406756e-05, + "loss": 0.1339, "step": 6545 }, { - "epoch": 0.16626475441045818, - "grad_norm": 1.1135246753692627, - "learning_rate": 1.889156830393028e-05, - "loss": 0.2086, + "epoch": 0.3325041880298492, + "grad_norm": 0.45992130041122437, + "learning_rate": 1.778330541313434e-05, + "loss": 0.1395, "step": 6550 }, { - "epoch": 0.1663916740703135, - "grad_norm": 0.5591463446617126, - "learning_rate": 1.8890722172864576e-05, - "loss": 0.173, + "epoch": 0.3327580080207117, + "grad_norm": 0.7072666883468628, + "learning_rate": 1.7781613279861923e-05, + "loss": 0.1312, "step": 6555 }, { - "epoch": 0.1665185937301688, - "grad_norm": 0.8096752166748047, - "learning_rate": 1.8889876041798875e-05, - "loss": 0.2061, + "epoch": 0.3330118280115742, + "grad_norm": 0.5420939922332764, + "learning_rate": 1.7779921146589507e-05, + "loss": 0.1209, "step": 6560 }, { - "epoch": 0.16664551339002412, - "grad_norm": 0.7261133790016174, - "learning_rate": 1.8889029910733173e-05, - "loss": 0.1822, + "epoch": 0.3332656480024367, + "grad_norm": 0.5253438949584961, + "learning_rate": 1.777822901331709e-05, + "loss": 0.1315, "step": 6565 }, { - "epoch": 0.16677243304987943, - "grad_norm": 0.9683718085289001, - "learning_rate": 1.888818377966747e-05, - "loss": 0.1551, + "epoch": 0.3335194679932992, + "grad_norm": 0.51002436876297, + "learning_rate": 1.7776536880044674e-05, + "loss": 0.1467, "step": 6570 }, { - "epoch": 0.16689935270973474, - "grad_norm": 0.9057483077049255, - "learning_rate": 1.888733764860177e-05, - "loss": 0.18, + "epoch": 0.3337732879841616, + "grad_norm": 0.7875125408172607, + "learning_rate": 1.7774844746772258e-05, + "loss": 0.1201, "step": 6575 }, { - "epoch": 0.16702627236959006, - "grad_norm": 0.6116618514060974, - "learning_rate": 1.888649151753607e-05, - "loss": 0.1893, + "epoch": 0.3340271079750241, + "grad_norm": 0.4544963240623474, + "learning_rate": 1.777315261349984e-05, + "loss": 0.1239, "step": 6580 }, { - "epoch": 0.16715319202944537, - "grad_norm": 1.2629517316818237, - "learning_rate": 1.8885645386470367e-05, - "loss": 0.1647, + "epoch": 0.3342809279658866, + "grad_norm": 0.579490602016449, + "learning_rate": 1.7771460480227425e-05, + "loss": 0.1551, "step": 6585 }, { - "epoch": 0.16728011168930068, - "grad_norm": 0.8015618324279785, - "learning_rate": 1.8884799255404665e-05, - "loss": 0.1968, + "epoch": 0.3345347479567491, + "grad_norm": 0.5468235015869141, + "learning_rate": 1.776976834695501e-05, + "loss": 0.1406, "step": 6590 }, { - "epoch": 0.167407031349156, - "grad_norm": 0.6021865010261536, - "learning_rate": 1.888395312433896e-05, - "loss": 0.1717, + "epoch": 0.3347885679476115, + "grad_norm": 0.5087330341339111, + "learning_rate": 1.7768076213682592e-05, + "loss": 0.14, "step": 6595 }, { - "epoch": 0.1675339510090113, - "grad_norm": 0.8732836246490479, - "learning_rate": 1.888310699327326e-05, - "loss": 0.1756, + "epoch": 0.335042387938474, + "grad_norm": 0.48226407170295715, + "learning_rate": 1.7766384080410175e-05, + "loss": 0.1214, "step": 6600 }, { - "epoch": 0.16766087066886662, - "grad_norm": 0.7330558896064758, - "learning_rate": 1.8882260862207557e-05, - "loss": 0.188, + "epoch": 0.3352962079293365, + "grad_norm": 0.4688023626804352, + "learning_rate": 1.776469194713776e-05, + "loss": 0.1089, "step": 6605 }, { - "epoch": 0.16778779032872193, - "grad_norm": 1.3980216979980469, - "learning_rate": 1.8881414731141856e-05, - "loss": 0.2098, + "epoch": 0.335550027920199, + "grad_norm": 0.7546516060829163, + "learning_rate": 1.7762999813865342e-05, + "loss": 0.1428, "step": 6610 }, { - "epoch": 0.16791470998857724, - "grad_norm": 1.1672130823135376, - "learning_rate": 1.8880568600076154e-05, - "loss": 0.1765, + "epoch": 0.3358038479110615, + "grad_norm": 0.46591129899024963, + "learning_rate": 1.7761307680592926e-05, + "loss": 0.126, "step": 6615 }, { - "epoch": 0.16804162964843256, - "grad_norm": 0.9968149065971375, - "learning_rate": 1.8879722469010452e-05, - "loss": 0.1833, + "epoch": 0.33605766790192393, + "grad_norm": 0.5849207043647766, + "learning_rate": 1.7759615547320506e-05, + "loss": 0.1322, "step": 6620 }, { - "epoch": 0.16816854930828784, - "grad_norm": 0.7558118104934692, - "learning_rate": 1.887887633794475e-05, - "loss": 0.1683, + "epoch": 0.3363114878927864, + "grad_norm": 0.461297869682312, + "learning_rate": 1.7757923414048093e-05, + "loss": 0.1355, "step": 6625 }, { - "epoch": 0.16829546896814315, - "grad_norm": 0.6800826787948608, - "learning_rate": 1.887803020687905e-05, - "loss": 0.1481, + "epoch": 0.3365653078836489, + "grad_norm": 0.4782043397426605, + "learning_rate": 1.7756231280775677e-05, + "loss": 0.1337, "step": 6630 }, { - "epoch": 0.16842238862799846, - "grad_norm": 0.9693293571472168, - "learning_rate": 1.8877184075813344e-05, - "loss": 0.1642, + "epoch": 0.3368191278745114, + "grad_norm": 1.3908518552780151, + "learning_rate": 1.7754539147503257e-05, + "loss": 0.1418, "step": 6635 }, { - "epoch": 0.16854930828785378, - "grad_norm": 0.9028599858283997, - "learning_rate": 1.8876337944747643e-05, - "loss": 0.2, + "epoch": 0.3370729478653739, + "grad_norm": 0.5779076218605042, + "learning_rate": 1.7752847014230844e-05, + "loss": 0.1416, "step": 6640 }, { - "epoch": 0.1686762279477091, - "grad_norm": 0.7811661958694458, - "learning_rate": 1.887549181368194e-05, - "loss": 0.1713, + "epoch": 0.33732676785623633, + "grad_norm": 0.4418136477470398, + "learning_rate": 1.7751154880958424e-05, + "loss": 0.1194, "step": 6645 }, { - "epoch": 0.1688031476075644, - "grad_norm": 0.8806522488594055, - "learning_rate": 1.887464568261624e-05, - "loss": 0.1852, + "epoch": 0.3375805878470988, + "grad_norm": 0.47528406977653503, + "learning_rate": 1.7749462747686007e-05, + "loss": 0.1362, "step": 6650 }, { - "epoch": 0.16893006726741971, - "grad_norm": 0.7711669206619263, - "learning_rate": 1.8873799551550534e-05, - "loss": 0.1453, + "epoch": 0.3378344078379613, + "grad_norm": 0.6058582663536072, + "learning_rate": 1.7747770614413594e-05, + "loss": 0.156, "step": 6655 }, { - "epoch": 0.16905698692727503, - "grad_norm": 0.5760945677757263, - "learning_rate": 1.8872953420484833e-05, - "loss": 0.1402, + "epoch": 0.3380882278288238, + "grad_norm": 0.4911196827888489, + "learning_rate": 1.7746078481141175e-05, + "loss": 0.1294, "step": 6660 }, { - "epoch": 0.16918390658713034, - "grad_norm": 0.9353062510490417, - "learning_rate": 1.887210728941913e-05, - "loss": 0.2077, + "epoch": 0.3383420478196863, + "grad_norm": 0.5730322003364563, + "learning_rate": 1.774438634786876e-05, + "loss": 0.1217, "step": 6665 }, { - "epoch": 0.16931082624698565, - "grad_norm": 0.8693033456802368, - "learning_rate": 1.887126115835343e-05, - "loss": 0.1602, + "epoch": 0.33859586781054873, + "grad_norm": 0.3868119418621063, + "learning_rate": 1.774269421459634e-05, + "loss": 0.1237, "step": 6670 }, { - "epoch": 0.16943774590684096, - "grad_norm": 1.1909596920013428, - "learning_rate": 1.8870415027287728e-05, - "loss": 0.1876, + "epoch": 0.3388496878014112, + "grad_norm": 0.70493483543396, + "learning_rate": 1.7741002081323925e-05, + "loss": 0.1471, "step": 6675 }, { - "epoch": 0.16956466556669628, - "grad_norm": 0.7478213310241699, - "learning_rate": 1.8869568896222026e-05, - "loss": 0.1529, + "epoch": 0.3391035077922737, + "grad_norm": 0.6178105473518372, + "learning_rate": 1.7739309948051512e-05, + "loss": 0.1282, "step": 6680 }, { - "epoch": 0.1696915852265516, - "grad_norm": 0.7570421695709229, - "learning_rate": 1.8868722765156325e-05, - "loss": 0.1814, + "epoch": 0.3393573277831362, + "grad_norm": 0.5365161895751953, + "learning_rate": 1.7737617814779092e-05, + "loss": 0.1191, "step": 6685 }, { - "epoch": 0.1698185048864069, - "grad_norm": 0.750109851360321, - "learning_rate": 1.8867876634090623e-05, - "loss": 0.1733, + "epoch": 0.3396111477739987, + "grad_norm": 0.4318663477897644, + "learning_rate": 1.7735925681506676e-05, + "loss": 0.126, "step": 6690 }, { - "epoch": 0.1699454245462622, - "grad_norm": 0.7969471216201782, - "learning_rate": 1.8867030503024918e-05, - "loss": 0.1656, + "epoch": 0.33986496776486114, + "grad_norm": 0.46456629037857056, + "learning_rate": 1.773423354823426e-05, + "loss": 0.1327, "step": 6695 }, { - "epoch": 0.17007234420611753, - "grad_norm": 1.4073054790496826, - "learning_rate": 1.8866184371959217e-05, - "loss": 0.1788, + "epoch": 0.34011878775572363, + "grad_norm": 0.7820452451705933, + "learning_rate": 1.7732541414961843e-05, + "loss": 0.131, "step": 6700 }, { - "epoch": 0.17019926386597284, - "grad_norm": 2.105388879776001, - "learning_rate": 1.8865338240893515e-05, - "loss": 0.155, + "epoch": 0.3403726077465861, + "grad_norm": 0.6297656297683716, + "learning_rate": 1.7730849281689426e-05, + "loss": 0.151, "step": 6705 }, { - "epoch": 0.17032618352582815, - "grad_norm": 1.138078212738037, - "learning_rate": 1.8864492109827813e-05, - "loss": 0.1279, + "epoch": 0.3406264277374486, + "grad_norm": 0.4756261706352234, + "learning_rate": 1.772915714841701e-05, + "loss": 0.1388, "step": 6710 }, { - "epoch": 0.17045310318568346, - "grad_norm": 0.6923502087593079, - "learning_rate": 1.8863645978762112e-05, - "loss": 0.189, + "epoch": 0.3408802477283111, + "grad_norm": 0.5708016157150269, + "learning_rate": 1.7727465015144594e-05, + "loss": 0.1191, "step": 6715 }, { - "epoch": 0.17058002284553878, - "grad_norm": 1.0822755098342896, - "learning_rate": 1.886279984769641e-05, - "loss": 0.1657, + "epoch": 0.34113406771917354, + "grad_norm": 0.5044305324554443, + "learning_rate": 1.7725772881872177e-05, + "loss": 0.1238, "step": 6720 }, { - "epoch": 0.1707069425053941, - "grad_norm": 0.5901085138320923, - "learning_rate": 1.886195371663071e-05, - "loss": 0.1798, + "epoch": 0.34138788771003603, + "grad_norm": 0.39757969975471497, + "learning_rate": 1.772408074859976e-05, + "loss": 0.1364, "step": 6725 }, { - "epoch": 0.1708338621652494, - "grad_norm": 0.6933346390724182, - "learning_rate": 1.8861107585565007e-05, - "loss": 0.1687, + "epoch": 0.3416417077008985, + "grad_norm": 0.425277441740036, + "learning_rate": 1.7722388615327344e-05, + "loss": 0.1233, "step": 6730 }, { - "epoch": 0.1709607818251047, - "grad_norm": 0.8787699937820435, - "learning_rate": 1.8860261454499305e-05, - "loss": 0.2205, + "epoch": 0.341895527691761, + "grad_norm": 0.5457815527915955, + "learning_rate": 1.7720696482054928e-05, + "loss": 0.1335, "step": 6735 }, { - "epoch": 0.17108770148496003, - "grad_norm": 0.6464111804962158, - "learning_rate": 1.88594153234336e-05, - "loss": 0.185, + "epoch": 0.3421493476826235, + "grad_norm": 0.9425898194313049, + "learning_rate": 1.771900434878251e-05, + "loss": 0.125, "step": 6740 }, { - "epoch": 0.17121462114481534, - "grad_norm": 0.7503834962844849, - "learning_rate": 1.88585691923679e-05, - "loss": 0.1964, + "epoch": 0.34240316767348594, + "grad_norm": 0.6296150088310242, + "learning_rate": 1.7717312215510095e-05, + "loss": 0.1156, "step": 6745 }, { - "epoch": 0.17134154080467065, - "grad_norm": 1.9429404735565186, - "learning_rate": 1.8857723061302197e-05, - "loss": 0.1925, + "epoch": 0.34265698766434843, + "grad_norm": 0.4702848196029663, + "learning_rate": 1.771562008223768e-05, + "loss": 0.1213, "step": 6750 }, { - "epoch": 0.17146846046452596, - "grad_norm": 0.8583249449729919, - "learning_rate": 1.8856876930236496e-05, - "loss": 0.1889, + "epoch": 0.3429108076552109, + "grad_norm": 0.48555007576942444, + "learning_rate": 1.7713927948965262e-05, + "loss": 0.1297, "step": 6755 }, { - "epoch": 0.17159538012438127, - "grad_norm": 0.617583692073822, - "learning_rate": 1.8856030799170794e-05, - "loss": 0.1414, + "epoch": 0.3431646276460734, + "grad_norm": 0.46021294593811035, + "learning_rate": 1.7712235815692845e-05, + "loss": 0.1325, "step": 6760 }, { - "epoch": 0.1717222997842366, - "grad_norm": 0.4760209619998932, - "learning_rate": 1.8855184668105092e-05, - "loss": 0.1258, + "epoch": 0.3434184476369359, + "grad_norm": 0.7857270836830139, + "learning_rate": 1.771054368242043e-05, + "loss": 0.1354, "step": 6765 }, { - "epoch": 0.1718492194440919, - "grad_norm": 1.164777398109436, - "learning_rate": 1.885433853703939e-05, - "loss": 0.1699, + "epoch": 0.34367226762779834, + "grad_norm": 0.538543701171875, + "learning_rate": 1.7708851549148013e-05, + "loss": 0.1471, "step": 6770 }, { - "epoch": 0.1719761391039472, - "grad_norm": 0.5676122903823853, - "learning_rate": 1.885349240597369e-05, - "loss": 0.1572, + "epoch": 0.34392608761866084, + "grad_norm": 0.5883283615112305, + "learning_rate": 1.7707159415875596e-05, + "loss": 0.1453, "step": 6775 }, { - "epoch": 0.17210305876380252, - "grad_norm": 0.9473775029182434, - "learning_rate": 1.8852646274907984e-05, - "loss": 0.1547, + "epoch": 0.34417990760952333, + "grad_norm": 0.5997344851493835, + "learning_rate": 1.770546728260318e-05, + "loss": 0.1381, "step": 6780 }, { - "epoch": 0.17222997842365784, - "grad_norm": 0.7924340963363647, - "learning_rate": 1.8851800143842283e-05, - "loss": 0.1765, + "epoch": 0.3444337276003858, + "grad_norm": 0.46415236592292786, + "learning_rate": 1.7703775149330763e-05, + "loss": 0.1331, "step": 6785 }, { - "epoch": 0.17235689808351315, - "grad_norm": 0.7073217630386353, - "learning_rate": 1.885095401277658e-05, - "loss": 0.1506, + "epoch": 0.3446875475912483, + "grad_norm": 0.46198391914367676, + "learning_rate": 1.7702083016058347e-05, + "loss": 0.129, "step": 6790 }, { - "epoch": 0.17248381774336846, - "grad_norm": 0.846244752407074, - "learning_rate": 1.885010788171088e-05, - "loss": 0.1464, + "epoch": 0.34494136758211075, + "grad_norm": 0.6081626415252686, + "learning_rate": 1.770039088278593e-05, + "loss": 0.1367, "step": 6795 }, { - "epoch": 0.17261073740322375, - "grad_norm": 1.7191411256790161, - "learning_rate": 1.8849261750645174e-05, - "loss": 0.2123, + "epoch": 0.34519518757297324, + "grad_norm": 0.5060707926750183, + "learning_rate": 1.769869874951351e-05, + "loss": 0.1317, "step": 6800 }, { - "epoch": 0.17273765706307906, - "grad_norm": 0.6426877379417419, - "learning_rate": 1.8848415619579473e-05, - "loss": 0.1677, + "epoch": 0.34544900756383573, + "grad_norm": 0.48941001296043396, + "learning_rate": 1.7697006616241097e-05, + "loss": 0.1218, "step": 6805 }, { - "epoch": 0.17286457672293437, - "grad_norm": 0.9737340211868286, - "learning_rate": 1.884756948851377e-05, - "loss": 0.181, + "epoch": 0.3457028275546982, + "grad_norm": 0.8074930906295776, + "learning_rate": 1.769531448296868e-05, + "loss": 0.1178, "step": 6810 }, { - "epoch": 0.17299149638278968, - "grad_norm": 0.8782788515090942, - "learning_rate": 1.884672335744807e-05, - "loss": 0.1728, + "epoch": 0.3459566475455607, + "grad_norm": 0.46753403544425964, + "learning_rate": 1.7693622349696264e-05, + "loss": 0.1152, "step": 6815 }, { - "epoch": 0.173118416042645, - "grad_norm": 1.5127662420272827, - "learning_rate": 1.8845877226382368e-05, - "loss": 0.161, + "epoch": 0.34621046753642315, + "grad_norm": 0.5410972237586975, + "learning_rate": 1.7691930216423848e-05, + "loss": 0.1364, "step": 6820 }, { - "epoch": 0.1732453357025003, - "grad_norm": 0.756269097328186, - "learning_rate": 1.8845031095316666e-05, - "loss": 0.1775, + "epoch": 0.34646428752728564, + "grad_norm": 0.5206438302993774, + "learning_rate": 1.7690238083151428e-05, + "loss": 0.1292, "step": 6825 }, { - "epoch": 0.17337225536235562, - "grad_norm": 1.1190637350082397, - "learning_rate": 1.8844184964250965e-05, - "loss": 0.1784, + "epoch": 0.34671810751814813, + "grad_norm": 0.4511585831642151, + "learning_rate": 1.7688545949879015e-05, + "loss": 0.1236, "step": 6830 }, { - "epoch": 0.17349917502221093, - "grad_norm": 0.6519043445587158, - "learning_rate": 1.8843338833185263e-05, - "loss": 0.1677, + "epoch": 0.3469719275090106, + "grad_norm": 0.6129817962646484, + "learning_rate": 1.76868538166066e-05, + "loss": 0.1209, "step": 6835 }, { - "epoch": 0.17362609468206625, - "grad_norm": 0.562362015247345, - "learning_rate": 1.8842492702119558e-05, - "loss": 0.1615, + "epoch": 0.3472257474998731, + "grad_norm": 1.1212575435638428, + "learning_rate": 1.768516168333418e-05, + "loss": 0.1293, "step": 6840 }, { - "epoch": 0.17375301434192156, - "grad_norm": 0.9013134837150574, - "learning_rate": 1.8841646571053857e-05, - "loss": 0.1599, + "epoch": 0.34747956749073555, + "grad_norm": 0.760433554649353, + "learning_rate": 1.7683469550061766e-05, + "loss": 0.1382, "step": 6845 }, { - "epoch": 0.17387993400177687, - "grad_norm": 1.1281301975250244, - "learning_rate": 1.8840800439988155e-05, - "loss": 0.1836, + "epoch": 0.34773338748159804, + "grad_norm": 0.6482082605361938, + "learning_rate": 1.7681777416789346e-05, + "loss": 0.1354, "step": 6850 }, { - "epoch": 0.17400685366163218, - "grad_norm": 0.6284507513046265, - "learning_rate": 1.8839954308922453e-05, - "loss": 0.1693, + "epoch": 0.34798720747246054, + "grad_norm": 0.606609046459198, + "learning_rate": 1.7680085283516933e-05, + "loss": 0.1174, "step": 6855 }, { - "epoch": 0.1741337733214875, - "grad_norm": 0.8526862263679504, - "learning_rate": 1.8839108177856752e-05, - "loss": 0.1757, + "epoch": 0.348241027463323, + "grad_norm": 0.49537140130996704, + "learning_rate": 1.7678393150244516e-05, + "loss": 0.1272, "step": 6860 }, { - "epoch": 0.1742606929813428, - "grad_norm": 0.5255853533744812, - "learning_rate": 1.883826204679105e-05, - "loss": 0.1423, + "epoch": 0.3484948474541855, + "grad_norm": 0.5630388259887695, + "learning_rate": 1.7676701016972096e-05, + "loss": 0.1455, "step": 6865 }, { - "epoch": 0.17438761264119812, - "grad_norm": 1.0841474533081055, - "learning_rate": 1.883741591572535e-05, - "loss": 0.1548, + "epoch": 0.34874866744504796, + "grad_norm": 0.5360369086265564, + "learning_rate": 1.7675008883699683e-05, + "loss": 0.1206, "step": 6870 }, { - "epoch": 0.17451453230105343, - "grad_norm": 0.8983005881309509, - "learning_rate": 1.8836569784659647e-05, - "loss": 0.1762, + "epoch": 0.34900248743591045, + "grad_norm": 0.5766199231147766, + "learning_rate": 1.7673316750427264e-05, + "loss": 0.1311, "step": 6875 }, { - "epoch": 0.17464145196090874, - "grad_norm": 0.6987091302871704, - "learning_rate": 1.8835723653593942e-05, - "loss": 0.2105, + "epoch": 0.34925630742677294, + "grad_norm": 0.4689859449863434, + "learning_rate": 1.7671624617154847e-05, + "loss": 0.1259, "step": 6880 }, { - "epoch": 0.17476837162076406, - "grad_norm": 0.9165411591529846, - "learning_rate": 1.883487752252824e-05, - "loss": 0.2145, + "epoch": 0.34951012741763543, + "grad_norm": 0.8159186840057373, + "learning_rate": 1.7669932483882434e-05, + "loss": 0.1237, "step": 6885 }, { - "epoch": 0.17489529128061937, - "grad_norm": 1.8744593858718872, - "learning_rate": 1.883403139146254e-05, - "loss": 0.1655, + "epoch": 0.34976394740849787, + "grad_norm": 0.9604336619377136, + "learning_rate": 1.7668240350610014e-05, + "loss": 0.1288, "step": 6890 }, { - "epoch": 0.17502221094047468, - "grad_norm": 0.7095727324485779, - "learning_rate": 1.8833185260396837e-05, - "loss": 0.1774, + "epoch": 0.35001776739936036, + "grad_norm": 1.4752824306488037, + "learning_rate": 1.7666548217337598e-05, + "loss": 0.1251, "step": 6895 }, { - "epoch": 0.17514913060033, - "grad_norm": 0.7658310532569885, - "learning_rate": 1.8832339129331136e-05, - "loss": 0.2045, + "epoch": 0.35027158739022285, + "grad_norm": 0.449650377035141, + "learning_rate": 1.766485608406518e-05, + "loss": 0.1479, "step": 6900 }, { - "epoch": 0.1752760502601853, - "grad_norm": 1.0158400535583496, - "learning_rate": 1.8831492998265434e-05, - "loss": 0.1702, + "epoch": 0.35052540738108534, + "grad_norm": 0.5203964114189148, + "learning_rate": 1.7663163950792765e-05, + "loss": 0.1424, "step": 6905 }, { - "epoch": 0.17540296992004062, - "grad_norm": 0.7917830348014832, - "learning_rate": 1.8830646867199733e-05, - "loss": 0.1627, + "epoch": 0.35077922737194783, + "grad_norm": 0.5282999277114868, + "learning_rate": 1.7661471817520352e-05, + "loss": 0.1269, "step": 6910 }, { - "epoch": 0.17552988957989593, - "grad_norm": 0.9741947650909424, - "learning_rate": 1.882980073613403e-05, - "loss": 0.1901, + "epoch": 0.35103304736281027, + "grad_norm": 0.41369152069091797, + "learning_rate": 1.7659779684247932e-05, + "loss": 0.1144, "step": 6915 }, { - "epoch": 0.17565680923975124, - "grad_norm": 1.6325863599777222, - "learning_rate": 1.8828954605068326e-05, - "loss": 0.1877, + "epoch": 0.35128686735367276, + "grad_norm": 2.030303716659546, + "learning_rate": 1.7658087550975515e-05, + "loss": 0.1372, "step": 6920 }, { - "epoch": 0.17578372889960656, - "grad_norm": 1.2895431518554688, - "learning_rate": 1.8828108474002624e-05, - "loss": 0.1633, + "epoch": 0.35154068734453525, + "grad_norm": 0.463283896446228, + "learning_rate": 1.76563954177031e-05, + "loss": 0.1329, "step": 6925 }, { - "epoch": 0.17591064855946187, - "grad_norm": 0.671952486038208, - "learning_rate": 1.8827262342936923e-05, - "loss": 0.2064, + "epoch": 0.35179450733539774, + "grad_norm": 0.4552387595176697, + "learning_rate": 1.7654703284430683e-05, + "loss": 0.1317, "step": 6930 }, { - "epoch": 0.17603756821931718, - "grad_norm": 1.1663230657577515, - "learning_rate": 1.882641621187122e-05, - "loss": 0.1853, + "epoch": 0.35204832732626024, + "grad_norm": 0.9171412587165833, + "learning_rate": 1.7653011151158266e-05, + "loss": 0.1284, "step": 6935 }, { - "epoch": 0.1761644878791725, - "grad_norm": 0.70639967918396, - "learning_rate": 1.8825570080805516e-05, - "loss": 0.1796, + "epoch": 0.35230214731712267, + "grad_norm": 0.43250319361686707, + "learning_rate": 1.765131901788585e-05, + "loss": 0.1199, "step": 6940 }, { - "epoch": 0.1762914075390278, - "grad_norm": 2.0304923057556152, - "learning_rate": 1.8824723949739815e-05, - "loss": 0.1861, + "epoch": 0.35255596730798516, + "grad_norm": 0.8429256081581116, + "learning_rate": 1.7649626884613433e-05, + "loss": 0.1375, "step": 6945 }, { - "epoch": 0.17641832719888312, - "grad_norm": 0.6560999155044556, - "learning_rate": 1.8823877818674113e-05, - "loss": 0.1766, + "epoch": 0.35280978729884765, + "grad_norm": 0.651530921459198, + "learning_rate": 1.7647934751341017e-05, + "loss": 0.1313, "step": 6950 }, { - "epoch": 0.17654524685873843, - "grad_norm": 0.6706276535987854, - "learning_rate": 1.882303168760841e-05, - "loss": 0.1661, + "epoch": 0.35306360728971015, + "grad_norm": 0.632590115070343, + "learning_rate": 1.76462426180686e-05, + "loss": 0.1437, "step": 6955 }, { - "epoch": 0.17667216651859374, - "grad_norm": 0.8716412782669067, - "learning_rate": 1.882218555654271e-05, - "loss": 0.1824, + "epoch": 0.35331742728057264, + "grad_norm": 0.5507182478904724, + "learning_rate": 1.7644550484796184e-05, + "loss": 0.1353, "step": 6960 }, { - "epoch": 0.17679908617844906, - "grad_norm": 0.6197476983070374, - "learning_rate": 1.8821339425477008e-05, - "loss": 0.1951, + "epoch": 0.3535712472714351, + "grad_norm": 0.5852574110031128, + "learning_rate": 1.7642858351523767e-05, + "loss": 0.1166, "step": 6965 }, { - "epoch": 0.17692600583830434, - "grad_norm": 0.7620746493339539, - "learning_rate": 1.8820493294411307e-05, - "loss": 0.1756, + "epoch": 0.35382506726229757, + "grad_norm": 0.4446243643760681, + "learning_rate": 1.764116621825135e-05, + "loss": 0.1333, "step": 6970 }, { - "epoch": 0.17705292549815965, - "grad_norm": 0.9096162915229797, - "learning_rate": 1.8819647163345605e-05, - "loss": 0.1615, + "epoch": 0.35407888725316006, + "grad_norm": 0.6388731598854065, + "learning_rate": 1.7639474084978934e-05, + "loss": 0.1312, "step": 6975 }, { - "epoch": 0.17717984515801496, - "grad_norm": 0.6968966722488403, - "learning_rate": 1.88188010322799e-05, - "loss": 0.1665, + "epoch": 0.35433270724402255, + "grad_norm": 0.4913591742515564, + "learning_rate": 1.7637781951706518e-05, + "loss": 0.1276, "step": 6980 }, { - "epoch": 0.17730676481787028, - "grad_norm": 0.6340092420578003, - "learning_rate": 1.88179549012142e-05, - "loss": 0.1686, + "epoch": 0.35458652723488504, + "grad_norm": 0.5663020014762878, + "learning_rate": 1.76360898184341e-05, + "loss": 0.1228, "step": 6985 }, { - "epoch": 0.1774336844777256, - "grad_norm": 0.6624810695648193, - "learning_rate": 1.8817108770148497e-05, - "loss": 0.1663, + "epoch": 0.3548403472257475, + "grad_norm": 0.3867751657962799, + "learning_rate": 1.7634397685161685e-05, + "loss": 0.1331, "step": 6990 }, { - "epoch": 0.1775606041375809, - "grad_norm": 0.6406440138816833, - "learning_rate": 1.8816262639082795e-05, - "loss": 0.1633, + "epoch": 0.35509416721660997, + "grad_norm": 1.012452483177185, + "learning_rate": 1.763270555188927e-05, + "loss": 0.1431, "step": 6995 }, { - "epoch": 0.17768752379743621, - "grad_norm": 0.7023237943649292, - "learning_rate": 1.8815416508017094e-05, - "loss": 0.1752, + "epoch": 0.35534798720747246, + "grad_norm": 0.5785044431686401, + "learning_rate": 1.7631013418616852e-05, + "loss": 0.146, "step": 7000 }, { - "epoch": 0.17781444345729153, - "grad_norm": 0.815453827381134, - "learning_rate": 1.8814570376951392e-05, - "loss": 0.1541, + "epoch": 0.35560180719833495, + "grad_norm": 0.5155715346336365, + "learning_rate": 1.7629321285344436e-05, + "loss": 0.1153, "step": 7005 }, { - "epoch": 0.17794136311714684, - "grad_norm": 0.9376952648162842, - "learning_rate": 1.881372424588569e-05, - "loss": 0.1868, + "epoch": 0.35585562718919744, + "grad_norm": 0.5367943644523621, + "learning_rate": 1.762762915207202e-05, + "loss": 0.125, "step": 7010 }, { - "epoch": 0.17806828277700215, - "grad_norm": 0.879621148109436, - "learning_rate": 1.881287811481999e-05, - "loss": 0.1775, + "epoch": 0.3561094471800599, + "grad_norm": 0.3979216516017914, + "learning_rate": 1.7625937018799603e-05, + "loss": 0.1232, "step": 7015 }, { - "epoch": 0.17819520243685746, - "grad_norm": 0.7836249470710754, - "learning_rate": 1.8812031983754284e-05, - "loss": 0.2011, + "epoch": 0.35636326717092237, + "grad_norm": 0.5183656215667725, + "learning_rate": 1.7624244885527186e-05, + "loss": 0.1181, "step": 7020 }, { - "epoch": 0.17832212209671278, - "grad_norm": 0.9400976300239563, - "learning_rate": 1.8811185852688582e-05, - "loss": 0.2065, + "epoch": 0.35661708716178486, + "grad_norm": 0.45130103826522827, + "learning_rate": 1.762255275225477e-05, + "loss": 0.1325, "step": 7025 }, { - "epoch": 0.1784490417565681, - "grad_norm": 1.2125358581542969, - "learning_rate": 1.881033972162288e-05, - "loss": 0.1743, + "epoch": 0.35687090715264735, + "grad_norm": 0.7615092992782593, + "learning_rate": 1.762086061898235e-05, + "loss": 0.1321, "step": 7030 }, { - "epoch": 0.1785759614164234, - "grad_norm": 0.8177736401557922, - "learning_rate": 1.880949359055718e-05, - "loss": 0.1814, + "epoch": 0.35712472714350985, + "grad_norm": 0.5867871046066284, + "learning_rate": 1.7619168485709937e-05, + "loss": 0.1184, "step": 7035 }, { - "epoch": 0.1787028810762787, - "grad_norm": 1.111288070678711, - "learning_rate": 1.8808647459491477e-05, - "loss": 0.1635, + "epoch": 0.3573785471343723, + "grad_norm": 1.0698941946029663, + "learning_rate": 1.761747635243752e-05, + "loss": 0.1272, "step": 7040 }, { - "epoch": 0.17882980073613403, - "grad_norm": 1.0142552852630615, - "learning_rate": 1.8807801328425776e-05, - "loss": 0.1775, + "epoch": 0.3576323671252348, + "grad_norm": 0.5040280222892761, + "learning_rate": 1.76157842191651e-05, + "loss": 0.1323, "step": 7045 }, { - "epoch": 0.17895672039598934, - "grad_norm": 0.809646487236023, - "learning_rate": 1.8806955197360074e-05, - "loss": 0.198, + "epoch": 0.35788618711609727, + "grad_norm": 0.49704474210739136, + "learning_rate": 1.7614092085892688e-05, + "loss": 0.133, "step": 7050 }, { - "epoch": 0.17908364005584465, - "grad_norm": 0.8719913363456726, - "learning_rate": 1.8806109066294373e-05, - "loss": 0.2035, + "epoch": 0.35814000710695976, + "grad_norm": 0.7625434398651123, + "learning_rate": 1.7612399952620268e-05, + "loss": 0.1295, "step": 7055 }, { - "epoch": 0.17921055971569996, - "grad_norm": 0.9484214186668396, - "learning_rate": 1.8805262935228668e-05, - "loss": 0.1535, + "epoch": 0.35839382709782225, + "grad_norm": 0.5237678289413452, + "learning_rate": 1.7610707819347855e-05, + "loss": 0.1282, "step": 7060 }, { - "epoch": 0.17933747937555528, - "grad_norm": 0.7689060568809509, - "learning_rate": 1.8804416804162966e-05, - "loss": 0.1369, + "epoch": 0.3586476470886847, + "grad_norm": 0.640903115272522, + "learning_rate": 1.7609015686075438e-05, + "loss": 0.1323, "step": 7065 }, { - "epoch": 0.1794643990354106, - "grad_norm": 0.8345035910606384, - "learning_rate": 1.8803570673097264e-05, - "loss": 0.1835, + "epoch": 0.3589014670795472, + "grad_norm": 0.5553655028343201, + "learning_rate": 1.760732355280302e-05, + "loss": 0.1261, "step": 7070 }, { - "epoch": 0.1795913186952659, - "grad_norm": 1.3943219184875488, - "learning_rate": 1.8802724542031563e-05, - "loss": 0.1824, + "epoch": 0.35915528707040967, + "grad_norm": 0.5053536891937256, + "learning_rate": 1.7605631419530605e-05, + "loss": 0.1298, "step": 7075 }, { - "epoch": 0.1797182383551212, - "grad_norm": 3.0737946033477783, - "learning_rate": 1.8801878410965858e-05, - "loss": 0.1772, + "epoch": 0.35940910706127216, + "grad_norm": 0.5184898972511292, + "learning_rate": 1.7603939286258186e-05, + "loss": 0.1348, "step": 7080 }, { - "epoch": 0.17984515801497652, - "grad_norm": 0.7701429724693298, - "learning_rate": 1.8801032279900156e-05, - "loss": 0.1713, + "epoch": 0.35966292705213465, + "grad_norm": 0.777260959148407, + "learning_rate": 1.760224715298577e-05, + "loss": 0.1373, "step": 7085 }, { - "epoch": 0.17997207767483184, - "grad_norm": 0.8493186235427856, - "learning_rate": 1.8800186148834455e-05, - "loss": 0.1692, + "epoch": 0.3599167470429971, + "grad_norm": 0.4676660895347595, + "learning_rate": 1.7600555019713356e-05, + "loss": 0.1254, "step": 7090 }, { - "epoch": 0.18009899733468715, - "grad_norm": 0.6952025890350342, - "learning_rate": 1.8799340017768753e-05, - "loss": 0.1824, + "epoch": 0.3601705670338596, + "grad_norm": 0.6567785739898682, + "learning_rate": 1.7598862886440936e-05, + "loss": 0.1165, "step": 7095 }, { - "epoch": 0.18022591699454246, - "grad_norm": 0.7369063496589661, - "learning_rate": 1.879849388670305e-05, - "loss": 0.1876, + "epoch": 0.36042438702472207, + "grad_norm": 0.7958646416664124, + "learning_rate": 1.7597170753168523e-05, + "loss": 0.1334, "step": 7100 }, { - "epoch": 0.18035283665439777, - "grad_norm": 0.6352242827415466, - "learning_rate": 1.879764775563735e-05, - "loss": 0.1277, + "epoch": 0.36067820701558456, + "grad_norm": 0.43118539452552795, + "learning_rate": 1.7595478619896103e-05, + "loss": 0.1203, "step": 7105 }, { - "epoch": 0.1804797563142531, - "grad_norm": 0.8525051474571228, - "learning_rate": 1.8796801624571648e-05, - "loss": 0.1853, + "epoch": 0.36093202700644705, + "grad_norm": 0.44612541794776917, + "learning_rate": 1.7593786486623687e-05, + "loss": 0.1049, "step": 7110 }, { - "epoch": 0.1806066759741084, - "grad_norm": 1.4094996452331543, - "learning_rate": 1.8795955493505947e-05, - "loss": 0.1857, + "epoch": 0.3611858469973095, + "grad_norm": 0.43221428990364075, + "learning_rate": 1.7592094353351274e-05, + "loss": 0.1152, "step": 7115 }, { - "epoch": 0.1807335956339637, - "grad_norm": 0.6135801672935486, - "learning_rate": 1.879510936244024e-05, - "loss": 0.1796, + "epoch": 0.361439666988172, + "grad_norm": 0.486248642206192, + "learning_rate": 1.7590402220078854e-05, + "loss": 0.1281, "step": 7120 }, { - "epoch": 0.18086051529381902, - "grad_norm": 0.8286238312721252, - "learning_rate": 1.879426323137454e-05, - "loss": 0.1411, + "epoch": 0.3616934869790345, + "grad_norm": 0.4399028420448303, + "learning_rate": 1.7588710086806437e-05, + "loss": 0.1321, "step": 7125 }, { - "epoch": 0.18098743495367434, - "grad_norm": 0.8748704195022583, - "learning_rate": 1.879341710030884e-05, - "loss": 0.1639, + "epoch": 0.36194730696989696, + "grad_norm": 0.6637834906578064, + "learning_rate": 1.758701795353402e-05, + "loss": 0.1285, "step": 7130 }, { - "epoch": 0.18111435461352965, - "grad_norm": 0.7812581658363342, - "learning_rate": 1.8792570969243137e-05, - "loss": 0.1881, + "epoch": 0.36220112696075946, + "grad_norm": 0.4433422386646271, + "learning_rate": 1.7585325820261605e-05, + "loss": 0.133, "step": 7135 }, { - "epoch": 0.18124127427338493, - "grad_norm": 0.8302584886550903, - "learning_rate": 1.8791724838177435e-05, - "loss": 0.1758, + "epoch": 0.3624549469516219, + "grad_norm": 0.7549034953117371, + "learning_rate": 1.7583633686989188e-05, + "loss": 0.1279, "step": 7140 }, { - "epoch": 0.18136819393324025, - "grad_norm": 0.6813400983810425, - "learning_rate": 1.8790878707111734e-05, - "loss": 0.1571, + "epoch": 0.3627087669424844, + "grad_norm": 0.48059457540512085, + "learning_rate": 1.758194155371677e-05, + "loss": 0.1149, "step": 7145 }, { - "epoch": 0.18149511359309556, - "grad_norm": 0.91890949010849, - "learning_rate": 1.8790032576046032e-05, - "loss": 0.1464, + "epoch": 0.3629625869333469, + "grad_norm": 0.582513689994812, + "learning_rate": 1.7580249420444355e-05, + "loss": 0.1423, "step": 7150 }, { - "epoch": 0.18162203325295087, - "grad_norm": 0.9063435196876526, - "learning_rate": 1.878918644498033e-05, - "loss": 0.1651, + "epoch": 0.36321640692420937, + "grad_norm": 0.6541072130203247, + "learning_rate": 1.757855728717194e-05, + "loss": 0.1229, "step": 7155 }, { - "epoch": 0.18174895291280618, - "grad_norm": 0.7701747417449951, - "learning_rate": 1.8788340313914626e-05, - "loss": 0.1573, + "epoch": 0.3634702269150718, + "grad_norm": 0.5201939344406128, + "learning_rate": 1.7576865153899522e-05, + "loss": 0.1194, "step": 7160 }, { - "epoch": 0.1818758725726615, - "grad_norm": 0.7381592988967896, - "learning_rate": 1.8787494182848924e-05, - "loss": 0.1477, + "epoch": 0.3637240469059343, + "grad_norm": 1.192158579826355, + "learning_rate": 1.7575173020627106e-05, + "loss": 0.1261, "step": 7165 }, { - "epoch": 0.1820027922325168, - "grad_norm": 0.6792211532592773, - "learning_rate": 1.8786648051783222e-05, - "loss": 0.1802, + "epoch": 0.3639778668967968, + "grad_norm": 0.5061715245246887, + "learning_rate": 1.757348088735469e-05, + "loss": 0.1282, "step": 7170 }, { - "epoch": 0.18212971189237212, - "grad_norm": 0.8941872715950012, - "learning_rate": 1.878580192071752e-05, - "loss": 0.1715, + "epoch": 0.3642316868876593, + "grad_norm": 0.45553848147392273, + "learning_rate": 1.7571788754082273e-05, + "loss": 0.1186, "step": 7175 }, { - "epoch": 0.18225663155222743, - "grad_norm": 0.6623015999794006, - "learning_rate": 1.878495578965182e-05, - "loss": 0.1567, + "epoch": 0.36448550687852177, + "grad_norm": 0.4875742793083191, + "learning_rate": 1.7570096620809856e-05, + "loss": 0.124, "step": 7180 }, { - "epoch": 0.18238355121208275, - "grad_norm": 1.0507497787475586, - "learning_rate": 1.8784109658586118e-05, - "loss": 0.1868, + "epoch": 0.3647393268693842, + "grad_norm": 0.39498981833457947, + "learning_rate": 1.756840448753744e-05, + "loss": 0.1223, "step": 7185 }, { - "epoch": 0.18251047087193806, - "grad_norm": 0.7597683072090149, - "learning_rate": 1.8783263527520416e-05, - "loss": 0.1614, + "epoch": 0.3649931468602467, + "grad_norm": 0.6347449421882629, + "learning_rate": 1.7566712354265023e-05, + "loss": 0.1164, "step": 7190 }, { - "epoch": 0.18263739053179337, - "grad_norm": 0.7641739845275879, - "learning_rate": 1.8782417396454714e-05, - "loss": 0.1528, + "epoch": 0.3652469668511092, + "grad_norm": 0.5973386764526367, + "learning_rate": 1.7565020220992607e-05, + "loss": 0.1207, "step": 7195 }, { - "epoch": 0.18276431019164868, - "grad_norm": 0.7833907008171082, - "learning_rate": 1.878157126538901e-05, - "loss": 0.1707, + "epoch": 0.3655007868419717, + "grad_norm": 0.44844862818717957, + "learning_rate": 1.756332808772019e-05, + "loss": 0.1268, "step": 7200 }, { - "epoch": 0.182891229851504, - "grad_norm": 1.176645278930664, - "learning_rate": 1.8780725134323308e-05, - "loss": 0.2023, + "epoch": 0.3657546068328342, + "grad_norm": 0.44825243949890137, + "learning_rate": 1.7561635954447774e-05, + "loss": 0.1333, "step": 7205 }, { - "epoch": 0.1830181495113593, - "grad_norm": 0.7155908942222595, - "learning_rate": 1.8779879003257606e-05, - "loss": 0.1795, + "epoch": 0.3660084268236966, + "grad_norm": 0.664547860622406, + "learning_rate": 1.7559943821175358e-05, + "loss": 0.1131, "step": 7210 }, { - "epoch": 0.18314506917121462, - "grad_norm": 0.6959702968597412, - "learning_rate": 1.8779032872191905e-05, - "loss": 0.1499, + "epoch": 0.3662622468145591, + "grad_norm": 0.5262232422828674, + "learning_rate": 1.755825168790294e-05, + "loss": 0.1314, "step": 7215 }, { - "epoch": 0.18327198883106993, - "grad_norm": 1.3303697109222412, - "learning_rate": 1.87781867411262e-05, - "loss": 0.1696, + "epoch": 0.3665160668054216, + "grad_norm": 0.5070418119430542, + "learning_rate": 1.7556559554630525e-05, + "loss": 0.1326, "step": 7220 }, { - "epoch": 0.18339890849092524, - "grad_norm": 0.6842988729476929, - "learning_rate": 1.8777340610060498e-05, - "loss": 0.1809, + "epoch": 0.3667698867962841, + "grad_norm": 0.7615456581115723, + "learning_rate": 1.7554867421358108e-05, + "loss": 0.1262, "step": 7225 }, { - "epoch": 0.18352582815078056, - "grad_norm": 0.7046438455581665, - "learning_rate": 1.8776494478994796e-05, - "loss": 0.1536, + "epoch": 0.3670237067871466, + "grad_norm": 0.44965729117393494, + "learning_rate": 1.7553175288085692e-05, + "loss": 0.1211, "step": 7230 }, { - "epoch": 0.18365274781063587, - "grad_norm": 0.6917726397514343, - "learning_rate": 1.8775648347929095e-05, - "loss": 0.1647, + "epoch": 0.367277526778009, + "grad_norm": 0.6415624022483826, + "learning_rate": 1.7551483154813272e-05, + "loss": 0.1188, "step": 7235 }, { - "epoch": 0.18377966747049118, - "grad_norm": 0.8281446695327759, - "learning_rate": 1.8774802216863393e-05, - "loss": 0.1871, + "epoch": 0.3675313467688715, + "grad_norm": 0.4776475131511688, + "learning_rate": 1.754979102154086e-05, + "loss": 0.1141, "step": 7240 }, { - "epoch": 0.1839065871303465, - "grad_norm": 0.9808202385902405, - "learning_rate": 1.877395608579769e-05, - "loss": 0.1682, + "epoch": 0.367785166759734, + "grad_norm": 0.4545101225376129, + "learning_rate": 1.7548098888268442e-05, + "loss": 0.1252, "step": 7245 }, { - "epoch": 0.1840335067902018, - "grad_norm": 1.0015809535980225, - "learning_rate": 1.877310995473199e-05, - "loss": 0.1666, + "epoch": 0.3680389867505965, + "grad_norm": 0.8184462189674377, + "learning_rate": 1.7546406754996026e-05, + "loss": 0.122, "step": 7250 }, { - "epoch": 0.18416042645005712, - "grad_norm": 0.7039207816123962, - "learning_rate": 1.877226382366629e-05, - "loss": 0.2084, + "epoch": 0.368292806741459, + "grad_norm": 0.46976208686828613, + "learning_rate": 1.754471462172361e-05, + "loss": 0.1219, "step": 7255 }, { - "epoch": 0.18428734610991243, - "grad_norm": 0.6634758114814758, - "learning_rate": 1.8771417692600587e-05, - "loss": 0.1546, + "epoch": 0.3685466267323214, + "grad_norm": 0.41548067331314087, + "learning_rate": 1.754302248845119e-05, + "loss": 0.1132, "step": 7260 }, { - "epoch": 0.18441426576976774, - "grad_norm": 0.551347017288208, - "learning_rate": 1.8770571561534882e-05, - "loss": 0.1512, + "epoch": 0.3688004467231839, + "grad_norm": 0.781281590461731, + "learning_rate": 1.7541330355178777e-05, + "loss": 0.1401, "step": 7265 }, { - "epoch": 0.18454118542962306, - "grad_norm": 0.6990922093391418, - "learning_rate": 1.876972543046918e-05, - "loss": 0.1652, + "epoch": 0.3690542667140464, + "grad_norm": 0.3721882700920105, + "learning_rate": 1.753963822190636e-05, + "loss": 0.1154, "step": 7270 }, { - "epoch": 0.18466810508947837, - "grad_norm": 0.9311314225196838, - "learning_rate": 1.876887929940348e-05, - "loss": 0.1767, + "epoch": 0.3693080867049089, + "grad_norm": 0.5015051960945129, + "learning_rate": 1.753794608863394e-05, + "loss": 0.1186, "step": 7275 }, { - "epoch": 0.18479502474933368, - "grad_norm": 1.135787010192871, - "learning_rate": 1.8768033168337777e-05, - "loss": 0.1492, + "epoch": 0.3695619066957714, + "grad_norm": 0.49192380905151367, + "learning_rate": 1.7536253955361527e-05, + "loss": 0.1115, "step": 7280 }, { - "epoch": 0.184921944409189, - "grad_norm": 0.7039095759391785, - "learning_rate": 1.8767187037272075e-05, - "loss": 0.1443, + "epoch": 0.3698157266866338, + "grad_norm": 0.4846615195274353, + "learning_rate": 1.7534561822089107e-05, + "loss": 0.1333, "step": 7285 }, { - "epoch": 0.1850488640690443, - "grad_norm": 0.8102678060531616, - "learning_rate": 1.8766340906206374e-05, - "loss": 0.1629, + "epoch": 0.3700695466774963, + "grad_norm": 0.40966522693634033, + "learning_rate": 1.753286968881669e-05, + "loss": 0.1185, "step": 7290 }, { - "epoch": 0.18517578372889962, - "grad_norm": 0.8359146118164062, - "learning_rate": 1.8765494775140672e-05, - "loss": 0.1787, + "epoch": 0.3703233666683588, + "grad_norm": 0.533069372177124, + "learning_rate": 1.7531177555544278e-05, + "loss": 0.1181, "step": 7295 }, { - "epoch": 0.18530270338875493, - "grad_norm": 0.9321544170379639, - "learning_rate": 1.876464864407497e-05, - "loss": 0.1502, + "epoch": 0.3705771866592213, + "grad_norm": 0.5515578985214233, + "learning_rate": 1.7529485422271858e-05, + "loss": 0.1148, "step": 7300 }, { - "epoch": 0.18542962304861024, - "grad_norm": 0.9030573964118958, - "learning_rate": 1.8763802513009266e-05, - "loss": 0.1444, + "epoch": 0.3708310066500838, + "grad_norm": 0.5015854239463806, + "learning_rate": 1.7527793288999445e-05, + "loss": 0.1193, "step": 7305 }, { - "epoch": 0.18555654270846555, - "grad_norm": 0.6359595060348511, - "learning_rate": 1.8762956381943564e-05, - "loss": 0.1621, + "epoch": 0.3710848266409462, + "grad_norm": 0.4574815630912781, + "learning_rate": 1.7526101155727025e-05, + "loss": 0.1355, "step": 7310 }, { - "epoch": 0.18568346236832084, - "grad_norm": 0.8706849813461304, - "learning_rate": 1.8762110250877862e-05, - "loss": 0.1971, + "epoch": 0.3713386466318087, + "grad_norm": 0.5332877039909363, + "learning_rate": 1.752440902245461e-05, + "loss": 0.1175, "step": 7315 }, { - "epoch": 0.18581038202817615, - "grad_norm": 0.6044660806655884, - "learning_rate": 1.876126411981216e-05, - "loss": 0.1487, + "epoch": 0.3715924666226712, + "grad_norm": 0.6135624647140503, + "learning_rate": 1.7522716889182196e-05, + "loss": 0.1268, "step": 7320 }, { - "epoch": 0.18593730168803146, - "grad_norm": 0.6026991009712219, - "learning_rate": 1.876041798874646e-05, - "loss": 0.1673, + "epoch": 0.3718462866135337, + "grad_norm": 0.5522362589836121, + "learning_rate": 1.7521024755909776e-05, + "loss": 0.1205, "step": 7325 }, { - "epoch": 0.18606422134788678, - "grad_norm": 0.9694281816482544, - "learning_rate": 1.8759571857680758e-05, - "loss": 0.164, + "epoch": 0.3721001066043962, + "grad_norm": 0.41892364621162415, + "learning_rate": 1.751933262263736e-05, + "loss": 0.1272, "step": 7330 }, { - "epoch": 0.1861911410077421, - "grad_norm": 0.6777485013008118, - "learning_rate": 1.8758725726615056e-05, - "loss": 0.1605, + "epoch": 0.3723539265952586, + "grad_norm": 0.5606332421302795, + "learning_rate": 1.7517640489364943e-05, + "loss": 0.1238, "step": 7335 }, { - "epoch": 0.1863180606675974, - "grad_norm": 1.025314211845398, - "learning_rate": 1.8757879595549354e-05, - "loss": 0.1537, + "epoch": 0.3726077465861211, + "grad_norm": 0.4218759834766388, + "learning_rate": 1.7515948356092526e-05, + "loss": 0.12, "step": 7340 }, { - "epoch": 0.18644498032745271, - "grad_norm": 0.7085960507392883, - "learning_rate": 1.875703346448365e-05, - "loss": 0.154, + "epoch": 0.3728615665769836, + "grad_norm": 0.45138901472091675, + "learning_rate": 1.7514256222820113e-05, + "loss": 0.1245, "step": 7345 }, { - "epoch": 0.18657189998730803, - "grad_norm": 1.149012565612793, - "learning_rate": 1.8756187333417948e-05, - "loss": 0.1552, + "epoch": 0.3731153865678461, + "grad_norm": 0.8056939244270325, + "learning_rate": 1.7512564089547694e-05, + "loss": 0.1328, "step": 7350 }, { - "epoch": 0.18669881964716334, - "grad_norm": 0.9013091921806335, - "learning_rate": 1.8755341202352246e-05, - "loss": 0.1732, + "epoch": 0.3733692065587086, + "grad_norm": 0.5511149168014526, + "learning_rate": 1.7510871956275277e-05, + "loss": 0.1121, "step": 7355 }, { - "epoch": 0.18682573930701865, - "grad_norm": 0.6369568109512329, - "learning_rate": 1.8754495071286545e-05, - "loss": 0.157, + "epoch": 0.373623026549571, + "grad_norm": 0.5154682993888855, + "learning_rate": 1.750917982300286e-05, + "loss": 0.132, "step": 7360 }, { - "epoch": 0.18695265896687396, - "grad_norm": 0.7926901578903198, - "learning_rate": 1.875364894022084e-05, - "loss": 0.1601, + "epoch": 0.3738768465404335, + "grad_norm": 1.9294079542160034, + "learning_rate": 1.7507487689730444e-05, + "loss": 0.1322, "step": 7365 }, { - "epoch": 0.18707957862672928, - "grad_norm": 0.8441897034645081, - "learning_rate": 1.8752802809155138e-05, - "loss": 0.1727, + "epoch": 0.374130666531296, + "grad_norm": 0.43836021423339844, + "learning_rate": 1.7505795556458028e-05, + "loss": 0.116, "step": 7370 }, { - "epoch": 0.1872064982865846, - "grad_norm": 0.9481744170188904, - "learning_rate": 1.8751956678089436e-05, - "loss": 0.1929, + "epoch": 0.3743844865221585, + "grad_norm": 0.43150243163108826, + "learning_rate": 1.750410342318561e-05, + "loss": 0.1197, "step": 7375 }, { - "epoch": 0.1873334179464399, - "grad_norm": 0.6957658529281616, - "learning_rate": 1.8751110547023735e-05, - "loss": 0.1619, + "epoch": 0.374638306513021, + "grad_norm": 0.606220543384552, + "learning_rate": 1.7502411289913195e-05, + "loss": 0.1232, "step": 7380 }, { - "epoch": 0.1874603376062952, - "grad_norm": 0.7399747967720032, - "learning_rate": 1.8750264415958033e-05, - "loss": 0.1858, + "epoch": 0.3748921265038834, + "grad_norm": 0.4498575031757355, + "learning_rate": 1.750071915664078e-05, + "loss": 0.1182, "step": 7385 }, { - "epoch": 0.18758725726615053, - "grad_norm": 1.5368715524673462, - "learning_rate": 1.874941828489233e-05, - "loss": 0.1812, + "epoch": 0.3751459464947459, + "grad_norm": 0.5238727331161499, + "learning_rate": 1.7499027023368362e-05, + "loss": 0.1433, "step": 7390 }, { - "epoch": 0.18771417692600584, - "grad_norm": 0.5746408700942993, - "learning_rate": 1.874857215382663e-05, - "loss": 0.1465, + "epoch": 0.3753997664856084, + "grad_norm": 0.5070326328277588, + "learning_rate": 1.7497334890095945e-05, + "loss": 0.1309, "step": 7395 }, { - "epoch": 0.18784109658586115, - "grad_norm": 0.5817438960075378, - "learning_rate": 1.874772602276093e-05, - "loss": 0.1477, + "epoch": 0.3756535864764709, + "grad_norm": 0.4144206643104553, + "learning_rate": 1.749564275682353e-05, + "loss": 0.1259, "step": 7400 }, { - "epoch": 0.18796801624571646, - "grad_norm": 0.8119709491729736, - "learning_rate": 1.8746879891695224e-05, - "loss": 0.162, + "epoch": 0.3759074064673334, + "grad_norm": 0.476362943649292, + "learning_rate": 1.7493950623551113e-05, + "loss": 0.1213, "step": 7405 }, { - "epoch": 0.18809493590557178, - "grad_norm": 0.7287126779556274, - "learning_rate": 1.8746033760629522e-05, - "loss": 0.1626, + "epoch": 0.37616122645819583, + "grad_norm": 0.41985994577407837, + "learning_rate": 1.7492258490278696e-05, + "loss": 0.1143, "step": 7410 }, { - "epoch": 0.1882218555654271, - "grad_norm": 0.6784968376159668, - "learning_rate": 1.874518762956382e-05, - "loss": 0.1733, + "epoch": 0.3764150464490583, + "grad_norm": 0.6084445714950562, + "learning_rate": 1.749056635700628e-05, + "loss": 0.1269, "step": 7415 }, { - "epoch": 0.1883487752252824, - "grad_norm": 0.731298565864563, - "learning_rate": 1.874434149849812e-05, - "loss": 0.1859, + "epoch": 0.3766688664399208, + "grad_norm": 0.39930278062820435, + "learning_rate": 1.7488874223733863e-05, + "loss": 0.1232, "step": 7420 }, { - "epoch": 0.1884756948851377, - "grad_norm": 0.8334957361221313, - "learning_rate": 1.8743495367432417e-05, - "loss": 0.1859, + "epoch": 0.3769226864307833, + "grad_norm": 0.4924406111240387, + "learning_rate": 1.7487182090461447e-05, + "loss": 0.124, "step": 7425 }, { - "epoch": 0.18860261454499302, - "grad_norm": 0.6941249966621399, - "learning_rate": 1.8742649236366716e-05, - "loss": 0.187, + "epoch": 0.3771765064216458, + "grad_norm": 0.5184084177017212, + "learning_rate": 1.748548995718903e-05, + "loss": 0.1227, "step": 7430 }, { - "epoch": 0.18872953420484834, - "grad_norm": 0.8187282085418701, - "learning_rate": 1.8741803105301014e-05, - "loss": 0.1722, + "epoch": 0.37743032641250823, + "grad_norm": 0.5747602581977844, + "learning_rate": 1.7483797823916614e-05, + "loss": 0.1309, "step": 7435 }, { - "epoch": 0.18885645386470365, - "grad_norm": 0.5423329472541809, - "learning_rate": 1.8740956974235312e-05, - "loss": 0.145, + "epoch": 0.3776841464033707, + "grad_norm": 0.6429848670959473, + "learning_rate": 1.7482105690644194e-05, + "loss": 0.1202, "step": 7440 }, { - "epoch": 0.18898337352455896, - "grad_norm": 0.799338698387146, - "learning_rate": 1.8740110843169607e-05, - "loss": 0.1317, + "epoch": 0.3779379663942332, + "grad_norm": 0.6303040981292725, + "learning_rate": 1.748041355737178e-05, + "loss": 0.119, "step": 7445 }, { - "epoch": 0.18911029318441427, - "grad_norm": 0.8490557670593262, - "learning_rate": 1.8739264712103906e-05, - "loss": 0.1692, + "epoch": 0.3781917863850957, + "grad_norm": 0.8696273565292358, + "learning_rate": 1.7478721424099364e-05, + "loss": 0.1191, "step": 7450 }, { - "epoch": 0.1892372128442696, - "grad_norm": 0.929019033908844, - "learning_rate": 1.8738418581038204e-05, - "loss": 0.182, + "epoch": 0.37844560637595814, + "grad_norm": 0.7249330878257751, + "learning_rate": 1.7477029290826948e-05, + "loss": 0.1285, "step": 7455 }, { - "epoch": 0.1893641325041249, - "grad_norm": 0.6385636329650879, - "learning_rate": 1.8737572449972503e-05, - "loss": 0.141, + "epoch": 0.37869942636682064, + "grad_norm": 0.37654367089271545, + "learning_rate": 1.747533715755453e-05, + "loss": 0.1287, "step": 7460 }, { - "epoch": 0.1894910521639802, - "grad_norm": 0.6605702638626099, - "learning_rate": 1.87367263189068e-05, - "loss": 0.1561, + "epoch": 0.3789532463576831, + "grad_norm": 0.3263428807258606, + "learning_rate": 1.747364502428211e-05, + "loss": 0.109, "step": 7465 }, { - "epoch": 0.18961797182383552, - "grad_norm": 0.6947289109230042, - "learning_rate": 1.87358801878411e-05, - "loss": 0.1634, + "epoch": 0.3792070663485456, + "grad_norm": 0.5799312591552734, + "learning_rate": 1.74719528910097e-05, + "loss": 0.1226, "step": 7470 }, { - "epoch": 0.18974489148369084, - "grad_norm": 0.6596311926841736, - "learning_rate": 1.8735034056775398e-05, - "loss": 0.1809, + "epoch": 0.3794608863394081, + "grad_norm": 0.47346293926239014, + "learning_rate": 1.7470260757737282e-05, + "loss": 0.1142, "step": 7475 }, { - "epoch": 0.18987181114354615, - "grad_norm": 1.3895491361618042, - "learning_rate": 1.8734187925709696e-05, - "loss": 0.154, + "epoch": 0.37971470633027055, + "grad_norm": 0.44059112668037415, + "learning_rate": 1.7468568624464862e-05, + "loss": 0.1104, "step": 7480 }, { - "epoch": 0.18999873080340143, - "grad_norm": 0.6460750699043274, - "learning_rate": 1.873334179464399e-05, - "loss": 0.1578, + "epoch": 0.37996852632113304, + "grad_norm": 0.5601668357849121, + "learning_rate": 1.746687649119245e-05, + "loss": 0.1318, "step": 7485 }, { - "epoch": 0.19012565046325675, - "grad_norm": 0.7274441123008728, - "learning_rate": 1.873249566357829e-05, - "loss": 0.1672, + "epoch": 0.38022234631199553, + "grad_norm": 0.6741954684257507, + "learning_rate": 1.746518435792003e-05, + "loss": 0.123, "step": 7490 }, { - "epoch": 0.19025257012311206, - "grad_norm": 1.3003753423690796, - "learning_rate": 1.8731649532512588e-05, - "loss": 0.2023, + "epoch": 0.380476166302858, + "grad_norm": 0.7247961759567261, + "learning_rate": 1.7463492224647616e-05, + "loss": 0.1275, "step": 7495 }, { - "epoch": 0.19037948978296737, - "grad_norm": 0.8084313273429871, - "learning_rate": 1.8730803401446886e-05, - "loss": 0.1477, + "epoch": 0.3807299862937205, + "grad_norm": 0.4358140826225281, + "learning_rate": 1.74618000913752e-05, + "loss": 0.1214, "step": 7500 }, { - "epoch": 0.19050640944282268, - "grad_norm": 0.791117250919342, - "learning_rate": 1.872995727038118e-05, - "loss": 0.1542, + "epoch": 0.38098380628458295, + "grad_norm": 0.35858985781669617, + "learning_rate": 1.746010795810278e-05, + "loss": 0.1377, "step": 7505 }, { - "epoch": 0.190633329102678, - "grad_norm": 0.7385177612304688, - "learning_rate": 1.872911113931548e-05, - "loss": 0.1719, + "epoch": 0.38123762627544544, + "grad_norm": 0.5448585152626038, + "learning_rate": 1.7458415824830367e-05, + "loss": 0.1244, "step": 7510 }, { - "epoch": 0.1907602487625333, - "grad_norm": 0.72374427318573, - "learning_rate": 1.8728265008249778e-05, - "loss": 0.1801, + "epoch": 0.38149144626630793, + "grad_norm": 0.6085044145584106, + "learning_rate": 1.7456723691557947e-05, + "loss": 0.1137, "step": 7515 }, { - "epoch": 0.19088716842238862, - "grad_norm": 0.6721182465553284, - "learning_rate": 1.8727418877184077e-05, - "loss": 0.1843, + "epoch": 0.3817452662571704, + "grad_norm": 0.43685221672058105, + "learning_rate": 1.745503155828553e-05, + "loss": 0.1193, "step": 7520 }, { - "epoch": 0.19101408808224393, - "grad_norm": 0.7877911925315857, - "learning_rate": 1.8726572746118375e-05, - "loss": 0.1916, + "epoch": 0.3819990862480329, + "grad_norm": 0.4632774293422699, + "learning_rate": 1.7453339425013118e-05, + "loss": 0.1253, "step": 7525 }, { - "epoch": 0.19114100774209924, - "grad_norm": 0.817273736000061, - "learning_rate": 1.8725726615052673e-05, - "loss": 0.1731, + "epoch": 0.38225290623889535, + "grad_norm": 0.67642742395401, + "learning_rate": 1.7451647291740698e-05, + "loss": 0.1173, "step": 7530 }, { - "epoch": 0.19126792740195456, - "grad_norm": 0.5845680236816406, - "learning_rate": 1.8724880483986972e-05, - "loss": 0.1698, + "epoch": 0.38250672622975784, + "grad_norm": 0.5087795853614807, + "learning_rate": 1.744995515846828e-05, + "loss": 0.1271, "step": 7535 }, { - "epoch": 0.19139484706180987, - "grad_norm": 0.8731700778007507, - "learning_rate": 1.872403435292127e-05, - "loss": 0.168, + "epoch": 0.38276054622062033, + "grad_norm": 0.5182584524154663, + "learning_rate": 1.7448263025195865e-05, + "loss": 0.1189, "step": 7540 }, { - "epoch": 0.19152176672166518, - "grad_norm": 0.8915535807609558, - "learning_rate": 1.8723188221855565e-05, - "loss": 0.1778, + "epoch": 0.3830143662114828, + "grad_norm": 0.6090805530548096, + "learning_rate": 1.744657089192345e-05, + "loss": 0.1135, "step": 7545 }, { - "epoch": 0.1916486863815205, - "grad_norm": 0.793067991733551, - "learning_rate": 1.8722342090789864e-05, - "loss": 0.1501, + "epoch": 0.3832681862023453, + "grad_norm": 0.558914065361023, + "learning_rate": 1.7444878758651035e-05, + "loss": 0.1275, "step": 7550 }, { - "epoch": 0.1917756060413758, - "grad_norm": 3.4339993000030518, - "learning_rate": 1.8721495959724162e-05, - "loss": 0.1355, + "epoch": 0.38352200619320775, + "grad_norm": 0.4196544885635376, + "learning_rate": 1.7443186625378615e-05, + "loss": 0.1208, "step": 7555 }, { - "epoch": 0.19190252570123112, - "grad_norm": 0.695007860660553, - "learning_rate": 1.872064982865846e-05, - "loss": 0.1622, + "epoch": 0.38377582618407025, + "grad_norm": 0.6540439128875732, + "learning_rate": 1.74414944921062e-05, + "loss": 0.1332, "step": 7560 }, { - "epoch": 0.19202944536108643, - "grad_norm": 0.6344842910766602, - "learning_rate": 1.871980369759276e-05, - "loss": 0.1531, + "epoch": 0.38402964617493274, + "grad_norm": 0.9480463266372681, + "learning_rate": 1.7439802358833783e-05, + "loss": 0.1152, "step": 7565 }, { - "epoch": 0.19215636502094174, - "grad_norm": 0.6114038825035095, - "learning_rate": 1.8718957566527057e-05, - "loss": 0.1376, + "epoch": 0.38428346616579523, + "grad_norm": 0.68366539478302, + "learning_rate": 1.7438110225561366e-05, + "loss": 0.1121, "step": 7570 }, { - "epoch": 0.19228328468079706, - "grad_norm": 0.6089299917221069, - "learning_rate": 1.8718111435461356e-05, - "loss": 0.1669, + "epoch": 0.3845372861566577, + "grad_norm": 0.7604976296424866, + "learning_rate": 1.743641809228895e-05, + "loss": 0.1317, "step": 7575 }, { - "epoch": 0.19241020434065237, - "grad_norm": 0.678822934627533, - "learning_rate": 1.8717265304395654e-05, - "loss": 0.1462, + "epoch": 0.38479110614752016, + "grad_norm": 0.704014003276825, + "learning_rate": 1.7434725959016533e-05, + "loss": 0.1103, "step": 7580 }, { - "epoch": 0.19253712400050768, - "grad_norm": 0.6496794819831848, - "learning_rate": 1.871641917332995e-05, - "loss": 0.1591, + "epoch": 0.38504492613838265, + "grad_norm": 0.5294590592384338, + "learning_rate": 1.7433033825744117e-05, + "loss": 0.1276, "step": 7585 }, { - "epoch": 0.192664043660363, - "grad_norm": 0.8509161472320557, - "learning_rate": 1.8715573042264247e-05, - "loss": 0.1652, + "epoch": 0.38529874612924514, + "grad_norm": 0.3316403925418854, + "learning_rate": 1.74313416924717e-05, + "loss": 0.1062, "step": 7590 }, { - "epoch": 0.1927909633202183, - "grad_norm": 0.5538321733474731, - "learning_rate": 1.8714726911198546e-05, - "loss": 0.1907, + "epoch": 0.38555256612010763, + "grad_norm": 0.48089930415153503, + "learning_rate": 1.7429649559199284e-05, + "loss": 0.114, "step": 7595 }, { - "epoch": 0.19291788298007362, - "grad_norm": 0.8712360858917236, - "learning_rate": 1.8713880780132844e-05, - "loss": 0.1362, + "epoch": 0.3858063861109701, + "grad_norm": 0.4831618368625641, + "learning_rate": 1.7427957425926867e-05, + "loss": 0.1184, "step": 7600 }, { - "epoch": 0.19304480263992893, - "grad_norm": 1.6256508827209473, - "learning_rate": 1.8713034649067143e-05, - "loss": 0.1604, + "epoch": 0.38606020610183256, + "grad_norm": 0.49979323148727417, + "learning_rate": 1.742626529265445e-05, + "loss": 0.1335, "step": 7605 }, { - "epoch": 0.19317172229978424, - "grad_norm": 0.7028308510780334, - "learning_rate": 1.871218851800144e-05, - "loss": 0.1744, + "epoch": 0.38631402609269505, + "grad_norm": 1.3292902708053589, + "learning_rate": 1.7424573159382034e-05, + "loss": 0.1143, "step": 7610 }, { - "epoch": 0.19329864195963956, - "grad_norm": 1.1182763576507568, - "learning_rate": 1.871134238693574e-05, - "loss": 0.1789, + "epoch": 0.38656784608355754, + "grad_norm": 0.4008750915527344, + "learning_rate": 1.7422881026109618e-05, + "loss": 0.1229, "step": 7615 }, { - "epoch": 0.19342556161949487, - "grad_norm": 0.8550707101821899, - "learning_rate": 1.8710496255870038e-05, - "loss": 0.1467, + "epoch": 0.38682166607442003, + "grad_norm": 0.5050025582313538, + "learning_rate": 1.74211888928372e-05, + "loss": 0.1137, "step": 7620 }, { - "epoch": 0.19355248127935018, - "grad_norm": 0.7218382954597473, - "learning_rate": 1.8709650124804333e-05, - "loss": 0.1525, + "epoch": 0.3870754860652825, + "grad_norm": 0.7327058911323547, + "learning_rate": 1.7419496759564785e-05, + "loss": 0.1408, "step": 7625 }, { - "epoch": 0.1936794009392055, - "grad_norm": 0.9323040246963501, - "learning_rate": 1.870880399373863e-05, - "loss": 0.1467, + "epoch": 0.38732930605614496, + "grad_norm": 0.5808429718017578, + "learning_rate": 1.741780462629237e-05, + "loss": 0.1055, "step": 7630 }, { - "epoch": 0.1938063205990608, - "grad_norm": 0.52815842628479, - "learning_rate": 1.870795786267293e-05, - "loss": 0.1595, + "epoch": 0.38758312604700745, + "grad_norm": 1.0923219919204712, + "learning_rate": 1.7416112493019952e-05, + "loss": 0.128, "step": 7635 }, { - "epoch": 0.19393324025891612, - "grad_norm": 0.5932378768920898, - "learning_rate": 1.8707111731607228e-05, - "loss": 0.1374, + "epoch": 0.38783694603786995, + "grad_norm": 0.6931188106536865, + "learning_rate": 1.7414420359747536e-05, + "loss": 0.119, "step": 7640 }, { - "epoch": 0.19406015991877143, - "grad_norm": 1.3722801208496094, - "learning_rate": 1.8706265600541523e-05, - "loss": 0.1937, + "epoch": 0.38809076602873244, + "grad_norm": 0.7696263790130615, + "learning_rate": 1.741272822647512e-05, + "loss": 0.1138, "step": 7645 }, { - "epoch": 0.19418707957862674, - "grad_norm": 0.6893827319145203, - "learning_rate": 1.870541946947582e-05, - "loss": 0.1927, + "epoch": 0.38834458601959493, + "grad_norm": 0.5164752006530762, + "learning_rate": 1.7411036093202703e-05, + "loss": 0.1151, "step": 7650 }, { - "epoch": 0.19431399923848205, - "grad_norm": 1.206262469291687, - "learning_rate": 1.870457333841012e-05, - "loss": 0.136, + "epoch": 0.38859840601045736, + "grad_norm": 0.4094414710998535, + "learning_rate": 1.7409343959930286e-05, + "loss": 0.1241, "step": 7655 }, { - "epoch": 0.19444091889833734, - "grad_norm": 0.8460313081741333, - "learning_rate": 1.8703727207344418e-05, - "loss": 0.1495, + "epoch": 0.38885222600131986, + "grad_norm": 0.43349120020866394, + "learning_rate": 1.740765182665787e-05, + "loss": 0.1269, "step": 7660 }, { - "epoch": 0.19456783855819265, - "grad_norm": 1.018388271331787, - "learning_rate": 1.8702881076278717e-05, - "loss": 0.1625, + "epoch": 0.38910604599218235, + "grad_norm": 0.5583718419075012, + "learning_rate": 1.7405959693385453e-05, + "loss": 0.1239, "step": 7665 }, { - "epoch": 0.19469475821804796, - "grad_norm": 0.6767957210540771, - "learning_rate": 1.8702034945213015e-05, - "loss": 0.1459, + "epoch": 0.38935986598304484, + "grad_norm": 0.45432811975479126, + "learning_rate": 1.7404267560113034e-05, + "loss": 0.1353, "step": 7670 }, { - "epoch": 0.19482167787790328, - "grad_norm": 0.5677594542503357, - "learning_rate": 1.8701188814147313e-05, - "loss": 0.1532, + "epoch": 0.38961368597390733, + "grad_norm": 0.4887847900390625, + "learning_rate": 1.740257542684062e-05, + "loss": 0.1189, "step": 7675 }, { - "epoch": 0.1949485975377586, - "grad_norm": 0.8595093488693237, - "learning_rate": 1.8700342683081612e-05, - "loss": 0.1688, + "epoch": 0.38986750596476977, + "grad_norm": 0.6936730146408081, + "learning_rate": 1.7400883293568204e-05, + "loss": 0.1206, "step": 7680 }, { - "epoch": 0.1950755171976139, - "grad_norm": 1.6635637283325195, - "learning_rate": 1.8699496552015907e-05, - "loss": 0.1617, + "epoch": 0.39012132595563226, + "grad_norm": 0.49213695526123047, + "learning_rate": 1.7399191160295784e-05, + "loss": 0.1092, "step": 7685 }, { - "epoch": 0.1952024368574692, - "grad_norm": 0.550910234451294, - "learning_rate": 1.8698650420950205e-05, - "loss": 0.1606, + "epoch": 0.39037514594649475, + "grad_norm": 0.38413718342781067, + "learning_rate": 1.739749902702337e-05, + "loss": 0.1195, "step": 7690 }, { - "epoch": 0.19532935651732453, - "grad_norm": 1.0446285009384155, - "learning_rate": 1.8697804289884504e-05, - "loss": 0.1924, + "epoch": 0.39062896593735724, + "grad_norm": 0.5132599472999573, + "learning_rate": 1.739580689375095e-05, + "loss": 0.1336, "step": 7695 }, { - "epoch": 0.19545627617717984, - "grad_norm": 0.6521877646446228, - "learning_rate": 1.8696958158818802e-05, - "loss": 0.1841, + "epoch": 0.39088278592821973, + "grad_norm": 0.5477653741836548, + "learning_rate": 1.7394114760478538e-05, + "loss": 0.1258, "step": 7700 }, { - "epoch": 0.19558319583703515, - "grad_norm": 0.7004679441452026, - "learning_rate": 1.86961120277531e-05, - "loss": 0.1538, + "epoch": 0.39113660591908217, + "grad_norm": 0.3821299076080322, + "learning_rate": 1.7392422627206122e-05, + "loss": 0.1269, "step": 7705 }, { - "epoch": 0.19571011549689046, - "grad_norm": 0.8821632266044617, - "learning_rate": 1.86952658966874e-05, - "loss": 0.1764, + "epoch": 0.39139042590994466, + "grad_norm": 0.4275185167789459, + "learning_rate": 1.7390730493933702e-05, + "loss": 0.1288, "step": 7710 }, { - "epoch": 0.19583703515674578, - "grad_norm": 0.728492021560669, - "learning_rate": 1.8694419765621697e-05, - "loss": 0.1348, + "epoch": 0.39164424590080715, + "grad_norm": 0.6078634858131409, + "learning_rate": 1.738903836066129e-05, + "loss": 0.1171, "step": 7715 }, { - "epoch": 0.1959639548166011, - "grad_norm": 0.9274089932441711, - "learning_rate": 1.8693573634555996e-05, - "loss": 0.1537, + "epoch": 0.39189806589166964, + "grad_norm": 0.5919456481933594, + "learning_rate": 1.738734622738887e-05, + "loss": 0.1187, "step": 7720 }, { - "epoch": 0.1960908744764564, - "grad_norm": 0.5917150974273682, - "learning_rate": 1.869272750349029e-05, - "loss": 0.153, + "epoch": 0.3921518858825321, + "grad_norm": 0.3606504797935486, + "learning_rate": 1.7385654094116453e-05, + "loss": 0.1196, "step": 7725 }, { - "epoch": 0.1962177941363117, - "grad_norm": 0.652963399887085, - "learning_rate": 1.869188137242459e-05, - "loss": 0.1504, + "epoch": 0.3924057058733946, + "grad_norm": 0.5512848496437073, + "learning_rate": 1.738396196084404e-05, + "loss": 0.1143, "step": 7730 }, { - "epoch": 0.19634471379616703, - "grad_norm": 1.1884173154830933, - "learning_rate": 1.8691035241358888e-05, - "loss": 0.193, + "epoch": 0.39265952586425706, + "grad_norm": 0.5730594396591187, + "learning_rate": 1.738226982757162e-05, + "loss": 0.1215, "step": 7735 }, { - "epoch": 0.19647163345602234, - "grad_norm": 0.6579567193984985, - "learning_rate": 1.8690189110293186e-05, - "loss": 0.1629, + "epoch": 0.39291334585511956, + "grad_norm": 0.4641408622264862, + "learning_rate": 1.7380577694299207e-05, + "loss": 0.117, "step": 7740 }, { - "epoch": 0.19659855311587765, - "grad_norm": 0.6726020574569702, - "learning_rate": 1.8689342979227484e-05, - "loss": 0.1496, + "epoch": 0.39316716584598205, + "grad_norm": 0.417926162481308, + "learning_rate": 1.7378885561026787e-05, + "loss": 0.1095, "step": 7745 }, { - "epoch": 0.19672547277573296, - "grad_norm": 2.16939377784729, - "learning_rate": 1.8688496848161783e-05, - "loss": 0.1682, + "epoch": 0.3934209858368445, + "grad_norm": 0.6021568775177002, + "learning_rate": 1.737719342775437e-05, + "loss": 0.1163, "step": 7750 }, { - "epoch": 0.19685239243558827, - "grad_norm": 1.3795497417449951, - "learning_rate": 1.868765071709608e-05, - "loss": 0.1537, + "epoch": 0.393674805827707, + "grad_norm": 0.5516533851623535, + "learning_rate": 1.7375501294481957e-05, + "loss": 0.1147, "step": 7755 }, { - "epoch": 0.1969793120954436, - "grad_norm": 0.6639158725738525, - "learning_rate": 1.868680458603038e-05, - "loss": 0.1776, + "epoch": 0.39392862581856947, + "grad_norm": 0.49156588315963745, + "learning_rate": 1.7373809161209537e-05, + "loss": 0.119, "step": 7760 }, { - "epoch": 0.1971062317552989, - "grad_norm": 1.0337809324264526, - "learning_rate": 1.8685958454964678e-05, - "loss": 0.1803, + "epoch": 0.39418244580943196, + "grad_norm": 0.546325147151947, + "learning_rate": 1.737211702793712e-05, + "loss": 0.1385, "step": 7765 }, { - "epoch": 0.1972331514151542, - "grad_norm": 1.2481013536453247, - "learning_rate": 1.8685112323898973e-05, - "loss": 0.1872, + "epoch": 0.39443626580029445, + "grad_norm": 0.4427269399166107, + "learning_rate": 1.7370424894664704e-05, + "loss": 0.1237, "step": 7770 }, { - "epoch": 0.19736007107500952, - "grad_norm": 0.6991986036300659, - "learning_rate": 1.868426619283327e-05, - "loss": 0.1316, + "epoch": 0.3946900857911569, + "grad_norm": 0.42994052171707153, + "learning_rate": 1.7368732761392288e-05, + "loss": 0.107, "step": 7775 }, { - "epoch": 0.19748699073486484, - "grad_norm": 0.5837782621383667, - "learning_rate": 1.868342006176757e-05, - "loss": 0.1622, + "epoch": 0.3949439057820194, + "grad_norm": 0.4181389808654785, + "learning_rate": 1.736704062811987e-05, + "loss": 0.0955, "step": 7780 }, { - "epoch": 0.19761391039472015, - "grad_norm": 0.6858274936676025, - "learning_rate": 1.8682573930701868e-05, - "loss": 0.1566, + "epoch": 0.39519772577288187, + "grad_norm": 0.599090576171875, + "learning_rate": 1.7365348494847455e-05, + "loss": 0.1237, "step": 7785 }, { - "epoch": 0.19774083005457546, - "grad_norm": 0.7206102013587952, - "learning_rate": 1.8681727799636163e-05, - "loss": 0.1645, + "epoch": 0.39545154576374436, + "grad_norm": 0.5317444801330566, + "learning_rate": 1.736365636157504e-05, + "loss": 0.1143, "step": 7790 }, { - "epoch": 0.19786774971443077, - "grad_norm": 0.6292104721069336, - "learning_rate": 1.868088166857046e-05, - "loss": 0.1402, + "epoch": 0.39570536575460685, + "grad_norm": 0.5095632672309875, + "learning_rate": 1.7361964228302622e-05, + "loss": 0.1297, "step": 7795 }, { - "epoch": 0.1979946693742861, - "grad_norm": 0.8133127689361572, - "learning_rate": 1.868003553750476e-05, - "loss": 0.1535, + "epoch": 0.3959591857454693, + "grad_norm": 0.7343167662620544, + "learning_rate": 1.7360272095030206e-05, + "loss": 0.1296, "step": 7800 }, { - "epoch": 0.1981215890341414, - "grad_norm": 1.0243924856185913, - "learning_rate": 1.867918940643906e-05, - "loss": 0.1491, + "epoch": 0.3962130057363318, + "grad_norm": 0.7114145755767822, + "learning_rate": 1.735857996175779e-05, + "loss": 0.1186, "step": 7805 }, { - "epoch": 0.1982485086939967, - "grad_norm": 0.5368655920028687, - "learning_rate": 1.8678343275373357e-05, - "loss": 0.1274, + "epoch": 0.39646682572719427, + "grad_norm": 0.52663254737854, + "learning_rate": 1.7356887828485373e-05, + "loss": 0.1318, "step": 7810 }, { - "epoch": 0.19837542835385202, - "grad_norm": 0.7077710628509521, - "learning_rate": 1.8677497144307655e-05, - "loss": 0.1675, + "epoch": 0.39672064571805676, + "grad_norm": 0.33781489729881287, + "learning_rate": 1.7355195695212956e-05, + "loss": 0.1075, "step": 7815 }, { - "epoch": 0.19850234801370734, - "grad_norm": 0.7949740886688232, - "learning_rate": 1.8676651013241954e-05, - "loss": 0.1451, + "epoch": 0.39697446570891926, + "grad_norm": 0.8520582914352417, + "learning_rate": 1.735350356194054e-05, + "loss": 0.1175, "step": 7820 }, { - "epoch": 0.19862926767356265, - "grad_norm": 0.7462958097457886, - "learning_rate": 1.8675804882176252e-05, - "loss": 0.1616, + "epoch": 0.3972282856997817, + "grad_norm": 0.448352187871933, + "learning_rate": 1.7351811428668123e-05, + "loss": 0.1189, "step": 7825 }, { - "epoch": 0.19875618733341793, - "grad_norm": 0.7694410681724548, - "learning_rate": 1.8674958751110547e-05, - "loss": 0.1614, + "epoch": 0.3974821056906442, + "grad_norm": 1.0264869928359985, + "learning_rate": 1.7350119295395707e-05, + "loss": 0.1208, "step": 7830 }, { - "epoch": 0.19888310699327325, - "grad_norm": 1.3424712419509888, - "learning_rate": 1.8674112620044845e-05, - "loss": 0.1606, + "epoch": 0.3977359256815067, + "grad_norm": 0.4782174527645111, + "learning_rate": 1.734842716212329e-05, + "loss": 0.1211, "step": 7835 }, { - "epoch": 0.19901002665312856, - "grad_norm": 0.7225872278213501, - "learning_rate": 1.8673266488979144e-05, - "loss": 0.1463, + "epoch": 0.39798974567236917, + "grad_norm": 0.41531625390052795, + "learning_rate": 1.7346735028850874e-05, + "loss": 0.1131, "step": 7840 }, { - "epoch": 0.19913694631298387, - "grad_norm": 0.5036975741386414, - "learning_rate": 1.8672420357913442e-05, - "loss": 0.1459, + "epoch": 0.39824356566323166, + "grad_norm": 0.5510904788970947, + "learning_rate": 1.7345042895578458e-05, + "loss": 0.1068, "step": 7845 }, { - "epoch": 0.19926386597283918, - "grad_norm": 0.7603396773338318, - "learning_rate": 1.867157422684774e-05, - "loss": 0.1604, + "epoch": 0.3984973856540941, + "grad_norm": 0.6751682758331299, + "learning_rate": 1.734335076230604e-05, + "loss": 0.1381, "step": 7850 }, { - "epoch": 0.1993907856326945, - "grad_norm": 1.0169436931610107, - "learning_rate": 1.867072809578204e-05, - "loss": 0.1469, + "epoch": 0.3987512056449566, + "grad_norm": 0.47615987062454224, + "learning_rate": 1.7341658629033625e-05, + "loss": 0.1038, "step": 7855 }, { - "epoch": 0.1995177052925498, - "grad_norm": 0.7359219193458557, - "learning_rate": 1.8669881964716337e-05, - "loss": 0.1511, + "epoch": 0.3990050256358191, + "grad_norm": 0.5570587515830994, + "learning_rate": 1.7339966495761208e-05, + "loss": 0.111, "step": 7860 }, { - "epoch": 0.19964462495240512, - "grad_norm": 0.5629939436912537, - "learning_rate": 1.8669035833650636e-05, - "loss": 0.1718, + "epoch": 0.39925884562668157, + "grad_norm": 0.5412005186080933, + "learning_rate": 1.7338274362488792e-05, + "loss": 0.1264, "step": 7865 }, { - "epoch": 0.19977154461226043, - "grad_norm": 0.6710687875747681, - "learning_rate": 1.866818970258493e-05, - "loss": 0.1572, + "epoch": 0.39951266561754406, + "grad_norm": 1.4431235790252686, + "learning_rate": 1.7336582229216375e-05, + "loss": 0.1239, "step": 7870 }, { - "epoch": 0.19989846427211574, - "grad_norm": 0.6223983764648438, - "learning_rate": 1.866734357151923e-05, - "loss": 0.143, + "epoch": 0.3997664856084065, + "grad_norm": 0.3393210768699646, + "learning_rate": 1.7334890095943956e-05, + "loss": 0.1383, "step": 7875 }, { - "epoch": 0.20002538393197106, - "grad_norm": 0.6477605104446411, - "learning_rate": 1.8666497440453528e-05, - "loss": 0.1397, + "epoch": 0.400020305599269, + "grad_norm": 0.5208285450935364, + "learning_rate": 1.7333197962671542e-05, + "loss": 0.1172, "step": 7880 }, { - "epoch": 0.20015230359182637, - "grad_norm": 0.6845738291740417, - "learning_rate": 1.8665651309387826e-05, - "loss": 0.1429, + "epoch": 0.4002741255901315, + "grad_norm": 0.5236523747444153, + "learning_rate": 1.7331505829399126e-05, + "loss": 0.1098, "step": 7885 }, { - "epoch": 0.20027922325168168, - "grad_norm": 0.8847113847732544, - "learning_rate": 1.8664805178322124e-05, - "loss": 0.1674, + "epoch": 0.40052794558099397, + "grad_norm": 0.6051152944564819, + "learning_rate": 1.732981369612671e-05, + "loss": 0.1165, "step": 7890 }, { - "epoch": 0.200406142911537, - "grad_norm": 0.5271224975585938, - "learning_rate": 1.8663959047256423e-05, - "loss": 0.1773, + "epoch": 0.40078176557185646, + "grad_norm": 0.8865431547164917, + "learning_rate": 1.7328121562854293e-05, + "loss": 0.1145, "step": 7895 }, { - "epoch": 0.2005330625713923, - "grad_norm": 0.7400374412536621, - "learning_rate": 1.866311291619072e-05, - "loss": 0.1289, + "epoch": 0.4010355855627189, + "grad_norm": 0.5429019927978516, + "learning_rate": 1.7326429429581873e-05, + "loss": 0.1297, "step": 7900 }, { - "epoch": 0.20065998223124762, - "grad_norm": 0.9786463379859924, - "learning_rate": 1.866226678512502e-05, - "loss": 0.1563, + "epoch": 0.4012894055535814, + "grad_norm": 0.7259514331817627, + "learning_rate": 1.732473729630946e-05, + "loss": 0.1103, "step": 7905 }, { - "epoch": 0.20078690189110293, - "grad_norm": 1.2766469717025757, - "learning_rate": 1.8661420654059315e-05, - "loss": 0.1635, + "epoch": 0.4015432255444439, + "grad_norm": 0.6104267239570618, + "learning_rate": 1.7323045163037044e-05, + "loss": 0.1066, "step": 7910 }, { - "epoch": 0.20091382155095824, - "grad_norm": 0.7786610126495361, - "learning_rate": 1.8660574522993613e-05, - "loss": 0.1336, + "epoch": 0.4017970455353064, + "grad_norm": 0.4790496230125427, + "learning_rate": 1.7321353029764624e-05, + "loss": 0.1265, "step": 7915 }, { - "epoch": 0.20104074121081356, - "grad_norm": 0.6705454587936401, - "learning_rate": 1.865972839192791e-05, - "loss": 0.1456, + "epoch": 0.40205086552616887, + "grad_norm": 0.39838600158691406, + "learning_rate": 1.731966089649221e-05, + "loss": 0.105, "step": 7920 }, { - "epoch": 0.20116766087066887, - "grad_norm": 0.8099325895309448, - "learning_rate": 1.865888226086221e-05, - "loss": 0.1843, + "epoch": 0.4023046855170313, + "grad_norm": 0.44898778200149536, + "learning_rate": 1.731796876321979e-05, + "loss": 0.1256, "step": 7925 }, { - "epoch": 0.20129458053052418, - "grad_norm": 0.5900343060493469, - "learning_rate": 1.8658036129796505e-05, - "loss": 0.1408, + "epoch": 0.4025585055078938, + "grad_norm": 1.1361500024795532, + "learning_rate": 1.7316276629947375e-05, + "loss": 0.1154, "step": 7930 }, { - "epoch": 0.2014215001903795, - "grad_norm": 0.634425699710846, - "learning_rate": 1.8657189998730803e-05, - "loss": 0.1571, + "epoch": 0.4028123254987563, + "grad_norm": 0.44268670678138733, + "learning_rate": 1.731458449667496e-05, + "loss": 0.122, "step": 7935 }, { - "epoch": 0.2015484198502348, - "grad_norm": 0.8995475769042969, - "learning_rate": 1.8656343867665102e-05, - "loss": 0.1602, + "epoch": 0.4030661454896188, + "grad_norm": 0.5239094495773315, + "learning_rate": 1.731289236340254e-05, + "loss": 0.1109, "step": 7940 }, { - "epoch": 0.20167533951009012, - "grad_norm": 0.6463816165924072, - "learning_rate": 1.86554977365994e-05, - "loss": 0.1367, + "epoch": 0.40331996548048127, + "grad_norm": 0.46446603536605835, + "learning_rate": 1.731120023013013e-05, + "loss": 0.1077, "step": 7945 }, { - "epoch": 0.20180225916994543, - "grad_norm": 0.6387722492218018, - "learning_rate": 1.86546516055337e-05, - "loss": 0.1698, + "epoch": 0.4035737854713437, + "grad_norm": 0.36538025736808777, + "learning_rate": 1.730950809685771e-05, + "loss": 0.1189, "step": 7950 }, { - "epoch": 0.20192917882980074, - "grad_norm": 0.6147080063819885, - "learning_rate": 1.8653805474467997e-05, - "loss": 0.1749, + "epoch": 0.4038276054622062, + "grad_norm": 0.3762917220592499, + "learning_rate": 1.7307815963585292e-05, + "loss": 0.1186, "step": 7955 }, { - "epoch": 0.20205609848965606, - "grad_norm": 1.0516314506530762, - "learning_rate": 1.8652959343402295e-05, - "loss": 0.153, + "epoch": 0.4040814254530687, + "grad_norm": 0.3559653162956238, + "learning_rate": 1.730612383031288e-05, + "loss": 0.1242, "step": 7960 }, { - "epoch": 0.20218301814951137, - "grad_norm": 0.8082872629165649, - "learning_rate": 1.8652113212336594e-05, - "loss": 0.1546, + "epoch": 0.4043352454439312, + "grad_norm": 0.5034398436546326, + "learning_rate": 1.730443169704046e-05, + "loss": 0.1222, "step": 7965 }, { - "epoch": 0.20230993780936668, - "grad_norm": 0.9268187284469604, - "learning_rate": 1.865126708127089e-05, - "loss": 0.1638, + "epoch": 0.40458906543479367, + "grad_norm": 0.8461323976516724, + "learning_rate": 1.7302739563768043e-05, + "loss": 0.1281, "step": 7970 }, { - "epoch": 0.202436857469222, - "grad_norm": 0.4831843972206116, - "learning_rate": 1.8650420950205187e-05, - "loss": 0.1318, + "epoch": 0.4048428854256561, + "grad_norm": 0.4809536337852478, + "learning_rate": 1.7301047430495626e-05, + "loss": 0.1085, "step": 7975 }, { - "epoch": 0.2025637771290773, - "grad_norm": 1.0250616073608398, - "learning_rate": 1.8649574819139486e-05, - "loss": 0.1603, + "epoch": 0.4050967054165186, + "grad_norm": 0.5192294716835022, + "learning_rate": 1.729935529722321e-05, + "loss": 0.1183, "step": 7980 }, { - "epoch": 0.20269069678893262, - "grad_norm": 0.6957110166549683, - "learning_rate": 1.8648728688073784e-05, - "loss": 0.1648, + "epoch": 0.4053505254073811, + "grad_norm": 0.45198673009872437, + "learning_rate": 1.7297663163950797e-05, + "loss": 0.1144, "step": 7985 }, { - "epoch": 0.20281761644878793, - "grad_norm": 1.5765780210494995, - "learning_rate": 1.8647882557008082e-05, - "loss": 0.1288, + "epoch": 0.4056043453982436, + "grad_norm": 0.4005521535873413, + "learning_rate": 1.7295971030678377e-05, + "loss": 0.1195, "step": 7990 }, { - "epoch": 0.20294453610864324, - "grad_norm": 0.6731282472610474, - "learning_rate": 1.864703642594238e-05, - "loss": 0.1782, + "epoch": 0.4058581653891061, + "grad_norm": 0.5918262004852295, + "learning_rate": 1.729427889740596e-05, + "loss": 0.1212, "step": 7995 }, { - "epoch": 0.20307145576849853, - "grad_norm": 0.49546951055526733, - "learning_rate": 1.864619029487668e-05, - "loss": 0.1898, + "epoch": 0.4061119853799685, + "grad_norm": 0.46647128462791443, + "learning_rate": 1.7292586764133544e-05, + "loss": 0.1101, "step": 8000 }, { - "epoch": 0.20319837542835384, - "grad_norm": 0.7585256099700928, - "learning_rate": 1.8645344163810978e-05, - "loss": 0.1433, + "epoch": 0.406365805370831, + "grad_norm": 0.43062543869018555, + "learning_rate": 1.7290894630861128e-05, + "loss": 0.1174, "step": 8005 }, { - "epoch": 0.20332529508820915, - "grad_norm": 0.6433865427970886, - "learning_rate": 1.8644498032745273e-05, - "loss": 0.1693, + "epoch": 0.4066196253616935, + "grad_norm": 0.4810847043991089, + "learning_rate": 1.728920249758871e-05, + "loss": 0.1212, "step": 8010 }, { - "epoch": 0.20345221474806446, - "grad_norm": 0.8376720547676086, - "learning_rate": 1.864365190167957e-05, - "loss": 0.152, + "epoch": 0.406873445352556, + "grad_norm": 0.5878148078918457, + "learning_rate": 1.7287510364316295e-05, + "loss": 0.1138, "step": 8015 }, { - "epoch": 0.20357913440791978, - "grad_norm": 0.6108659505844116, - "learning_rate": 1.864280577061387e-05, - "loss": 0.1598, + "epoch": 0.4071272653434184, + "grad_norm": 0.5232487916946411, + "learning_rate": 1.728581823104388e-05, + "loss": 0.1264, "step": 8020 }, { - "epoch": 0.2037060540677751, - "grad_norm": 0.8893314599990845, - "learning_rate": 1.8641959639548168e-05, - "loss": 0.1766, + "epoch": 0.4073810853342809, + "grad_norm": 0.3923046588897705, + "learning_rate": 1.7284126097771462e-05, + "loss": 0.1137, "step": 8025 }, { - "epoch": 0.2038329737276304, - "grad_norm": 0.7075525522232056, - "learning_rate": 1.8641113508482466e-05, - "loss": 0.1397, + "epoch": 0.4076349053251434, + "grad_norm": 0.5523611903190613, + "learning_rate": 1.7282433964499045e-05, + "loss": 0.1269, "step": 8030 }, { - "epoch": 0.2039598933874857, - "grad_norm": 0.70185387134552, - "learning_rate": 1.8640267377416765e-05, - "loss": 0.1673, + "epoch": 0.4078887253160059, + "grad_norm": 0.587213933467865, + "learning_rate": 1.728074183122663e-05, + "loss": 0.1182, "step": 8035 }, { - "epoch": 0.20408681304734103, - "grad_norm": 0.6469791531562805, - "learning_rate": 1.8639421246351063e-05, - "loss": 0.1558, + "epoch": 0.4081425453068684, + "grad_norm": 0.6476024985313416, + "learning_rate": 1.7279049697954213e-05, + "loss": 0.1283, "step": 8040 }, { - "epoch": 0.20421373270719634, - "grad_norm": 0.8632495403289795, - "learning_rate": 1.863857511528536e-05, - "loss": 0.1701, + "epoch": 0.4083963652977308, + "grad_norm": 0.5277358889579773, + "learning_rate": 1.7277357564681796e-05, + "loss": 0.1178, "step": 8045 }, { - "epoch": 0.20434065236705165, - "grad_norm": 0.6596360802650452, - "learning_rate": 1.8637728984219656e-05, - "loss": 0.124, + "epoch": 0.4086501852885933, + "grad_norm": 0.4592641294002533, + "learning_rate": 1.727566543140938e-05, + "loss": 0.1155, "step": 8050 }, { - "epoch": 0.20446757202690696, - "grad_norm": 0.6019644737243652, - "learning_rate": 1.8636882853153955e-05, - "loss": 0.1961, + "epoch": 0.4089040052794558, + "grad_norm": 0.5247008800506592, + "learning_rate": 1.7273973298136963e-05, + "loss": 0.1086, "step": 8055 }, { - "epoch": 0.20459449168676228, - "grad_norm": 0.7367954850196838, - "learning_rate": 1.8636036722088253e-05, - "loss": 0.1371, + "epoch": 0.4091578252703183, + "grad_norm": 0.5143386125564575, + "learning_rate": 1.7272281164864547e-05, + "loss": 0.1229, "step": 8060 }, { - "epoch": 0.2047214113466176, - "grad_norm": 0.7093590497970581, - "learning_rate": 1.863519059102255e-05, - "loss": 0.1727, + "epoch": 0.4094116452611808, + "grad_norm": 0.40900567173957825, + "learning_rate": 1.727058903159213e-05, + "loss": 0.1165, "step": 8065 }, { - "epoch": 0.2048483310064729, - "grad_norm": 0.7297560572624207, - "learning_rate": 1.8634344459956847e-05, - "loss": 0.1271, + "epoch": 0.4096654652520432, + "grad_norm": 0.44880881905555725, + "learning_rate": 1.7268896898319714e-05, + "loss": 0.1245, "step": 8070 }, { - "epoch": 0.2049752506663282, - "grad_norm": 0.860377848148346, - "learning_rate": 1.8633498328891145e-05, - "loss": 0.1212, + "epoch": 0.4099192852429057, + "grad_norm": 0.4040411710739136, + "learning_rate": 1.7267204765047297e-05, + "loss": 0.1625, "step": 8075 }, { - "epoch": 0.20510217032618352, - "grad_norm": 0.8792628049850464, - "learning_rate": 1.8632652197825443e-05, - "loss": 0.1425, + "epoch": 0.4101731052337682, + "grad_norm": 0.5253224968910217, + "learning_rate": 1.7265512631774877e-05, + "loss": 0.1105, "step": 8080 }, { - "epoch": 0.20522908998603884, - "grad_norm": 0.5156036019325256, - "learning_rate": 1.8631806066759742e-05, - "loss": 0.1772, + "epoch": 0.4104269252246307, + "grad_norm": 0.5281549692153931, + "learning_rate": 1.7263820498502464e-05, + "loss": 0.1205, "step": 8085 }, { - "epoch": 0.20535600964589415, - "grad_norm": 1.0228166580200195, - "learning_rate": 1.863095993569404e-05, - "loss": 0.1772, + "epoch": 0.4106807452154932, + "grad_norm": 0.4668600857257843, + "learning_rate": 1.7262128365230048e-05, + "loss": 0.1068, "step": 8090 }, { - "epoch": 0.20548292930574946, - "grad_norm": 0.7463400363922119, - "learning_rate": 1.863011380462834e-05, - "loss": 0.1471, + "epoch": 0.41093456520635563, + "grad_norm": 0.47776713967323303, + "learning_rate": 1.726043623195763e-05, + "loss": 0.1147, "step": 8095 }, { - "epoch": 0.20560984896560477, - "grad_norm": 0.8167277574539185, - "learning_rate": 1.8629267673562637e-05, - "loss": 0.1771, + "epoch": 0.4111883851972181, + "grad_norm": 0.8644548058509827, + "learning_rate": 1.7258744098685215e-05, + "loss": 0.1148, "step": 8100 }, { - "epoch": 0.2057367686254601, - "grad_norm": 0.7261965274810791, - "learning_rate": 1.8628421542496935e-05, - "loss": 0.1445, + "epoch": 0.4114422051880806, + "grad_norm": 0.44676366448402405, + "learning_rate": 1.7257051965412795e-05, + "loss": 0.1025, "step": 8105 }, { - "epoch": 0.2058636882853154, - "grad_norm": 0.7198735475540161, - "learning_rate": 1.862757541143123e-05, - "loss": 0.1456, + "epoch": 0.4116960251789431, + "grad_norm": 0.6538465619087219, + "learning_rate": 1.7255359832140382e-05, + "loss": 0.1112, "step": 8110 }, { - "epoch": 0.2059906079451707, - "grad_norm": 0.6607942581176758, - "learning_rate": 1.862672928036553e-05, - "loss": 0.1705, + "epoch": 0.4119498451698056, + "grad_norm": 0.5455487370491028, + "learning_rate": 1.7253667698867966e-05, + "loss": 0.1225, "step": 8115 }, { - "epoch": 0.20611752760502602, - "grad_norm": 0.6540218591690063, - "learning_rate": 1.8625883149299827e-05, - "loss": 0.1467, + "epoch": 0.41220366516066803, + "grad_norm": 0.5371622443199158, + "learning_rate": 1.7251975565595546e-05, + "loss": 0.1113, "step": 8120 }, { - "epoch": 0.20624444726488134, - "grad_norm": 0.6430326700210571, - "learning_rate": 1.8625037018234126e-05, - "loss": 0.1629, + "epoch": 0.4124574851515305, + "grad_norm": 0.5552743077278137, + "learning_rate": 1.7250283432323133e-05, + "loss": 0.1145, "step": 8125 }, { - "epoch": 0.20637136692473665, - "grad_norm": 0.577563464641571, - "learning_rate": 1.8624190887168424e-05, - "loss": 0.1509, + "epoch": 0.412711305142393, + "grad_norm": 0.40437573194503784, + "learning_rate": 1.7248591299050713e-05, + "loss": 0.115, "step": 8130 }, { - "epoch": 0.20649828658459196, - "grad_norm": 0.9005371332168579, - "learning_rate": 1.8623344756102722e-05, - "loss": 0.1503, + "epoch": 0.4129651251332555, + "grad_norm": 0.8257235884666443, + "learning_rate": 1.72468991657783e-05, + "loss": 0.1263, "step": 8135 }, { - "epoch": 0.20662520624444727, - "grad_norm": 0.8169564604759216, - "learning_rate": 1.862249862503702e-05, - "loss": 0.1543, + "epoch": 0.413218945124118, + "grad_norm": 0.5629894733428955, + "learning_rate": 1.7245207032505883e-05, + "loss": 0.1114, "step": 8140 }, { - "epoch": 0.20675212590430259, - "grad_norm": 0.6900368332862854, - "learning_rate": 1.862165249397132e-05, - "loss": 0.1574, + "epoch": 0.41347276511498043, + "grad_norm": 0.5272229909896851, + "learning_rate": 1.7243514899233464e-05, + "loss": 0.1197, "step": 8145 }, { - "epoch": 0.2068790455641579, - "grad_norm": 0.7297424674034119, - "learning_rate": 1.8620806362905614e-05, - "loss": 0.162, + "epoch": 0.4137265851058429, + "grad_norm": 0.5540295243263245, + "learning_rate": 1.724182276596105e-05, + "loss": 0.1073, "step": 8150 }, { - "epoch": 0.2070059652240132, - "grad_norm": 0.7286546230316162, - "learning_rate": 1.8619960231839913e-05, - "loss": 0.1742, + "epoch": 0.4139804050967054, + "grad_norm": 0.5846796631813049, + "learning_rate": 1.724013063268863e-05, + "loss": 0.1228, "step": 8155 }, { - "epoch": 0.20713288488386852, - "grad_norm": 0.9134151935577393, - "learning_rate": 1.861911410077421e-05, - "loss": 0.1503, + "epoch": 0.4142342250875679, + "grad_norm": 0.5993126034736633, + "learning_rate": 1.7238438499416214e-05, + "loss": 0.1118, "step": 8160 }, { - "epoch": 0.20725980454372384, - "grad_norm": 0.8254784345626831, - "learning_rate": 1.861826796970851e-05, - "loss": 0.1345, + "epoch": 0.4144880450784304, + "grad_norm": 0.47880518436431885, + "learning_rate": 1.72367463661438e-05, + "loss": 0.1389, "step": 8165 }, { - "epoch": 0.20738672420357915, - "grad_norm": 0.6548214554786682, - "learning_rate": 1.8617421838642808e-05, - "loss": 0.1578, + "epoch": 0.41474186506929284, + "grad_norm": 0.7932132482528687, + "learning_rate": 1.723505423287138e-05, + "loss": 0.1148, "step": 8170 }, { - "epoch": 0.20751364386343443, - "grad_norm": 0.9002837538719177, - "learning_rate": 1.8616575707577106e-05, - "loss": 0.1488, + "epoch": 0.41499568506015533, + "grad_norm": 0.5608577728271484, + "learning_rate": 1.7233362099598965e-05, + "loss": 0.1065, "step": 8175 }, { - "epoch": 0.20764056352328975, - "grad_norm": 0.7703000903129578, - "learning_rate": 1.8615729576511405e-05, - "loss": 0.1467, + "epoch": 0.4152495050510178, + "grad_norm": 0.5518209934234619, + "learning_rate": 1.723166996632655e-05, + "loss": 0.1186, "step": 8180 }, { - "epoch": 0.20776748318314506, - "grad_norm": 2.026776075363159, - "learning_rate": 1.8614883445445703e-05, - "loss": 0.1433, + "epoch": 0.4155033250418803, + "grad_norm": 0.38393184542655945, + "learning_rate": 1.7229977833054132e-05, + "loss": 0.112, "step": 8185 }, { - "epoch": 0.20789440284300037, - "grad_norm": 0.5788689851760864, - "learning_rate": 1.8614037314379998e-05, - "loss": 0.1425, + "epoch": 0.4157571450327428, + "grad_norm": 0.5350388884544373, + "learning_rate": 1.722828569978172e-05, + "loss": 0.1215, "step": 8190 }, { - "epoch": 0.20802132250285568, - "grad_norm": 0.8865661025047302, - "learning_rate": 1.8613191183314296e-05, - "loss": 0.1707, + "epoch": 0.41601096502360524, + "grad_norm": 0.487732857465744, + "learning_rate": 1.72265935665093e-05, + "loss": 0.1232, "step": 8195 }, { - "epoch": 0.208148242162711, - "grad_norm": 0.906574010848999, - "learning_rate": 1.8612345052248595e-05, - "loss": 0.1464, + "epoch": 0.41626478501446773, + "grad_norm": 0.7282403707504272, + "learning_rate": 1.7224901433236883e-05, + "loss": 0.1112, "step": 8200 }, { - "epoch": 0.2082751618225663, - "grad_norm": 0.5590483546257019, - "learning_rate": 1.8611498921182893e-05, - "loss": 0.1411, + "epoch": 0.4165186050053302, + "grad_norm": 0.49439603090286255, + "learning_rate": 1.7223209299964466e-05, + "loss": 0.1123, "step": 8205 }, { - "epoch": 0.20840208148242162, - "grad_norm": 0.6416975259780884, - "learning_rate": 1.8610652790117188e-05, - "loss": 0.1266, + "epoch": 0.4167724249961927, + "grad_norm": 0.3408522307872772, + "learning_rate": 1.722151716669205e-05, + "loss": 0.1047, "step": 8210 }, { - "epoch": 0.20852900114227693, - "grad_norm": 0.6208164691925049, - "learning_rate": 1.8609806659051487e-05, - "loss": 0.144, + "epoch": 0.4170262449870552, + "grad_norm": 0.5345622897148132, + "learning_rate": 1.7219825033419633e-05, + "loss": 0.1233, "step": 8215 }, { - "epoch": 0.20865592080213224, - "grad_norm": 0.7075273990631104, - "learning_rate": 1.8608960527985785e-05, - "loss": 0.1784, + "epoch": 0.41728006497791764, + "grad_norm": 0.4255228638648987, + "learning_rate": 1.7218132900147217e-05, + "loss": 0.1268, "step": 8220 }, { - "epoch": 0.20878284046198756, - "grad_norm": 1.2032784223556519, - "learning_rate": 1.8608114396920084e-05, - "loss": 0.146, + "epoch": 0.41753388496878013, + "grad_norm": 0.6682707667350769, + "learning_rate": 1.72164407668748e-05, + "loss": 0.1129, "step": 8225 }, { - "epoch": 0.20890976012184287, - "grad_norm": 1.2711583375930786, - "learning_rate": 1.8607268265854382e-05, - "loss": 0.1553, + "epoch": 0.4177877049596426, + "grad_norm": 0.49467936158180237, + "learning_rate": 1.7214748633602384e-05, + "loss": 0.1171, "step": 8230 }, { - "epoch": 0.20903667978169818, - "grad_norm": 0.6269170045852661, - "learning_rate": 1.860642213478868e-05, - "loss": 0.154, + "epoch": 0.4180415249505051, + "grad_norm": 0.4055912494659424, + "learning_rate": 1.7213056500329967e-05, + "loss": 0.117, "step": 8235 }, { - "epoch": 0.2091635994415535, - "grad_norm": 0.6315646767616272, - "learning_rate": 1.860557600372298e-05, - "loss": 0.1381, + "epoch": 0.4182953449413676, + "grad_norm": 0.47649750113487244, + "learning_rate": 1.721136436705755e-05, + "loss": 0.1015, "step": 8240 }, { - "epoch": 0.2092905191014088, - "grad_norm": 0.9511854648590088, - "learning_rate": 1.8604729872657277e-05, - "loss": 0.1521, + "epoch": 0.41854916493223004, + "grad_norm": 0.3205729126930237, + "learning_rate": 1.7209672233785134e-05, + "loss": 0.1138, "step": 8245 }, { - "epoch": 0.20941743876126412, - "grad_norm": 0.8934781551361084, - "learning_rate": 1.8603883741591572e-05, - "loss": 0.1616, + "epoch": 0.41880298492309254, + "grad_norm": 0.5603702068328857, + "learning_rate": 1.7207980100512718e-05, + "loss": 0.106, "step": 8250 }, { - "epoch": 0.20954435842111943, - "grad_norm": 0.9398448467254639, - "learning_rate": 1.860303761052587e-05, - "loss": 0.1266, + "epoch": 0.419056804913955, + "grad_norm": 0.6263189911842346, + "learning_rate": 1.72062879672403e-05, + "loss": 0.1135, "step": 8255 }, { - "epoch": 0.20967127808097474, - "grad_norm": 0.49070289731025696, - "learning_rate": 1.860219147946017e-05, - "loss": 0.143, + "epoch": 0.4193106249048175, + "grad_norm": 0.533976674079895, + "learning_rate": 1.7204595833967885e-05, + "loss": 0.1269, "step": 8260 }, { - "epoch": 0.20979819774083006, - "grad_norm": 0.6899620890617371, - "learning_rate": 1.8601345348394467e-05, - "loss": 0.1566, + "epoch": 0.41956444489568, + "grad_norm": 0.38608598709106445, + "learning_rate": 1.720290370069547e-05, + "loss": 0.1081, "step": 8265 }, { - "epoch": 0.20992511740068537, - "grad_norm": 0.8951395153999329, - "learning_rate": 1.8600499217328766e-05, - "loss": 0.1353, + "epoch": 0.41981826488654245, + "grad_norm": 0.329917311668396, + "learning_rate": 1.7201211567423052e-05, + "loss": 0.1102, "step": 8270 }, { - "epoch": 0.21005203706054068, - "grad_norm": 0.8683966994285583, - "learning_rate": 1.8599653086263064e-05, - "loss": 0.1608, + "epoch": 0.42007208487740494, + "grad_norm": 0.4950959086418152, + "learning_rate": 1.7199519434150636e-05, + "loss": 0.1032, "step": 8275 }, { - "epoch": 0.210178956720396, - "grad_norm": 0.66145259141922, - "learning_rate": 1.8598806955197363e-05, - "loss": 0.1747, + "epoch": 0.42032590486826743, + "grad_norm": 0.7091923356056213, + "learning_rate": 1.719782730087822e-05, + "loss": 0.108, "step": 8280 }, { - "epoch": 0.2103058763802513, - "grad_norm": 0.5993449091911316, - "learning_rate": 1.859796082413166e-05, - "loss": 0.146, + "epoch": 0.4205797248591299, + "grad_norm": 0.41781190037727356, + "learning_rate": 1.7196135167605803e-05, + "loss": 0.1106, "step": 8285 }, { - "epoch": 0.21043279604010662, - "grad_norm": 0.6439559459686279, - "learning_rate": 1.859711469306596e-05, - "loss": 0.1585, + "epoch": 0.42083354484999236, + "grad_norm": 0.745145857334137, + "learning_rate": 1.7194443034333386e-05, + "loss": 0.1133, "step": 8290 }, { - "epoch": 0.21055971569996193, - "grad_norm": 0.5552145838737488, - "learning_rate": 1.8596268562000254e-05, - "loss": 0.1169, + "epoch": 0.42108736484085485, + "grad_norm": 0.4343501925468445, + "learning_rate": 1.719275090106097e-05, + "loss": 0.11, "step": 8295 }, { - "epoch": 0.21068663535981724, - "grad_norm": 0.5549689531326294, - "learning_rate": 1.8595422430934553e-05, - "loss": 0.1481, + "epoch": 0.42134118483171734, + "grad_norm": 0.4626290202140808, + "learning_rate": 1.7191058767788553e-05, + "loss": 0.1079, "step": 8300 }, { - "epoch": 0.21081355501967255, - "grad_norm": 0.667026162147522, - "learning_rate": 1.859457629986885e-05, - "loss": 0.1694, + "epoch": 0.42159500482257983, + "grad_norm": 0.4289278984069824, + "learning_rate": 1.7189366634516137e-05, + "loss": 0.1084, "step": 8305 }, { - "epoch": 0.21094047467952787, - "grad_norm": 1.0621532201766968, - "learning_rate": 1.859373016880315e-05, - "loss": 0.1866, + "epoch": 0.4218488248134423, + "grad_norm": 0.3560695946216583, + "learning_rate": 1.7187674501243717e-05, + "loss": 0.109, "step": 8310 }, { - "epoch": 0.21106739433938318, - "grad_norm": 0.9059004783630371, - "learning_rate": 1.8592884037737448e-05, - "loss": 0.1817, + "epoch": 0.42210264480430476, + "grad_norm": 0.9896416664123535, + "learning_rate": 1.7185982367971304e-05, + "loss": 0.1067, "step": 8315 }, { - "epoch": 0.2111943139992385, - "grad_norm": 0.6194904446601868, - "learning_rate": 1.8592037906671746e-05, - "loss": 0.1436, + "epoch": 0.42235646479516725, + "grad_norm": 0.45371013879776, + "learning_rate": 1.7184290234698888e-05, + "loss": 0.1093, "step": 8320 }, { - "epoch": 0.2113212336590938, - "grad_norm": 1.169878363609314, - "learning_rate": 1.8591191775606045e-05, - "loss": 0.1517, + "epoch": 0.42261028478602974, + "grad_norm": 0.38966628909111023, + "learning_rate": 1.7182598101426468e-05, + "loss": 0.1037, "step": 8325 }, { - "epoch": 0.21144815331894912, - "grad_norm": 0.8158408999443054, - "learning_rate": 1.8590345644540343e-05, - "loss": 0.1682, + "epoch": 0.42286410477689224, + "grad_norm": 0.6311661005020142, + "learning_rate": 1.7180905968154055e-05, + "loss": 0.1128, "step": 8330 }, { - "epoch": 0.21157507297880443, - "grad_norm": 0.7277817726135254, - "learning_rate": 1.8589499513474638e-05, - "loss": 0.1372, + "epoch": 0.4231179247677547, + "grad_norm": 0.36234763264656067, + "learning_rate": 1.7179213834881635e-05, + "loss": 0.1075, "step": 8335 }, { - "epoch": 0.21170199263865974, - "grad_norm": 0.6744870543479919, - "learning_rate": 1.8588653382408937e-05, - "loss": 0.1382, + "epoch": 0.42337174475861716, + "grad_norm": 0.4338438808917999, + "learning_rate": 1.7177521701609222e-05, + "loss": 0.1137, "step": 8340 }, { - "epoch": 0.21182891229851503, - "grad_norm": 0.6482947468757629, - "learning_rate": 1.8587807251343235e-05, - "loss": 0.1496, + "epoch": 0.42362556474947966, + "grad_norm": 0.603247344493866, + "learning_rate": 1.7175829568336805e-05, + "loss": 0.1067, "step": 8345 }, { - "epoch": 0.21195583195837034, - "grad_norm": 0.7877559065818787, - "learning_rate": 1.8586961120277533e-05, - "loss": 0.1569, + "epoch": 0.42387938474034215, + "grad_norm": 0.7704381346702576, + "learning_rate": 1.7174137435064385e-05, + "loss": 0.1071, "step": 8350 }, { - "epoch": 0.21208275161822565, - "grad_norm": 0.6314103007316589, - "learning_rate": 1.858611498921183e-05, - "loss": 0.1546, + "epoch": 0.42413320473120464, + "grad_norm": 0.41250476241111755, + "learning_rate": 1.7172445301791972e-05, + "loss": 0.115, "step": 8355 }, { - "epoch": 0.21220967127808096, - "grad_norm": 0.6915703415870667, - "learning_rate": 1.8585268858146127e-05, - "loss": 0.1279, + "epoch": 0.42438702472206713, + "grad_norm": 0.4616197943687439, + "learning_rate": 1.7170753168519553e-05, + "loss": 0.112, "step": 8360 }, { - "epoch": 0.21233659093793628, - "grad_norm": 1.0024842023849487, - "learning_rate": 1.8584422727080425e-05, - "loss": 0.1299, + "epoch": 0.42464084471292957, + "grad_norm": 0.45028916001319885, + "learning_rate": 1.7169061035247136e-05, + "loss": 0.1158, "step": 8365 }, { - "epoch": 0.2124635105977916, - "grad_norm": 0.5174934267997742, - "learning_rate": 1.8583576596014724e-05, - "loss": 0.1607, + "epoch": 0.42489466470379206, + "grad_norm": 0.6397187113761902, + "learning_rate": 1.7167368901974723e-05, + "loss": 0.1111, "step": 8370 }, { - "epoch": 0.2125904302576469, - "grad_norm": 0.7474557757377625, - "learning_rate": 1.8582730464949022e-05, - "loss": 0.1611, + "epoch": 0.42514848469465455, + "grad_norm": 0.4066790044307709, + "learning_rate": 1.7165676768702303e-05, + "loss": 0.1185, "step": 8375 }, { - "epoch": 0.2127173499175022, - "grad_norm": 0.9110854268074036, - "learning_rate": 1.858188433388332e-05, - "loss": 0.1484, + "epoch": 0.42540230468551704, + "grad_norm": 0.7176026701927185, + "learning_rate": 1.716398463542989e-05, + "loss": 0.1129, "step": 8380 }, { - "epoch": 0.21284426957735753, - "grad_norm": 1.3008431196212769, - "learning_rate": 1.858103820281762e-05, - "loss": 0.1422, + "epoch": 0.42565612467637953, + "grad_norm": 0.6438480615615845, + "learning_rate": 1.716229250215747e-05, + "loss": 0.1186, "step": 8385 }, { - "epoch": 0.21297118923721284, - "grad_norm": 0.6753866076469421, - "learning_rate": 1.8580192071751917e-05, - "loss": 0.154, + "epoch": 0.42590994466724197, + "grad_norm": 0.48937782645225525, + "learning_rate": 1.7160600368885054e-05, + "loss": 0.1104, "step": 8390 }, { - "epoch": 0.21309810889706815, - "grad_norm": 0.8852114677429199, - "learning_rate": 1.8579345940686212e-05, - "loss": 0.1727, + "epoch": 0.42616376465810446, + "grad_norm": 0.46808022260665894, + "learning_rate": 1.7158908235612637e-05, + "loss": 0.11, "step": 8395 }, { - "epoch": 0.21322502855692346, - "grad_norm": 0.752415657043457, - "learning_rate": 1.857849980962051e-05, - "loss": 0.1417, + "epoch": 0.42641758464896695, + "grad_norm": 0.5450372099876404, + "learning_rate": 1.715721610234022e-05, + "loss": 0.1175, "step": 8400 }, { - "epoch": 0.21335194821677878, - "grad_norm": 0.8975776433944702, - "learning_rate": 1.857765367855481e-05, - "loss": 0.1389, + "epoch": 0.42667140463982944, + "grad_norm": 0.5416411757469177, + "learning_rate": 1.7155523969067804e-05, + "loss": 0.1329, "step": 8405 }, { - "epoch": 0.2134788678766341, - "grad_norm": 0.9106149673461914, - "learning_rate": 1.8576807547489107e-05, - "loss": 0.1759, + "epoch": 0.42692522463069194, + "grad_norm": 0.5366417169570923, + "learning_rate": 1.7153831835795388e-05, + "loss": 0.1159, "step": 8410 }, { - "epoch": 0.2136057875364894, - "grad_norm": 0.9494649171829224, - "learning_rate": 1.8575961416423406e-05, - "loss": 0.1526, + "epoch": 0.42717904462155437, + "grad_norm": 0.5638911128044128, + "learning_rate": 1.715213970252297e-05, + "loss": 0.1047, "step": 8415 }, { - "epoch": 0.2137327071963447, - "grad_norm": 0.8369239568710327, - "learning_rate": 1.8575115285357704e-05, - "loss": 0.1399, + "epoch": 0.42743286461241686, + "grad_norm": 0.4426100552082062, + "learning_rate": 1.7150447569250555e-05, + "loss": 0.1016, "step": 8420 }, { - "epoch": 0.21385962685620002, - "grad_norm": 0.9026253819465637, - "learning_rate": 1.8574269154292003e-05, - "loss": 0.1474, + "epoch": 0.42768668460327935, + "grad_norm": 0.4157399833202362, + "learning_rate": 1.714875543597814e-05, + "loss": 0.129, "step": 8425 }, { - "epoch": 0.21398654651605534, - "grad_norm": 0.8692620992660522, - "learning_rate": 1.85734230232263e-05, - "loss": 0.1552, + "epoch": 0.42794050459414185, + "grad_norm": 0.4836711287498474, + "learning_rate": 1.7147063302705722e-05, + "loss": 0.1071, "step": 8430 }, { - "epoch": 0.21411346617591065, - "grad_norm": 0.6013438105583191, - "learning_rate": 1.8572576892160596e-05, - "loss": 0.151, + "epoch": 0.42819432458500434, + "grad_norm": 0.5332268476486206, + "learning_rate": 1.7145371169433306e-05, + "loss": 0.1251, "step": 8435 }, { - "epoch": 0.21424038583576596, - "grad_norm": 0.7235345244407654, - "learning_rate": 1.8571730761094894e-05, - "loss": 0.1772, + "epoch": 0.4284481445758668, + "grad_norm": 0.5566955208778381, + "learning_rate": 1.714367903616089e-05, + "loss": 0.1124, "step": 8440 }, { - "epoch": 0.21436730549562127, - "grad_norm": 0.9424161911010742, - "learning_rate": 1.8570884630029193e-05, - "loss": 0.162, + "epoch": 0.42870196456672927, + "grad_norm": 0.6206413507461548, + "learning_rate": 1.7141986902888473e-05, + "loss": 0.1224, "step": 8445 }, { - "epoch": 0.2144942251554766, - "grad_norm": 0.5689753890037537, - "learning_rate": 1.857003849896349e-05, - "loss": 0.151, + "epoch": 0.42895578455759176, + "grad_norm": 0.44592681527137756, + "learning_rate": 1.7140294769616056e-05, + "loss": 0.1146, "step": 8450 }, { - "epoch": 0.2146211448153319, - "grad_norm": 0.6689040660858154, - "learning_rate": 1.856919236789779e-05, - "loss": 0.1232, + "epoch": 0.42920960454845425, + "grad_norm": 0.5079742074012756, + "learning_rate": 1.713860263634364e-05, + "loss": 0.1242, "step": 8455 }, { - "epoch": 0.2147480644751872, - "grad_norm": 0.5660226345062256, - "learning_rate": 1.8568346236832088e-05, - "loss": 0.1246, + "epoch": 0.42946342453931674, + "grad_norm": 0.41701123118400574, + "learning_rate": 1.7136910503071223e-05, + "loss": 0.1157, "step": 8460 }, { - "epoch": 0.21487498413504252, - "grad_norm": 0.7989674210548401, - "learning_rate": 1.8567500105766386e-05, - "loss": 0.173, + "epoch": 0.4297172445301792, + "grad_norm": 0.4740390479564667, + "learning_rate": 1.7135218369798807e-05, + "loss": 0.1171, "step": 8465 }, { - "epoch": 0.21500190379489784, - "grad_norm": 0.9442060589790344, - "learning_rate": 1.8566653974700685e-05, - "loss": 0.1849, + "epoch": 0.42997106452104167, + "grad_norm": 0.378922700881958, + "learning_rate": 1.713352623652639e-05, + "loss": 0.1159, "step": 8470 }, { - "epoch": 0.21512882345475315, - "grad_norm": 0.5568065047264099, - "learning_rate": 1.856580784363498e-05, - "loss": 0.1743, + "epoch": 0.43022488451190416, + "grad_norm": 0.45636868476867676, + "learning_rate": 1.7131834103253974e-05, + "loss": 0.1064, "step": 8475 }, { - "epoch": 0.21525574311460846, - "grad_norm": 1.9357846975326538, - "learning_rate": 1.8564961712569278e-05, - "loss": 0.1697, + "epoch": 0.43047870450276665, + "grad_norm": 0.4845265746116638, + "learning_rate": 1.7130141969981558e-05, + "loss": 0.1087, "step": 8480 }, { - "epoch": 0.21538266277446377, - "grad_norm": 0.5239430069923401, - "learning_rate": 1.8564115581503577e-05, - "loss": 0.1312, + "epoch": 0.43073252449362914, + "grad_norm": 0.41355660557746887, + "learning_rate": 1.712844983670914e-05, + "loss": 0.1154, "step": 8485 }, { - "epoch": 0.21550958243431909, - "grad_norm": 0.8390584588050842, - "learning_rate": 1.8563269450437875e-05, - "loss": 0.1339, + "epoch": 0.4309863444844916, + "grad_norm": 0.42567557096481323, + "learning_rate": 1.7126757703436725e-05, + "loss": 0.1131, "step": 8490 }, { - "epoch": 0.2156365020941744, - "grad_norm": 0.6821616888046265, - "learning_rate": 1.856242331937217e-05, - "loss": 0.1624, + "epoch": 0.43124016447535407, + "grad_norm": 0.46482059359550476, + "learning_rate": 1.7125065570164308e-05, + "loss": 0.1068, "step": 8495 }, { - "epoch": 0.2157634217540297, - "grad_norm": 0.6282166242599487, - "learning_rate": 1.856157718830647e-05, - "loss": 0.1602, + "epoch": 0.43149398446621656, + "grad_norm": 0.7739242911338806, + "learning_rate": 1.7123373436891892e-05, + "loss": 0.1175, "step": 8500 }, { - "epoch": 0.21589034141388502, - "grad_norm": 0.557814359664917, - "learning_rate": 1.8560731057240767e-05, - "loss": 0.1405, + "epoch": 0.43174780445707905, + "grad_norm": 0.4292283058166504, + "learning_rate": 1.7121681303619475e-05, + "loss": 0.1197, "step": 8505 }, { - "epoch": 0.21601726107374034, - "grad_norm": 0.6107650399208069, - "learning_rate": 1.8559884926175065e-05, - "loss": 0.1813, + "epoch": 0.43200162444794155, + "grad_norm": 0.4146791696548462, + "learning_rate": 1.711998917034706e-05, + "loss": 0.1333, "step": 8510 }, { - "epoch": 0.21614418073359565, - "grad_norm": 0.809515655040741, - "learning_rate": 1.8559038795109364e-05, - "loss": 0.1374, + "epoch": 0.432255444438804, + "grad_norm": 0.7794067859649658, + "learning_rate": 1.711829703707464e-05, + "loss": 0.1176, "step": 8515 }, { - "epoch": 0.21627110039345093, - "grad_norm": 0.6309608221054077, - "learning_rate": 1.8558192664043662e-05, - "loss": 0.1358, + "epoch": 0.4325092644296665, + "grad_norm": 0.4238576889038086, + "learning_rate": 1.7116604903802226e-05, + "loss": 0.0959, "step": 8520 }, { - "epoch": 0.21639802005330624, - "grad_norm": 0.5597699880599976, - "learning_rate": 1.855734653297796e-05, - "loss": 0.1472, + "epoch": 0.43276308442052897, + "grad_norm": 0.5824153423309326, + "learning_rate": 1.711491277052981e-05, + "loss": 0.1198, "step": 8525 }, { - "epoch": 0.21652493971316156, - "grad_norm": 0.9107801914215088, - "learning_rate": 1.855650040191226e-05, - "loss": 0.1298, + "epoch": 0.43301690441139146, + "grad_norm": 0.6489765644073486, + "learning_rate": 1.7113220637257393e-05, + "loss": 0.1005, "step": 8530 }, { - "epoch": 0.21665185937301687, - "grad_norm": 0.8724458813667297, - "learning_rate": 1.8555654270846554e-05, - "loss": 0.1731, + "epoch": 0.43327072440225395, + "grad_norm": 0.5258995890617371, + "learning_rate": 1.7111528503984977e-05, + "loss": 0.1167, "step": 8535 }, { - "epoch": 0.21677877903287218, - "grad_norm": 0.9554594159126282, - "learning_rate": 1.8554808139780852e-05, - "loss": 0.1575, + "epoch": 0.4335245443931164, + "grad_norm": 0.5935726761817932, + "learning_rate": 1.7109836370712557e-05, + "loss": 0.1123, "step": 8540 }, { - "epoch": 0.2169056986927275, - "grad_norm": 0.998311460018158, - "learning_rate": 1.855396200871515e-05, - "loss": 0.1474, + "epoch": 0.4337783643839789, + "grad_norm": 0.47369036078453064, + "learning_rate": 1.7108144237440144e-05, + "loss": 0.1208, "step": 8545 }, { - "epoch": 0.2170326183525828, - "grad_norm": 0.753568172454834, - "learning_rate": 1.855311587764945e-05, - "loss": 0.135, + "epoch": 0.43403218437484137, + "grad_norm": 0.5065063834190369, + "learning_rate": 1.7106452104167727e-05, + "loss": 0.1088, "step": 8550 }, { - "epoch": 0.21715953801243812, - "grad_norm": 0.6112189292907715, - "learning_rate": 1.8552269746583748e-05, - "loss": 0.1551, + "epoch": 0.43428600436570386, + "grad_norm": 0.4577081799507141, + "learning_rate": 1.7104759970895307e-05, + "loss": 0.1283, "step": 8555 }, { - "epoch": 0.21728645767229343, - "grad_norm": 0.9806708097457886, - "learning_rate": 1.8551423615518046e-05, - "loss": 0.19, + "epoch": 0.4345398243565663, + "grad_norm": 0.5575316548347473, + "learning_rate": 1.7103067837622894e-05, + "loss": 0.1152, "step": 8560 }, { - "epoch": 0.21741337733214874, - "grad_norm": 0.8752003312110901, - "learning_rate": 1.8550577484452344e-05, - "loss": 0.1721, + "epoch": 0.4347936443474288, + "grad_norm": 0.43118050694465637, + "learning_rate": 1.7101375704350475e-05, + "loss": 0.0927, "step": 8565 }, { - "epoch": 0.21754029699200406, - "grad_norm": 0.9703426361083984, - "learning_rate": 1.8549731353386643e-05, - "loss": 0.1731, + "epoch": 0.4350474643382913, + "grad_norm": 0.6905524134635925, + "learning_rate": 1.7099683571078058e-05, + "loss": 0.1209, "step": 8570 }, { - "epoch": 0.21766721665185937, - "grad_norm": 1.0566999912261963, - "learning_rate": 1.8548885222320938e-05, - "loss": 0.1701, + "epoch": 0.43530128432915377, + "grad_norm": 0.46179112792015076, + "learning_rate": 1.709799143780564e-05, + "loss": 0.1098, "step": 8575 }, { - "epoch": 0.21779413631171468, - "grad_norm": 1.099644660949707, - "learning_rate": 1.8548039091255236e-05, - "loss": 0.1572, + "epoch": 0.43555510432001626, + "grad_norm": 0.5374900698661804, + "learning_rate": 1.7096299304533225e-05, + "loss": 0.0966, "step": 8580 }, { - "epoch": 0.21792105597157, - "grad_norm": 0.6829742789268494, - "learning_rate": 1.8547192960189535e-05, - "loss": 0.147, + "epoch": 0.4358089243108787, + "grad_norm": 0.47844305634498596, + "learning_rate": 1.7094607171260812e-05, + "loss": 0.1098, "step": 8585 }, { - "epoch": 0.2180479756314253, - "grad_norm": 0.9238346219062805, - "learning_rate": 1.8546346829123833e-05, - "loss": 0.1457, + "epoch": 0.4360627443017412, + "grad_norm": 0.5935022234916687, + "learning_rate": 1.7092915037988392e-05, + "loss": 0.099, "step": 8590 }, { - "epoch": 0.21817489529128062, - "grad_norm": 0.5824851393699646, - "learning_rate": 1.854550069805813e-05, - "loss": 0.1427, + "epoch": 0.4363165642926037, + "grad_norm": 0.39847415685653687, + "learning_rate": 1.7091222904715976e-05, + "loss": 0.1041, "step": 8595 }, { - "epoch": 0.21830181495113593, - "grad_norm": 0.5803163647651672, - "learning_rate": 1.854465456699243e-05, - "loss": 0.1568, + "epoch": 0.4365703842834662, + "grad_norm": 0.6426656246185303, + "learning_rate": 1.708953077144356e-05, + "loss": 0.1143, "step": 8600 }, { - "epoch": 0.21842873461099124, - "grad_norm": 0.5832263827323914, - "learning_rate": 1.8543808435926728e-05, - "loss": 0.1401, + "epoch": 0.43682420427432866, + "grad_norm": 0.40940332412719727, + "learning_rate": 1.7087838638171143e-05, + "loss": 0.1117, "step": 8605 }, { - "epoch": 0.21855565427084656, - "grad_norm": 0.8433694839477539, - "learning_rate": 1.8542962304861027e-05, - "loss": 0.145, + "epoch": 0.4370780242651911, + "grad_norm": 0.5274299383163452, + "learning_rate": 1.7086146504898726e-05, + "loss": 0.1132, "step": 8610 }, { - "epoch": 0.21868257393070187, - "grad_norm": 0.592408299446106, - "learning_rate": 1.854211617379532e-05, - "loss": 0.1427, + "epoch": 0.4373318442560536, + "grad_norm": 0.48877760767936707, + "learning_rate": 1.708445437162631e-05, + "loss": 0.114, "step": 8615 }, { - "epoch": 0.21880949359055718, - "grad_norm": 1.0094596147537231, - "learning_rate": 1.854127004272962e-05, - "loss": 0.1395, + "epoch": 0.4375856642469161, + "grad_norm": 0.4619860053062439, + "learning_rate": 1.7082762238353894e-05, + "loss": 0.113, "step": 8620 }, { - "epoch": 0.2189364132504125, - "grad_norm": 1.2037129402160645, - "learning_rate": 1.854042391166392e-05, - "loss": 0.1374, + "epoch": 0.4378394842377786, + "grad_norm": 0.47805002331733704, + "learning_rate": 1.7081070105081477e-05, + "loss": 0.1201, "step": 8625 }, { - "epoch": 0.2190633329102678, - "grad_norm": 1.0147793292999268, - "learning_rate": 1.8539577780598217e-05, - "loss": 0.1452, + "epoch": 0.43809330422864107, + "grad_norm": 0.8779380917549133, + "learning_rate": 1.707937797180906e-05, + "loss": 0.1059, "step": 8630 }, { - "epoch": 0.21919025257012312, - "grad_norm": 0.7698993682861328, - "learning_rate": 1.8538731649532512e-05, - "loss": 0.1412, + "epoch": 0.4383471242195035, + "grad_norm": 0.7380253076553345, + "learning_rate": 1.7077685838536644e-05, + "loss": 0.1225, "step": 8635 }, { - "epoch": 0.21931717222997843, - "grad_norm": 0.6557686924934387, - "learning_rate": 1.853788551846681e-05, - "loss": 0.1242, + "epoch": 0.438600944210366, + "grad_norm": 0.4746687710285187, + "learning_rate": 1.7075993705264228e-05, + "loss": 0.1119, "step": 8640 }, { - "epoch": 0.21944409188983374, - "grad_norm": 0.5154640674591064, - "learning_rate": 1.853703938740111e-05, - "loss": 0.1347, + "epoch": 0.4388547642012285, + "grad_norm": 1.3679776191711426, + "learning_rate": 1.707430157199181e-05, + "loss": 0.1128, "step": 8645 }, { - "epoch": 0.21957101154968905, - "grad_norm": 0.6277763843536377, - "learning_rate": 1.8536193256335407e-05, - "loss": 0.1455, + "epoch": 0.439108584192091, + "grad_norm": 0.5259309411048889, + "learning_rate": 1.7072609438719395e-05, + "loss": 0.128, "step": 8650 }, { - "epoch": 0.21969793120954437, - "grad_norm": 0.7937450408935547, - "learning_rate": 1.8535347125269705e-05, - "loss": 0.1555, + "epoch": 0.43936240418295347, + "grad_norm": 0.4603613018989563, + "learning_rate": 1.707091730544698e-05, + "loss": 0.0927, "step": 8655 }, { - "epoch": 0.21982485086939968, - "grad_norm": 0.8372477889060974, - "learning_rate": 1.8534500994204004e-05, - "loss": 0.1752, + "epoch": 0.4396162241738159, + "grad_norm": 0.5576603412628174, + "learning_rate": 1.7069225172174562e-05, + "loss": 0.1329, "step": 8660 }, { - "epoch": 0.219951770529255, - "grad_norm": 0.7518324255943298, - "learning_rate": 1.8533654863138302e-05, - "loss": 0.1371, + "epoch": 0.4398700441646784, + "grad_norm": 0.5329744219779968, + "learning_rate": 1.7067533038902145e-05, + "loss": 0.1017, "step": 8665 }, { - "epoch": 0.2200786901891103, - "grad_norm": 0.6258273124694824, - "learning_rate": 1.85328087320726e-05, - "loss": 0.1452, + "epoch": 0.4401238641555409, + "grad_norm": 0.4557638168334961, + "learning_rate": 1.706584090562973e-05, + "loss": 0.1108, "step": 8670 }, { - "epoch": 0.22020560984896562, - "grad_norm": 2.3573811054229736, - "learning_rate": 1.8531962601006896e-05, - "loss": 0.1761, + "epoch": 0.4403776841464034, + "grad_norm": 0.6890199780464172, + "learning_rate": 1.7064148772357313e-05, + "loss": 0.1186, "step": 8675 }, { - "epoch": 0.22033252950882093, - "grad_norm": 2.1240198612213135, - "learning_rate": 1.8531116469941194e-05, - "loss": 0.1587, + "epoch": 0.4406315041372659, + "grad_norm": 0.526990532875061, + "learning_rate": 1.7062456639084896e-05, + "loss": 0.1048, "step": 8680 }, { - "epoch": 0.22045944916867624, - "grad_norm": 0.7184456586837769, - "learning_rate": 1.8530270338875492e-05, - "loss": 0.1138, + "epoch": 0.4408853241281283, + "grad_norm": 1.4475257396697998, + "learning_rate": 1.706076450581248e-05, + "loss": 0.1213, "step": 8685 }, { - "epoch": 0.22058636882853153, - "grad_norm": 0.5978677272796631, - "learning_rate": 1.852942420780979e-05, - "loss": 0.1391, + "epoch": 0.4411391441189908, + "grad_norm": 0.4177703261375427, + "learning_rate": 1.7059072372540063e-05, + "loss": 0.1076, "step": 8690 }, { - "epoch": 0.22071328848838684, - "grad_norm": 0.6272395849227905, - "learning_rate": 1.852857807674409e-05, - "loss": 0.1589, + "epoch": 0.4413929641098533, + "grad_norm": 1.0303242206573486, + "learning_rate": 1.7057380239267647e-05, + "loss": 0.1099, "step": 8695 }, { - "epoch": 0.22084020814824215, - "grad_norm": 1.287747859954834, - "learning_rate": 1.8527731945678388e-05, - "loss": 0.146, + "epoch": 0.4416467841007158, + "grad_norm": 0.3975723683834076, + "learning_rate": 1.705568810599523e-05, + "loss": 0.1072, "step": 8700 }, { - "epoch": 0.22096712780809746, - "grad_norm": 0.660239577293396, - "learning_rate": 1.8526885814612686e-05, - "loss": 0.1453, + "epoch": 0.4419006040915783, + "grad_norm": 1.3383222818374634, + "learning_rate": 1.7053995972722814e-05, + "loss": 0.1233, "step": 8705 }, { - "epoch": 0.22109404746795278, - "grad_norm": 0.6700506210327148, - "learning_rate": 1.8526039683546984e-05, - "loss": 0.1483, + "epoch": 0.4421544240824407, + "grad_norm": 1.025421142578125, + "learning_rate": 1.7052303839450397e-05, + "loss": 0.1101, "step": 8710 }, { - "epoch": 0.2212209671278081, - "grad_norm": 0.8075676560401917, - "learning_rate": 1.852519355248128e-05, - "loss": 0.1282, + "epoch": 0.4424082440733032, + "grad_norm": 0.33774542808532715, + "learning_rate": 1.705061170617798e-05, + "loss": 0.1008, "step": 8715 }, { - "epoch": 0.2213478867876634, - "grad_norm": 0.7455586194992065, - "learning_rate": 1.8524347421415578e-05, - "loss": 0.1295, + "epoch": 0.4426620640641657, + "grad_norm": 0.5173442959785461, + "learning_rate": 1.7048919572905564e-05, + "loss": 0.1063, "step": 8720 }, { - "epoch": 0.2214748064475187, - "grad_norm": 0.5464128255844116, - "learning_rate": 1.8523501290349876e-05, - "loss": 0.16, + "epoch": 0.4429158840550282, + "grad_norm": 0.46076419949531555, + "learning_rate": 1.7047227439633148e-05, + "loss": 0.1121, "step": 8725 }, { - "epoch": 0.22160172610737403, - "grad_norm": 4.001948356628418, - "learning_rate": 1.8522655159284175e-05, - "loss": 0.142, + "epoch": 0.4431697040458907, + "grad_norm": 0.5615339875221252, + "learning_rate": 1.704553530636073e-05, + "loss": 0.1144, "step": 8730 }, { - "epoch": 0.22172864576722934, - "grad_norm": 0.9237892627716064, - "learning_rate": 1.8521809028218473e-05, - "loss": 0.1511, + "epoch": 0.4434235240367531, + "grad_norm": 0.513152539730072, + "learning_rate": 1.7043843173088315e-05, + "loss": 0.1111, "step": 8735 }, { - "epoch": 0.22185556542708465, - "grad_norm": 0.8286184668540955, - "learning_rate": 1.852096289715277e-05, - "loss": 0.1846, + "epoch": 0.4436773440276156, + "grad_norm": 0.6728489995002747, + "learning_rate": 1.70421510398159e-05, + "loss": 0.1082, "step": 8740 }, { - "epoch": 0.22198248508693996, - "grad_norm": 0.9929538369178772, - "learning_rate": 1.852011676608707e-05, - "loss": 0.1631, + "epoch": 0.4439311640184781, + "grad_norm": 0.4770069122314453, + "learning_rate": 1.704045890654348e-05, + "loss": 0.1126, "step": 8745 }, { - "epoch": 0.22210940474679527, - "grad_norm": 0.6444843411445618, - "learning_rate": 1.8519270635021368e-05, - "loss": 0.1559, + "epoch": 0.4441849840093406, + "grad_norm": 0.3625965416431427, + "learning_rate": 1.7038766773271066e-05, + "loss": 0.1202, "step": 8750 }, { - "epoch": 0.2222363244066506, - "grad_norm": 0.7140047550201416, - "learning_rate": 1.8518424503955663e-05, - "loss": 0.1671, + "epoch": 0.4444388040002031, + "grad_norm": 0.579849123954773, + "learning_rate": 1.703707463999865e-05, + "loss": 0.116, "step": 8755 }, { - "epoch": 0.2223632440665059, - "grad_norm": 0.9100456237792969, - "learning_rate": 1.8517578372889962e-05, - "loss": 0.1399, + "epoch": 0.4446926239910655, + "grad_norm": 0.5784683227539062, + "learning_rate": 1.703538250672623e-05, + "loss": 0.1351, "step": 8760 }, { - "epoch": 0.2224901637263612, - "grad_norm": 0.7420306205749512, - "learning_rate": 1.851673224182426e-05, - "loss": 0.1399, + "epoch": 0.444946443981928, + "grad_norm": 0.5647311210632324, + "learning_rate": 1.7033690373453816e-05, + "loss": 0.101, "step": 8765 }, { - "epoch": 0.22261708338621652, - "grad_norm": 0.6294177174568176, - "learning_rate": 1.851588611075856e-05, - "loss": 0.1471, + "epoch": 0.4452002639727905, + "grad_norm": 0.49872612953186035, + "learning_rate": 1.7031998240181396e-05, + "loss": 0.1206, "step": 8770 }, { - "epoch": 0.22274400304607184, - "grad_norm": 0.6501321792602539, - "learning_rate": 1.8515039979692857e-05, - "loss": 0.1779, + "epoch": 0.445454083963653, + "grad_norm": 0.6362616419792175, + "learning_rate": 1.7030306106908983e-05, + "loss": 0.1242, "step": 8775 }, { - "epoch": 0.22287092270592715, - "grad_norm": 2.1139371395111084, - "learning_rate": 1.8514193848627152e-05, - "loss": 0.1395, + "epoch": 0.4457079039545155, + "grad_norm": 0.34307578206062317, + "learning_rate": 1.7028613973636564e-05, + "loss": 0.1058, "step": 8780 }, { - "epoch": 0.22299784236578246, - "grad_norm": 0.681423544883728, - "learning_rate": 1.851334771756145e-05, - "loss": 0.1727, + "epoch": 0.4459617239453779, + "grad_norm": 0.5416290163993835, + "learning_rate": 1.7026921840364147e-05, + "loss": 0.1151, "step": 8785 }, { - "epoch": 0.22312476202563777, - "grad_norm": 0.7431126236915588, - "learning_rate": 1.851250158649575e-05, - "loss": 0.162, + "epoch": 0.4462155439362404, + "grad_norm": 0.46642163395881653, + "learning_rate": 1.7025229707091734e-05, + "loss": 0.1197, "step": 8790 }, { - "epoch": 0.2232516816854931, - "grad_norm": 0.5957474112510681, - "learning_rate": 1.8511655455430047e-05, - "loss": 0.152, + "epoch": 0.4464693639271029, + "grad_norm": 0.3453334867954254, + "learning_rate": 1.7023537573819314e-05, + "loss": 0.107, "step": 8795 }, { - "epoch": 0.2233786013453484, - "grad_norm": 0.7054240107536316, - "learning_rate": 1.8510809324364346e-05, - "loss": 0.1878, + "epoch": 0.4467231839179654, + "grad_norm": 0.47410887479782104, + "learning_rate": 1.7021845440546898e-05, + "loss": 0.113, "step": 8800 }, { - "epoch": 0.2235055210052037, - "grad_norm": 0.7911894917488098, - "learning_rate": 1.8509963193298644e-05, - "loss": 0.162, + "epoch": 0.4469770039088279, + "grad_norm": 1.0129278898239136, + "learning_rate": 1.702015330727448e-05, + "loss": 0.1171, "step": 8805 }, { - "epoch": 0.22363244066505902, - "grad_norm": 1.2603085041046143, - "learning_rate": 1.8509117062232942e-05, - "loss": 0.1749, + "epoch": 0.4472308238996903, + "grad_norm": 0.6906533241271973, + "learning_rate": 1.7018461174002065e-05, + "loss": 0.1278, "step": 8810 }, { - "epoch": 0.22375936032491434, - "grad_norm": 1.0110243558883667, - "learning_rate": 1.850827093116724e-05, - "loss": 0.1455, + "epoch": 0.4474846438905528, + "grad_norm": 0.4985661804676056, + "learning_rate": 1.701676904072965e-05, + "loss": 0.1209, "step": 8815 }, { - "epoch": 0.22388627998476965, - "grad_norm": 1.0816550254821777, - "learning_rate": 1.8507424800101536e-05, - "loss": 0.1499, + "epoch": 0.4477384638814153, + "grad_norm": 0.4819851219654083, + "learning_rate": 1.7015076907457232e-05, + "loss": 0.1233, "step": 8820 }, { - "epoch": 0.22401319964462496, - "grad_norm": 1.229247808456421, - "learning_rate": 1.8506578669035834e-05, - "loss": 0.1742, + "epoch": 0.4479922838722778, + "grad_norm": 0.49135705828666687, + "learning_rate": 1.7013384774184815e-05, + "loss": 0.106, "step": 8825 }, { - "epoch": 0.22414011930448027, - "grad_norm": 0.7748664021492004, - "learning_rate": 1.8505732537970133e-05, - "loss": 0.1614, + "epoch": 0.4482461038631403, + "grad_norm": 0.5526263117790222, + "learning_rate": 1.70116926409124e-05, + "loss": 0.1196, "step": 8830 }, { - "epoch": 0.22426703896433559, - "grad_norm": 0.6173787117004395, - "learning_rate": 1.850488640690443e-05, - "loss": 0.1238, + "epoch": 0.4484999238540027, + "grad_norm": 0.6574402451515198, + "learning_rate": 1.7010000507639983e-05, + "loss": 0.1052, "step": 8835 }, { - "epoch": 0.2243939586241909, - "grad_norm": 1.3139923810958862, - "learning_rate": 1.850404027583873e-05, - "loss": 0.1477, + "epoch": 0.4487537438448652, + "grad_norm": 0.3850298225879669, + "learning_rate": 1.7008308374367566e-05, + "loss": 0.099, "step": 8840 }, { - "epoch": 0.2245208782840462, - "grad_norm": 0.7673596143722534, - "learning_rate": 1.8503194144773028e-05, - "loss": 0.1587, + "epoch": 0.4490075638357277, + "grad_norm": 0.5754469037055969, + "learning_rate": 1.700661624109515e-05, + "loss": 0.1095, "step": 8845 }, { - "epoch": 0.22464779794390152, - "grad_norm": 0.6876462697982788, - "learning_rate": 1.8502348013707326e-05, - "loss": 0.137, + "epoch": 0.4492613838265902, + "grad_norm": 0.6519771814346313, + "learning_rate": 1.7004924107822733e-05, + "loss": 0.1198, "step": 8850 }, { - "epoch": 0.22477471760375684, - "grad_norm": 0.6305409669876099, - "learning_rate": 1.8501501882641625e-05, - "loss": 0.1451, + "epoch": 0.44951520381745264, + "grad_norm": 0.5165800452232361, + "learning_rate": 1.7003231974550317e-05, + "loss": 0.119, "step": 8855 }, { - "epoch": 0.22490163726361212, - "grad_norm": 1.0115059614181519, - "learning_rate": 1.850065575157592e-05, - "loss": 0.1514, + "epoch": 0.4497690238083151, + "grad_norm": 0.39308440685272217, + "learning_rate": 1.70015398412779e-05, + "loss": 0.1108, "step": 8860 }, { - "epoch": 0.22502855692346743, - "grad_norm": 0.9593497514724731, - "learning_rate": 1.8499809620510218e-05, - "loss": 0.1329, + "epoch": 0.4500228437991776, + "grad_norm": 0.3973790407180786, + "learning_rate": 1.6999847708005484e-05, + "loss": 0.108, "step": 8865 }, { - "epoch": 0.22515547658332274, - "grad_norm": 0.9836851358413696, - "learning_rate": 1.8498963489444516e-05, - "loss": 0.1503, + "epoch": 0.4502766637900401, + "grad_norm": 0.571454644203186, + "learning_rate": 1.6998155574733067e-05, + "loss": 0.1107, "step": 8870 }, { - "epoch": 0.22528239624317806, - "grad_norm": 0.6252449154853821, - "learning_rate": 1.8498117358378815e-05, - "loss": 0.1606, + "epoch": 0.4505304837809026, + "grad_norm": 0.48195260763168335, + "learning_rate": 1.699646344146065e-05, + "loss": 0.118, "step": 8875 }, { - "epoch": 0.22540931590303337, - "grad_norm": 0.7561607956886292, - "learning_rate": 1.8497271227313113e-05, - "loss": 0.1685, + "epoch": 0.45078430377176504, + "grad_norm": 0.384822815656662, + "learning_rate": 1.6994771308188234e-05, + "loss": 0.1239, "step": 8880 }, { - "epoch": 0.22553623556288868, - "grad_norm": 0.6127364039421082, - "learning_rate": 1.849642509624741e-05, - "loss": 0.1312, + "epoch": 0.45103812376262753, + "grad_norm": 0.6219851970672607, + "learning_rate": 1.6993079174915818e-05, + "loss": 0.1087, "step": 8885 }, { - "epoch": 0.225663155222744, - "grad_norm": 0.8643683791160583, - "learning_rate": 1.849557896518171e-05, - "loss": 0.156, + "epoch": 0.45129194375349, + "grad_norm": 0.5098511576652527, + "learning_rate": 1.69913870416434e-05, + "loss": 0.1153, "step": 8890 }, { - "epoch": 0.2257900748825993, - "grad_norm": 0.6511363983154297, - "learning_rate": 1.849473283411601e-05, - "loss": 0.1582, + "epoch": 0.4515457637443525, + "grad_norm": 0.5047430992126465, + "learning_rate": 1.6989694908370985e-05, + "loss": 0.0957, "step": 8895 }, { - "epoch": 0.22591699454245462, - "grad_norm": 0.7328239679336548, - "learning_rate": 1.8493886703050303e-05, - "loss": 0.1469, + "epoch": 0.451799583735215, + "grad_norm": 0.5024707317352295, + "learning_rate": 1.698800277509857e-05, + "loss": 0.1006, "step": 8900 }, { - "epoch": 0.22604391420230993, - "grad_norm": 0.6195532083511353, - "learning_rate": 1.8493040571984602e-05, - "loss": 0.1463, + "epoch": 0.45205340372607744, + "grad_norm": 0.40146404504776, + "learning_rate": 1.6986310641826152e-05, + "loss": 0.1082, "step": 8905 }, { - "epoch": 0.22617083386216524, - "grad_norm": 0.8226732611656189, - "learning_rate": 1.84921944409189e-05, - "loss": 0.172, + "epoch": 0.45230722371693993, + "grad_norm": 1.9382678270339966, + "learning_rate": 1.6984618508553736e-05, + "loss": 0.098, "step": 8910 }, { - "epoch": 0.22629775352202056, - "grad_norm": 0.534443736076355, - "learning_rate": 1.84913483098532e-05, - "loss": 0.1353, + "epoch": 0.4525610437078024, + "grad_norm": 0.7064502835273743, + "learning_rate": 1.698292637528132e-05, + "loss": 0.1107, "step": 8915 }, { - "epoch": 0.22642467318187587, - "grad_norm": 0.4949004054069519, - "learning_rate": 1.8490502178787494e-05, - "loss": 0.1473, + "epoch": 0.4528148636986649, + "grad_norm": 0.4406428635120392, + "learning_rate": 1.6981234242008903e-05, + "loss": 0.0956, "step": 8920 }, { - "epoch": 0.22655159284173118, - "grad_norm": 0.6618390083312988, - "learning_rate": 1.8489656047721792e-05, - "loss": 0.1313, + "epoch": 0.4530686836895274, + "grad_norm": 0.39747583866119385, + "learning_rate": 1.6979542108736486e-05, + "loss": 0.105, "step": 8925 }, { - "epoch": 0.2266785125015865, - "grad_norm": 0.7024089097976685, - "learning_rate": 1.848880991665609e-05, - "loss": 0.1319, + "epoch": 0.45332250368038984, + "grad_norm": 0.5317738056182861, + "learning_rate": 1.697784997546407e-05, + "loss": 0.1111, "step": 8930 }, { - "epoch": 0.2268054321614418, - "grad_norm": 0.48897784948349, - "learning_rate": 1.848796378559039e-05, - "loss": 0.1456, + "epoch": 0.45357632367125233, + "grad_norm": 0.4411675035953522, + "learning_rate": 1.6976157842191653e-05, + "loss": 0.1174, "step": 8935 }, { - "epoch": 0.22693235182129712, - "grad_norm": 0.6299815773963928, - "learning_rate": 1.8487117654524687e-05, - "loss": 0.1509, + "epoch": 0.4538301436621148, + "grad_norm": 0.7328295111656189, + "learning_rate": 1.6974465708919237e-05, + "loss": 0.1025, "step": 8940 }, { - "epoch": 0.22705927148115243, - "grad_norm": 0.8269118070602417, - "learning_rate": 1.8486271523458986e-05, - "loss": 0.1426, + "epoch": 0.4540839636529773, + "grad_norm": 0.40330472588539124, + "learning_rate": 1.697277357564682e-05, + "loss": 0.0993, "step": 8945 }, { - "epoch": 0.22718619114100774, - "grad_norm": 0.46330881118774414, - "learning_rate": 1.8485425392393284e-05, - "loss": 0.1662, + "epoch": 0.4543377836438398, + "grad_norm": 1.0070425271987915, + "learning_rate": 1.69710814423744e-05, + "loss": 0.1033, "step": 8950 }, { - "epoch": 0.22731311080086306, - "grad_norm": 0.7436750531196594, - "learning_rate": 1.8484579261327582e-05, - "loss": 0.1893, + "epoch": 0.45459160363470225, + "grad_norm": 0.5507986545562744, + "learning_rate": 1.6969389309101988e-05, + "loss": 0.1129, "step": 8955 }, { - "epoch": 0.22744003046071837, - "grad_norm": 0.821678638458252, - "learning_rate": 1.8483733130261877e-05, - "loss": 0.1367, + "epoch": 0.45484542362556474, + "grad_norm": 0.48115500807762146, + "learning_rate": 1.6967697175829568e-05, + "loss": 0.1042, "step": 8960 }, { - "epoch": 0.22756695012057368, - "grad_norm": 0.7484177947044373, - "learning_rate": 1.8482886999196176e-05, - "loss": 0.1351, + "epoch": 0.45509924361642723, + "grad_norm": 0.4493533670902252, + "learning_rate": 1.696600504255715e-05, + "loss": 0.1132, "step": 8965 }, { - "epoch": 0.227693869780429, - "grad_norm": 0.6745798587799072, - "learning_rate": 1.8482040868130474e-05, - "loss": 0.1661, + "epoch": 0.4553530636072897, + "grad_norm": 0.9694324135780334, + "learning_rate": 1.6964312909284738e-05, + "loss": 0.1155, "step": 8970 }, { - "epoch": 0.2278207894402843, - "grad_norm": 2.0371339321136475, - "learning_rate": 1.8481194737064773e-05, - "loss": 0.1537, + "epoch": 0.4556068835981522, + "grad_norm": 0.3891000747680664, + "learning_rate": 1.696262077601232e-05, + "loss": 0.0998, "step": 8975 }, { - "epoch": 0.22794770910013962, - "grad_norm": 0.6884753704071045, - "learning_rate": 1.848034860599907e-05, - "loss": 0.1919, + "epoch": 0.45586070358901465, + "grad_norm": 0.4627774953842163, + "learning_rate": 1.6960928642739905e-05, + "loss": 0.1009, "step": 8980 }, { - "epoch": 0.22807462875999493, - "grad_norm": 1.7137778997421265, - "learning_rate": 1.847950247493337e-05, - "loss": 0.1621, + "epoch": 0.45611452357987714, + "grad_norm": 0.5629536509513855, + "learning_rate": 1.6959236509467485e-05, + "loss": 0.108, "step": 8985 }, { - "epoch": 0.22820154841985024, - "grad_norm": 1.309816837310791, - "learning_rate": 1.8478656343867668e-05, - "loss": 0.156, + "epoch": 0.45636834357073963, + "grad_norm": 0.4632164239883423, + "learning_rate": 1.695754437619507e-05, + "loss": 0.113, "step": 8990 }, { - "epoch": 0.22832846807970555, - "grad_norm": 0.8005272746086121, - "learning_rate": 1.8477810212801966e-05, - "loss": 0.1691, + "epoch": 0.4566221635616021, + "grad_norm": 0.5252170562744141, + "learning_rate": 1.6955852242922656e-05, + "loss": 0.1114, "step": 8995 }, { - "epoch": 0.22845538773956087, - "grad_norm": 0.769149124622345, - "learning_rate": 1.847696408173626e-05, - "loss": 0.1613, + "epoch": 0.4568759835524646, + "grad_norm": 0.7564627528190613, + "learning_rate": 1.6954160109650236e-05, + "loss": 0.1086, "step": 9000 }, { - "epoch": 0.22858230739941618, - "grad_norm": 1.1156504154205322, - "learning_rate": 1.847611795067056e-05, - "loss": 0.1443, + "epoch": 0.45712980354332705, + "grad_norm": 0.44219401478767395, + "learning_rate": 1.695246797637782e-05, + "loss": 0.1107, "step": 9005 }, { - "epoch": 0.2287092270592715, - "grad_norm": 0.7813001275062561, - "learning_rate": 1.8475271819604858e-05, - "loss": 0.1471, + "epoch": 0.45738362353418954, + "grad_norm": 0.5671635866165161, + "learning_rate": 1.6950775843105403e-05, + "loss": 0.1236, "step": 9010 }, { - "epoch": 0.2288361467191268, - "grad_norm": 0.7578045725822449, - "learning_rate": 1.8474425688539156e-05, - "loss": 0.1576, + "epoch": 0.45763744352505203, + "grad_norm": 0.42195212841033936, + "learning_rate": 1.6949083709832987e-05, + "loss": 0.1077, "step": 9015 }, { - "epoch": 0.22896306637898212, - "grad_norm": 0.5269511938095093, - "learning_rate": 1.8473579557473455e-05, - "loss": 0.1514, + "epoch": 0.4578912635159145, + "grad_norm": 0.36243775486946106, + "learning_rate": 1.6947391576560574e-05, + "loss": 0.0929, "step": 9020 }, { - "epoch": 0.22908998603883743, - "grad_norm": 0.735392153263092, - "learning_rate": 1.8472733426407753e-05, - "loss": 0.1778, + "epoch": 0.458145083506777, + "grad_norm": 0.4893646240234375, + "learning_rate": 1.6945699443288154e-05, + "loss": 0.1182, "step": 9025 }, { - "epoch": 0.22921690569869274, - "grad_norm": 0.6157693266868591, - "learning_rate": 1.847188729534205e-05, - "loss": 0.1284, + "epoch": 0.45839890349763945, + "grad_norm": 0.43038901686668396, + "learning_rate": 1.6944007310015737e-05, + "loss": 0.1121, "step": 9030 }, { - "epoch": 0.22934382535854803, - "grad_norm": 0.5767320394515991, - "learning_rate": 1.847104116427635e-05, - "loss": 0.1389, + "epoch": 0.45865272348850195, + "grad_norm": 0.4785013198852539, + "learning_rate": 1.694231517674332e-05, + "loss": 0.1063, "step": 9035 }, { - "epoch": 0.22947074501840334, - "grad_norm": 1.0665740966796875, - "learning_rate": 1.8470195033210645e-05, - "loss": 0.1663, + "epoch": 0.45890654347936444, + "grad_norm": 0.4438520073890686, + "learning_rate": 1.6940623043470904e-05, + "loss": 0.107, "step": 9040 }, { - "epoch": 0.22959766467825865, - "grad_norm": 0.6456816792488098, - "learning_rate": 1.8469348902144944e-05, - "loss": 0.1569, + "epoch": 0.45916036347022693, + "grad_norm": 0.5882023572921753, + "learning_rate": 1.6938930910198488e-05, + "loss": 0.1147, "step": 9045 }, { - "epoch": 0.22972458433811396, - "grad_norm": 0.6411804556846619, - "learning_rate": 1.8468502771079242e-05, - "loss": 0.142, + "epoch": 0.4594141834610894, + "grad_norm": 0.5411685705184937, + "learning_rate": 1.693723877692607e-05, + "loss": 0.1099, "step": 9050 }, { - "epoch": 0.22985150399796928, - "grad_norm": 1.195848822593689, - "learning_rate": 1.846765664001354e-05, - "loss": 0.1596, + "epoch": 0.45966800345195186, + "grad_norm": 0.9396799206733704, + "learning_rate": 1.6935546643653655e-05, + "loss": 0.1188, "step": 9055 }, { - "epoch": 0.2299784236578246, - "grad_norm": 0.9005599617958069, - "learning_rate": 1.8466810508947835e-05, - "loss": 0.1439, + "epoch": 0.45992182344281435, + "grad_norm": 0.5368686318397522, + "learning_rate": 1.693385451038124e-05, + "loss": 0.1063, "step": 9060 }, { - "epoch": 0.2301053433176799, - "grad_norm": 0.6290811896324158, - "learning_rate": 1.8465964377882134e-05, - "loss": 0.1401, + "epoch": 0.46017564343367684, + "grad_norm": 0.5517676472663879, + "learning_rate": 1.6932162377108822e-05, + "loss": 0.1013, "step": 9065 }, { - "epoch": 0.2302322629775352, - "grad_norm": 0.8922406435012817, - "learning_rate": 1.8465118246816432e-05, - "loss": 0.1499, + "epoch": 0.46042946342453933, + "grad_norm": 0.45235463976860046, + "learning_rate": 1.6930470243836406e-05, + "loss": 0.1123, "step": 9070 }, { - "epoch": 0.23035918263739052, - "grad_norm": 0.7505338788032532, - "learning_rate": 1.846427211575073e-05, - "loss": 0.1543, + "epoch": 0.4606832834154018, + "grad_norm": 0.5119823217391968, + "learning_rate": 1.692877811056399e-05, + "loss": 0.1046, "step": 9075 }, { - "epoch": 0.23048610229724584, - "grad_norm": 0.8415098190307617, - "learning_rate": 1.846342598468503e-05, - "loss": 0.1621, + "epoch": 0.46093710340626426, + "grad_norm": 0.48481494188308716, + "learning_rate": 1.6927085977291573e-05, + "loss": 0.1181, "step": 9080 }, { - "epoch": 0.23061302195710115, - "grad_norm": 0.6414968967437744, - "learning_rate": 1.8462579853619327e-05, - "loss": 0.1635, + "epoch": 0.46119092339712675, + "grad_norm": 0.8900187015533447, + "learning_rate": 1.6925393844019156e-05, + "loss": 0.0909, "step": 9085 }, { - "epoch": 0.23073994161695646, - "grad_norm": 0.9650170803070068, - "learning_rate": 1.8461733722553626e-05, - "loss": 0.1382, + "epoch": 0.46144474338798924, + "grad_norm": 0.5890236496925354, + "learning_rate": 1.692370171074674e-05, + "loss": 0.1115, "step": 9090 }, { - "epoch": 0.23086686127681177, - "grad_norm": 0.8145585656166077, - "learning_rate": 1.8460887591487924e-05, - "loss": 0.1401, + "epoch": 0.46169856337885173, + "grad_norm": 0.5484874248504639, + "learning_rate": 1.6922009577474323e-05, + "loss": 0.1222, "step": 9095 }, { - "epoch": 0.2309937809366671, - "grad_norm": 0.8770110607147217, - "learning_rate": 1.846004146042222e-05, - "loss": 0.181, + "epoch": 0.4619523833697142, + "grad_norm": 0.32354220747947693, + "learning_rate": 1.6920317444201907e-05, + "loss": 0.0988, "step": 9100 }, { - "epoch": 0.2311207005965224, - "grad_norm": 0.738203763961792, - "learning_rate": 1.8459195329356518e-05, - "loss": 0.1621, + "epoch": 0.46220620336057666, + "grad_norm": 0.4846709966659546, + "learning_rate": 1.691862531092949e-05, + "loss": 0.1184, "step": 9105 }, { - "epoch": 0.2312476202563777, - "grad_norm": 0.7033202052116394, - "learning_rate": 1.8458349198290816e-05, - "loss": 0.1621, + "epoch": 0.46246002335143915, + "grad_norm": 0.43833622336387634, + "learning_rate": 1.6916933177657074e-05, + "loss": 0.1041, "step": 9110 }, { - "epoch": 0.23137453991623302, - "grad_norm": 0.6500858068466187, - "learning_rate": 1.8457503067225114e-05, - "loss": 0.1245, + "epoch": 0.46271384334230165, + "grad_norm": 0.5856930613517761, + "learning_rate": 1.6915241044384658e-05, + "loss": 0.1109, "step": 9115 }, { - "epoch": 0.23150145957608834, - "grad_norm": 0.7656859755516052, - "learning_rate": 1.8456656936159413e-05, - "loss": 0.1416, + "epoch": 0.46296766333316414, + "grad_norm": 0.6962338089942932, + "learning_rate": 1.691354891111224e-05, + "loss": 0.1146, "step": 9120 }, { - "epoch": 0.23162837923594365, - "grad_norm": 0.6147520542144775, - "learning_rate": 1.845581080509371e-05, - "loss": 0.1398, + "epoch": 0.4632214833240266, + "grad_norm": 0.3463370203971863, + "learning_rate": 1.6911856777839825e-05, + "loss": 0.1099, "step": 9125 }, { - "epoch": 0.23175529889579896, - "grad_norm": 0.7322003841400146, - "learning_rate": 1.845496467402801e-05, - "loss": 0.1895, + "epoch": 0.46347530331488906, + "grad_norm": 1.7089934349060059, + "learning_rate": 1.6910164644567408e-05, + "loss": 0.117, "step": 9130 }, { - "epoch": 0.23188221855565427, - "grad_norm": 0.5321815013885498, - "learning_rate": 1.8454118542962308e-05, - "loss": 0.1744, + "epoch": 0.46372912330575156, + "grad_norm": 0.47701790928840637, + "learning_rate": 1.6908472511294992e-05, + "loss": 0.1109, "step": 9135 }, { - "epoch": 0.23200913821550959, - "grad_norm": 0.8465583324432373, - "learning_rate": 1.8453272411896603e-05, - "loss": 0.1124, + "epoch": 0.46398294329661405, + "grad_norm": 0.7937628626823425, + "learning_rate": 1.6906780378022572e-05, + "loss": 0.1002, "step": 9140 }, { - "epoch": 0.2321360578753649, - "grad_norm": 0.8303848505020142, - "learning_rate": 1.84524262808309e-05, - "loss": 0.1848, + "epoch": 0.46423676328747654, + "grad_norm": 0.608931303024292, + "learning_rate": 1.690508824475016e-05, + "loss": 0.1032, "step": 9145 }, { - "epoch": 0.2322629775352202, - "grad_norm": 0.7748087048530579, - "learning_rate": 1.84515801497652e-05, - "loss": 0.1246, + "epoch": 0.464490583278339, + "grad_norm": 0.5773627161979675, + "learning_rate": 1.6903396111477742e-05, + "loss": 0.1204, "step": 9150 }, { - "epoch": 0.23238989719507552, - "grad_norm": 0.5550708174705505, - "learning_rate": 1.8450734018699498e-05, - "loss": 0.1309, + "epoch": 0.46474440326920147, + "grad_norm": 0.48551470041275024, + "learning_rate": 1.6901703978205323e-05, + "loss": 0.1023, "step": 9155 }, { - "epoch": 0.23251681685493084, - "grad_norm": 0.5556688904762268, - "learning_rate": 1.8449887887633797e-05, - "loss": 0.152, + "epoch": 0.46499822326006396, + "grad_norm": 0.428069144487381, + "learning_rate": 1.690001184493291e-05, + "loss": 0.1174, "step": 9160 }, { - "epoch": 0.23264373651478615, - "grad_norm": 0.8689586520195007, - "learning_rate": 1.8449041756568095e-05, - "loss": 0.1165, + "epoch": 0.46525204325092645, + "grad_norm": 0.737295925617218, + "learning_rate": 1.689831971166049e-05, + "loss": 0.1024, "step": 9165 }, { - "epoch": 0.23277065617464146, - "grad_norm": 0.9611642956733704, - "learning_rate": 1.8448195625502393e-05, - "loss": 0.146, + "epoch": 0.46550586324178894, + "grad_norm": 0.9198458194732666, + "learning_rate": 1.6896627578388077e-05, + "loss": 0.1045, "step": 9170 }, { - "epoch": 0.23289757583449677, - "grad_norm": 0.7466115951538086, - "learning_rate": 1.8447349494436692e-05, - "loss": 0.1597, + "epoch": 0.4657596832326514, + "grad_norm": 0.46820440888404846, + "learning_rate": 1.689493544511566e-05, + "loss": 0.105, "step": 9175 }, { - "epoch": 0.23302449549435209, - "grad_norm": 0.6621206998825073, - "learning_rate": 1.8446503363370987e-05, - "loss": 0.138, + "epoch": 0.46601350322351387, + "grad_norm": 0.5066524147987366, + "learning_rate": 1.689324331184324e-05, + "loss": 0.0928, "step": 9180 }, { - "epoch": 0.2331514151542074, - "grad_norm": 0.6535630822181702, - "learning_rate": 1.8445657232305285e-05, - "loss": 0.1324, + "epoch": 0.46626732321437636, + "grad_norm": 0.45925962924957275, + "learning_rate": 1.6891551178570827e-05, + "loss": 0.1143, "step": 9185 }, { - "epoch": 0.2332783348140627, - "grad_norm": 0.6000819802284241, - "learning_rate": 1.8444811101239584e-05, - "loss": 0.1583, + "epoch": 0.46652114320523885, + "grad_norm": 0.5800960063934326, + "learning_rate": 1.6889859045298407e-05, + "loss": 0.1292, "step": 9190 }, { - "epoch": 0.23340525447391802, - "grad_norm": 0.9986957311630249, - "learning_rate": 1.8443964970173882e-05, - "loss": 0.1446, + "epoch": 0.46677496319610134, + "grad_norm": 0.47480955719947815, + "learning_rate": 1.688816691202599e-05, + "loss": 0.1098, "step": 9195 }, { - "epoch": 0.23353217413377333, - "grad_norm": 1.2913461923599243, - "learning_rate": 1.8443118839108177e-05, - "loss": 0.1524, + "epoch": 0.4670287831869638, + "grad_norm": 0.5127950310707092, + "learning_rate": 1.6886474778753578e-05, + "loss": 0.1229, "step": 9200 }, { - "epoch": 0.23365909379362862, - "grad_norm": 0.8417603969573975, - "learning_rate": 1.8442272708042475e-05, - "loss": 0.122, + "epoch": 0.4672826031778263, + "grad_norm": 0.6397584676742554, + "learning_rate": 1.6884782645481158e-05, + "loss": 0.1075, "step": 9205 }, { - "epoch": 0.23378601345348393, - "grad_norm": 1.279468059539795, - "learning_rate": 1.8441426576976774e-05, - "loss": 0.1746, + "epoch": 0.46753642316868876, + "grad_norm": 0.419527530670166, + "learning_rate": 1.688309051220874e-05, + "loss": 0.1033, "step": 9210 }, { - "epoch": 0.23391293311333924, - "grad_norm": 0.6960480809211731, - "learning_rate": 1.8440580445911072e-05, - "loss": 0.1529, + "epoch": 0.46779024315955126, + "grad_norm": 0.3961291015148163, + "learning_rate": 1.6881398378936325e-05, + "loss": 0.0988, "step": 9215 }, { - "epoch": 0.23403985277319456, - "grad_norm": 0.6945759654045105, - "learning_rate": 1.843973431484537e-05, - "loss": 0.1539, + "epoch": 0.46804406315041375, + "grad_norm": 0.5905468463897705, + "learning_rate": 1.687970624566391e-05, + "loss": 0.1044, "step": 9220 }, { - "epoch": 0.23416677243304987, - "grad_norm": 0.9431143403053284, - "learning_rate": 1.843888818377967e-05, - "loss": 0.1397, + "epoch": 0.4682978831412762, + "grad_norm": 0.390688419342041, + "learning_rate": 1.6878014112391496e-05, + "loss": 0.1111, "step": 9225 }, { - "epoch": 0.23429369209290518, - "grad_norm": 0.5605700612068176, - "learning_rate": 1.8438042052713967e-05, - "loss": 0.1474, + "epoch": 0.4685517031321387, + "grad_norm": 0.36093178391456604, + "learning_rate": 1.6876321979119076e-05, + "loss": 0.1002, "step": 9230 }, { - "epoch": 0.2344206117527605, - "grad_norm": 0.7627313137054443, - "learning_rate": 1.8437195921648266e-05, - "loss": 0.1376, + "epoch": 0.46880552312300117, + "grad_norm": 0.5973555445671082, + "learning_rate": 1.687462984584666e-05, + "loss": 0.1121, "step": 9235 }, { - "epoch": 0.2345475314126158, - "grad_norm": 0.6361751556396484, - "learning_rate": 1.843634979058256e-05, - "loss": 0.1371, + "epoch": 0.46905934311386366, + "grad_norm": 0.5031787157058716, + "learning_rate": 1.6872937712574243e-05, + "loss": 0.1064, "step": 9240 }, { - "epoch": 0.23467445107247112, - "grad_norm": 1.3345049619674683, - "learning_rate": 1.843550365951686e-05, - "loss": 0.144, + "epoch": 0.46931316310472615, + "grad_norm": 0.844380259513855, + "learning_rate": 1.6871245579301826e-05, + "loss": 0.1042, "step": 9245 }, { - "epoch": 0.23480137073232643, - "grad_norm": 1.035424828529358, - "learning_rate": 1.8434657528451158e-05, - "loss": 0.1647, + "epoch": 0.4695669830955886, + "grad_norm": 0.5998194217681885, + "learning_rate": 1.686955344602941e-05, + "loss": 0.1112, "step": 9250 }, { - "epoch": 0.23492829039218174, - "grad_norm": 0.7525562047958374, - "learning_rate": 1.8433811397385456e-05, - "loss": 0.159, + "epoch": 0.4698208030864511, + "grad_norm": 0.5319337248802185, + "learning_rate": 1.6867861312756994e-05, + "loss": 0.1065, "step": 9255 }, { - "epoch": 0.23505521005203706, - "grad_norm": 0.619185745716095, - "learning_rate": 1.8432965266319754e-05, - "loss": 0.1167, + "epoch": 0.47007462307731357, + "grad_norm": 0.43366721272468567, + "learning_rate": 1.6866169179484577e-05, + "loss": 0.1047, "step": 9260 }, { - "epoch": 0.23518212971189237, - "grad_norm": 0.9098873138427734, - "learning_rate": 1.8432119135254053e-05, - "loss": 0.1551, + "epoch": 0.47032844306817606, + "grad_norm": 0.4964733421802521, + "learning_rate": 1.686447704621216e-05, + "loss": 0.1027, "step": 9265 }, { - "epoch": 0.23530904937174768, - "grad_norm": 0.8784344792366028, - "learning_rate": 1.843127300418835e-05, - "loss": 0.1332, + "epoch": 0.47058226305903855, + "grad_norm": 0.44293656945228577, + "learning_rate": 1.6862784912939744e-05, + "loss": 0.112, "step": 9270 }, { - "epoch": 0.235435969031603, - "grad_norm": 0.8226946592330933, - "learning_rate": 1.843042687312265e-05, - "loss": 0.1491, + "epoch": 0.470836083049901, + "grad_norm": 0.2671841084957123, + "learning_rate": 1.6861092779667328e-05, + "loss": 0.1019, "step": 9275 }, { - "epoch": 0.2355628886914583, - "grad_norm": 0.819767415523529, - "learning_rate": 1.8429580742056945e-05, - "loss": 0.1351, + "epoch": 0.4710899030407635, + "grad_norm": 0.5117993354797363, + "learning_rate": 1.685940064639491e-05, + "loss": 0.1069, "step": 9280 }, { - "epoch": 0.23568980835131362, - "grad_norm": 1.2547740936279297, - "learning_rate": 1.8428734610991243e-05, - "loss": 0.1259, + "epoch": 0.47134372303162597, + "grad_norm": 0.49542441964149475, + "learning_rate": 1.6857708513122495e-05, + "loss": 0.1059, "step": 9285 }, { - "epoch": 0.23581672801116893, - "grad_norm": 0.5532656908035278, - "learning_rate": 1.842788847992554e-05, - "loss": 0.156, + "epoch": 0.47159754302248846, + "grad_norm": 0.4893092215061188, + "learning_rate": 1.685601637985008e-05, + "loss": 0.1054, "step": 9290 }, { - "epoch": 0.23594364767102424, - "grad_norm": 1.039598822593689, - "learning_rate": 1.842704234885984e-05, - "loss": 0.1382, + "epoch": 0.47185136301335096, + "grad_norm": 1.8522083759307861, + "learning_rate": 1.6854324246577662e-05, + "loss": 0.1002, "step": 9295 }, { - "epoch": 0.23607056733087955, - "grad_norm": 0.9208945631980896, - "learning_rate": 1.8426196217794138e-05, - "loss": 0.1604, + "epoch": 0.4721051830042134, + "grad_norm": 1.9006584882736206, + "learning_rate": 1.6852632113305245e-05, + "loss": 0.113, "step": 9300 }, { - "epoch": 0.23619748699073487, - "grad_norm": 0.5946624279022217, - "learning_rate": 1.8425350086728437e-05, - "loss": 0.1704, + "epoch": 0.4723590029950759, + "grad_norm": 0.43772315979003906, + "learning_rate": 1.685093998003283e-05, + "loss": 0.1172, "step": 9305 }, { - "epoch": 0.23632440665059018, - "grad_norm": 0.5294488072395325, - "learning_rate": 1.8424503955662735e-05, - "loss": 0.1128, + "epoch": 0.4726128229859384, + "grad_norm": 0.5562046766281128, + "learning_rate": 1.6849247846760412e-05, + "loss": 0.1037, "step": 9310 }, { - "epoch": 0.2364513263104455, - "grad_norm": 0.9247378706932068, - "learning_rate": 1.8423657824597033e-05, - "loss": 0.115, + "epoch": 0.47286664297680087, + "grad_norm": 0.4431091248989105, + "learning_rate": 1.6847555713487996e-05, + "loss": 0.102, "step": 9315 }, { - "epoch": 0.2365782459703008, - "grad_norm": 0.8662728071212769, - "learning_rate": 1.8422811693531332e-05, - "loss": 0.1347, + "epoch": 0.47312046296766336, + "grad_norm": 0.4664026200771332, + "learning_rate": 1.684586358021558e-05, + "loss": 0.1014, "step": 9320 }, { - "epoch": 0.23670516563015612, - "grad_norm": 1.0140321254730225, - "learning_rate": 1.8421965562465627e-05, - "loss": 0.1833, + "epoch": 0.4733742829585258, + "grad_norm": 0.4194331765174866, + "learning_rate": 1.6844171446943163e-05, + "loss": 0.1094, "step": 9325 }, { - "epoch": 0.23683208529001143, - "grad_norm": 0.7351956963539124, - "learning_rate": 1.8421119431399925e-05, - "loss": 0.1302, + "epoch": 0.4736281029493883, + "grad_norm": 0.4214523732662201, + "learning_rate": 1.6842479313670747e-05, + "loss": 0.0985, "step": 9330 }, { - "epoch": 0.23695900494986674, - "grad_norm": 0.711276650428772, - "learning_rate": 1.8420273300334224e-05, - "loss": 0.1448, + "epoch": 0.4738819229402508, + "grad_norm": 0.4564267098903656, + "learning_rate": 1.684078718039833e-05, + "loss": 0.0984, "step": 9335 }, { - "epoch": 0.23708592460972205, - "grad_norm": 0.5574631094932556, - "learning_rate": 1.8419427169268522e-05, - "loss": 0.1532, + "epoch": 0.47413574293111327, + "grad_norm": 0.6111764907836914, + "learning_rate": 1.6839095047125914e-05, + "loss": 0.1138, "step": 9340 }, { - "epoch": 0.23721284426957737, - "grad_norm": 0.49244558811187744, - "learning_rate": 1.8418581038202817e-05, - "loss": 0.129, + "epoch": 0.47438956292197576, + "grad_norm": 0.8785223364830017, + "learning_rate": 1.6837402913853494e-05, + "loss": 0.1066, "step": 9345 }, { - "epoch": 0.23733976392943268, - "grad_norm": 0.5064070224761963, - "learning_rate": 1.8417734907137116e-05, - "loss": 0.1066, + "epoch": 0.4746433829128382, + "grad_norm": 0.5163254737854004, + "learning_rate": 1.683571078058108e-05, + "loss": 0.1086, "step": 9350 }, { - "epoch": 0.237466683589288, - "grad_norm": 0.915599524974823, - "learning_rate": 1.8416888776071414e-05, - "loss": 0.1284, + "epoch": 0.4748972029037007, + "grad_norm": 0.45714399218559265, + "learning_rate": 1.6834018647308664e-05, + "loss": 0.1137, "step": 9355 }, { - "epoch": 0.2375936032491433, - "grad_norm": 0.7994723320007324, - "learning_rate": 1.8416042645005712e-05, - "loss": 0.1445, + "epoch": 0.4751510228945632, + "grad_norm": 0.5146034359931946, + "learning_rate": 1.6832326514036248e-05, + "loss": 0.0973, "step": 9360 }, { - "epoch": 0.23772052290899862, - "grad_norm": 0.6893892288208008, - "learning_rate": 1.841519651394001e-05, - "loss": 0.1334, + "epoch": 0.47540484288542567, + "grad_norm": 0.5823367834091187, + "learning_rate": 1.683063438076383e-05, + "loss": 0.1099, "step": 9365 }, { - "epoch": 0.23784744256885393, - "grad_norm": 0.6626500487327576, - "learning_rate": 1.841435038287431e-05, - "loss": 0.1341, + "epoch": 0.47565866287628816, + "grad_norm": 0.472186416387558, + "learning_rate": 1.682894224749141e-05, + "loss": 0.1074, "step": 9370 }, { - "epoch": 0.23797436222870924, - "grad_norm": 0.6614676713943481, - "learning_rate": 1.8413504251808608e-05, - "loss": 0.149, + "epoch": 0.4759124828671506, + "grad_norm": 0.817765474319458, + "learning_rate": 1.6827250114219e-05, + "loss": 0.1037, "step": 9375 }, { - "epoch": 0.23810128188856453, - "grad_norm": 0.6561861634254456, - "learning_rate": 1.8412658120742906e-05, - "loss": 0.1211, + "epoch": 0.4761663028580131, + "grad_norm": 0.43095797300338745, + "learning_rate": 1.6825557980946582e-05, + "loss": 0.1081, "step": 9380 }, { - "epoch": 0.23822820154841984, - "grad_norm": 0.7039099931716919, - "learning_rate": 1.84118119896772e-05, - "loss": 0.1449, + "epoch": 0.4764201228488756, + "grad_norm": 0.40991702675819397, + "learning_rate": 1.6823865847674162e-05, + "loss": 0.1172, "step": 9385 }, { - "epoch": 0.23835512120827515, - "grad_norm": 0.5464332699775696, - "learning_rate": 1.84109658586115e-05, - "loss": 0.1507, + "epoch": 0.4766739428397381, + "grad_norm": 0.39121660590171814, + "learning_rate": 1.682217371440175e-05, + "loss": 0.1066, "step": 9390 }, { - "epoch": 0.23848204086813046, - "grad_norm": 0.5760924816131592, - "learning_rate": 1.8410119727545798e-05, - "loss": 0.1334, + "epoch": 0.47692776283060057, + "grad_norm": 0.5421929359436035, + "learning_rate": 1.682048158112933e-05, + "loss": 0.0957, "step": 9395 }, { - "epoch": 0.23860896052798578, - "grad_norm": 0.9390232563018799, - "learning_rate": 1.8409273596480096e-05, - "loss": 0.1673, + "epoch": 0.477181582821463, + "grad_norm": 0.44107937812805176, + "learning_rate": 1.6818789447856913e-05, + "loss": 0.1129, "step": 9400 }, { - "epoch": 0.2387358801878411, - "grad_norm": 0.49589455127716064, - "learning_rate": 1.8408427465414395e-05, - "loss": 0.1344, + "epoch": 0.4774354028123255, + "grad_norm": 0.6062149405479431, + "learning_rate": 1.68170973145845e-05, + "loss": 0.1152, "step": 9405 }, { - "epoch": 0.2388627998476964, - "grad_norm": 0.7731547951698303, - "learning_rate": 1.8407581334348693e-05, - "loss": 0.1423, + "epoch": 0.477689222803188, + "grad_norm": 0.4286916255950928, + "learning_rate": 1.681540518131208e-05, + "loss": 0.1114, "step": 9410 }, { - "epoch": 0.2389897195075517, - "grad_norm": 0.5858550071716309, - "learning_rate": 1.840673520328299e-05, - "loss": 0.1526, + "epoch": 0.4779430427940505, + "grad_norm": 0.42873385548591614, + "learning_rate": 1.6813713048039667e-05, + "loss": 0.1024, "step": 9415 }, { - "epoch": 0.23911663916740702, - "grad_norm": 0.9642583727836609, - "learning_rate": 1.840588907221729e-05, - "loss": 0.1622, + "epoch": 0.4781968627849129, + "grad_norm": 0.5670800805091858, + "learning_rate": 1.6812020914767247e-05, + "loss": 0.1174, "step": 9420 }, { - "epoch": 0.23924355882726234, - "grad_norm": 0.6050362586975098, - "learning_rate": 1.8405042941151585e-05, - "loss": 0.1243, + "epoch": 0.4784506827757754, + "grad_norm": 0.4170010983943939, + "learning_rate": 1.681032878149483e-05, + "loss": 0.0972, "step": 9425 }, { - "epoch": 0.23937047848711765, - "grad_norm": 0.4369475841522217, - "learning_rate": 1.8404196810085883e-05, - "loss": 0.1258, + "epoch": 0.4787045027666379, + "grad_norm": 0.5890213251113892, + "learning_rate": 1.6808636648222418e-05, + "loss": 0.1212, "step": 9430 }, { - "epoch": 0.23949739814697296, - "grad_norm": 0.8763788342475891, - "learning_rate": 1.840335067902018e-05, - "loss": 0.1606, + "epoch": 0.4789583227575004, + "grad_norm": 0.8447275757789612, + "learning_rate": 1.6806944514949998e-05, + "loss": 0.107, "step": 9435 }, { - "epoch": 0.23962431780682827, - "grad_norm": 0.7296867966651917, - "learning_rate": 1.840250454795448e-05, - "loss": 0.1718, + "epoch": 0.4792121427483629, + "grad_norm": 0.41049903631210327, + "learning_rate": 1.680525238167758e-05, + "loss": 0.1117, "step": 9440 }, { - "epoch": 0.2397512374666836, - "grad_norm": 0.461323618888855, - "learning_rate": 1.840165841688878e-05, - "loss": 0.1378, + "epoch": 0.4794659627392253, + "grad_norm": 0.5393239855766296, + "learning_rate": 1.6803560248405165e-05, + "loss": 0.0978, "step": 9445 }, { - "epoch": 0.2398781571265389, - "grad_norm": 0.6685863733291626, - "learning_rate": 1.8400812285823077e-05, - "loss": 0.1416, + "epoch": 0.4797197827300878, + "grad_norm": 0.5582802295684814, + "learning_rate": 1.680186811513275e-05, + "loss": 0.1057, "step": 9450 }, { - "epoch": 0.2400050767863942, - "grad_norm": 1.0126062631607056, - "learning_rate": 1.8399966154757375e-05, - "loss": 0.1509, + "epoch": 0.4799736027209503, + "grad_norm": 0.6471379995346069, + "learning_rate": 1.6800175981860332e-05, + "loss": 0.1101, "step": 9455 }, { - "epoch": 0.24013199644624952, - "grad_norm": 0.6700828671455383, - "learning_rate": 1.8399120023691674e-05, - "loss": 0.1556, + "epoch": 0.4802274227118128, + "grad_norm": 0.5534394979476929, + "learning_rate": 1.6798483848587915e-05, + "loss": 0.1015, "step": 9460 }, { - "epoch": 0.24025891610610484, - "grad_norm": 0.7165868878364563, - "learning_rate": 1.839827389262597e-05, - "loss": 0.1692, + "epoch": 0.4804812427026753, + "grad_norm": 0.43192145228385925, + "learning_rate": 1.67967917153155e-05, + "loss": 0.102, "step": 9465 }, { - "epoch": 0.24038583576596015, - "grad_norm": 0.572209358215332, - "learning_rate": 1.8397427761560267e-05, - "loss": 0.1218, + "epoch": 0.4807350626935377, + "grad_norm": 0.38189658522605896, + "learning_rate": 1.6795099582043083e-05, + "loss": 0.1017, "step": 9470 }, { - "epoch": 0.24051275542581546, - "grad_norm": 0.712011992931366, - "learning_rate": 1.8396581630494565e-05, - "loss": 0.1474, + "epoch": 0.4809888826844002, + "grad_norm": 0.4700341820716858, + "learning_rate": 1.6793407448770666e-05, + "loss": 0.1005, "step": 9475 }, { - "epoch": 0.24063967508567077, - "grad_norm": 0.8210466504096985, - "learning_rate": 1.8395735499428864e-05, - "loss": 0.1423, + "epoch": 0.4812427026752627, + "grad_norm": 0.6079494953155518, + "learning_rate": 1.679171531549825e-05, + "loss": 0.0978, "step": 9480 }, { - "epoch": 0.24076659474552609, - "grad_norm": 0.8295236229896545, - "learning_rate": 1.839488936836316e-05, - "loss": 0.1508, + "epoch": 0.4814965226661252, + "grad_norm": 0.8428679704666138, + "learning_rate": 1.6790023182225833e-05, + "loss": 0.1251, "step": 9485 }, { - "epoch": 0.2408935144053814, - "grad_norm": 0.8567714691162109, - "learning_rate": 1.8394043237297457e-05, - "loss": 0.1422, + "epoch": 0.4817503426569877, + "grad_norm": 0.48023349046707153, + "learning_rate": 1.6788331048953417e-05, + "loss": 0.1109, "step": 9490 }, { - "epoch": 0.2410204340652367, - "grad_norm": 0.6892949342727661, - "learning_rate": 1.8393197106231756e-05, - "loss": 0.1441, + "epoch": 0.4820041626478501, + "grad_norm": 0.40779995918273926, + "learning_rate": 1.6786638915681e-05, + "loss": 0.1191, "step": 9495 }, { - "epoch": 0.24114735372509202, - "grad_norm": 0.7725691199302673, - "learning_rate": 1.8392350975166054e-05, - "loss": 0.12, + "epoch": 0.4822579826387126, + "grad_norm": 0.4184429943561554, + "learning_rate": 1.6784946782408584e-05, + "loss": 0.0975, "step": 9500 }, { - "epoch": 0.24127427338494734, - "grad_norm": 0.7265022993087769, - "learning_rate": 1.8391504844100352e-05, - "loss": 0.1533, + "epoch": 0.4825118026295751, + "grad_norm": 0.39694687724113464, + "learning_rate": 1.6783254649136167e-05, + "loss": 0.0985, "step": 9505 }, { - "epoch": 0.24140119304480265, - "grad_norm": 0.7284069061279297, - "learning_rate": 1.839065871303465e-05, - "loss": 0.162, + "epoch": 0.4827656226204376, + "grad_norm": 0.503432035446167, + "learning_rate": 1.678156251586375e-05, + "loss": 0.1057, "step": 9510 }, { - "epoch": 0.24152811270465796, - "grad_norm": 0.684155285358429, - "learning_rate": 1.838981258196895e-05, - "loss": 0.1504, + "epoch": 0.4830194426113001, + "grad_norm": 0.43741822242736816, + "learning_rate": 1.6779870382591334e-05, + "loss": 0.1002, "step": 9515 }, { - "epoch": 0.24165503236451327, - "grad_norm": 0.641552209854126, - "learning_rate": 1.8388966450903248e-05, - "loss": 0.1426, + "epoch": 0.4832732626021625, + "grad_norm": 0.49856293201446533, + "learning_rate": 1.6778178249318918e-05, + "loss": 0.1194, "step": 9520 }, { - "epoch": 0.24178195202436858, - "grad_norm": 0.7501075267791748, - "learning_rate": 1.8388120319837543e-05, - "loss": 0.1652, + "epoch": 0.483527082593025, + "grad_norm": 0.46641266345977783, + "learning_rate": 1.67764861160465e-05, + "loss": 0.0864, "step": 9525 }, { - "epoch": 0.2419088716842239, - "grad_norm": 1.0258420705795288, - "learning_rate": 1.838727418877184e-05, - "loss": 0.1435, + "epoch": 0.4837809025838875, + "grad_norm": 0.4092983603477478, + "learning_rate": 1.6774793982774085e-05, + "loss": 0.1078, "step": 9530 }, { - "epoch": 0.2420357913440792, - "grad_norm": 0.66619473695755, - "learning_rate": 1.838642805770614e-05, - "loss": 0.1515, + "epoch": 0.48403472257475, + "grad_norm": 0.4319418668746948, + "learning_rate": 1.677310184950167e-05, + "loss": 0.1056, "step": 9535 }, { - "epoch": 0.24216271100393452, - "grad_norm": 0.7554883360862732, - "learning_rate": 1.8385581926640438e-05, - "loss": 0.1419, + "epoch": 0.4842885425656125, + "grad_norm": 0.3443421423435211, + "learning_rate": 1.6771409716229252e-05, + "loss": 0.098, "step": 9540 }, { - "epoch": 0.24228963066378983, - "grad_norm": 0.7239290475845337, - "learning_rate": 1.8384735795574736e-05, - "loss": 0.1419, + "epoch": 0.4845423625564749, + "grad_norm": 0.5126322507858276, + "learning_rate": 1.6769717582956836e-05, + "loss": 0.1052, "step": 9545 }, { - "epoch": 0.24241655032364512, - "grad_norm": 2.0859267711639404, - "learning_rate": 1.8383889664509035e-05, - "loss": 0.1716, + "epoch": 0.4847961825473374, + "grad_norm": 0.4677981436252594, + "learning_rate": 1.6768025449684416e-05, + "loss": 0.1123, "step": 9550 }, { - "epoch": 0.24254346998350043, - "grad_norm": 0.6063077449798584, - "learning_rate": 1.8383043533443333e-05, - "loss": 0.1222, + "epoch": 0.4850500025381999, + "grad_norm": 0.5052584409713745, + "learning_rate": 1.6766333316412003e-05, + "loss": 0.1084, "step": 9555 }, { - "epoch": 0.24267038964335574, - "grad_norm": 0.7551079392433167, - "learning_rate": 1.838219740237763e-05, - "loss": 0.1681, + "epoch": 0.4853038225290624, + "grad_norm": 0.5080714821815491, + "learning_rate": 1.6764641183139586e-05, + "loss": 0.1052, "step": 9560 }, { - "epoch": 0.24279730930321106, - "grad_norm": 0.6871835589408875, - "learning_rate": 1.8381351271311926e-05, - "loss": 0.1281, + "epoch": 0.4855576425199249, + "grad_norm": 0.4302172064781189, + "learning_rate": 1.676294904986717e-05, + "loss": 0.1023, "step": 9565 }, { - "epoch": 0.24292422896306637, - "grad_norm": 0.600294828414917, - "learning_rate": 1.8380505140246225e-05, - "loss": 0.1417, + "epoch": 0.48581146251078733, + "grad_norm": 0.4493500292301178, + "learning_rate": 1.6761256916594753e-05, + "loss": 0.1187, "step": 9570 }, { - "epoch": 0.24305114862292168, - "grad_norm": 0.6317391991615295, - "learning_rate": 1.8379659009180523e-05, - "loss": 0.1483, + "epoch": 0.4860652825016498, + "grad_norm": 0.3389817178249359, + "learning_rate": 1.6759564783322334e-05, + "loss": 0.1063, "step": 9575 }, { - "epoch": 0.243178068282777, - "grad_norm": 0.6905192136764526, - "learning_rate": 1.8378812878114822e-05, - "loss": 0.1303, + "epoch": 0.4863191024925123, + "grad_norm": 0.4281669855117798, + "learning_rate": 1.675787265004992e-05, + "loss": 0.1004, "step": 9580 }, { - "epoch": 0.2433049879426323, - "grad_norm": 0.7126545906066895, - "learning_rate": 1.837796674704912e-05, - "loss": 0.1588, + "epoch": 0.4865729224833748, + "grad_norm": 0.5047438740730286, + "learning_rate": 1.6756180516777504e-05, + "loss": 0.1168, "step": 9585 }, { - "epoch": 0.24343190760248762, - "grad_norm": 0.6566731929779053, - "learning_rate": 1.837712061598342e-05, - "loss": 0.1273, + "epoch": 0.4868267424742373, + "grad_norm": 0.6686415076255798, + "learning_rate": 1.6754488383505084e-05, + "loss": 0.1013, "step": 9590 }, { - "epoch": 0.24355882726234293, - "grad_norm": 0.5314049124717712, - "learning_rate": 1.8376274484917717e-05, - "loss": 0.1448, + "epoch": 0.48708056246509973, + "grad_norm": 0.5081450939178467, + "learning_rate": 1.675279625023267e-05, + "loss": 0.1007, "step": 9595 }, { - "epoch": 0.24368574692219824, - "grad_norm": 4.125439643859863, - "learning_rate": 1.8375428353852015e-05, - "loss": 0.1848, + "epoch": 0.4873343824559622, + "grad_norm": 0.5452947616577148, + "learning_rate": 1.675110411696025e-05, + "loss": 0.1072, "step": 9600 }, { - "epoch": 0.24381266658205356, - "grad_norm": 0.4344308078289032, - "learning_rate": 1.837458222278631e-05, - "loss": 0.133, + "epoch": 0.4875882024468247, + "grad_norm": 0.3936508595943451, + "learning_rate": 1.6749411983687838e-05, + "loss": 0.0975, "step": 9605 }, { - "epoch": 0.24393958624190887, - "grad_norm": 0.5637083649635315, - "learning_rate": 1.837373609172061e-05, - "loss": 0.1289, + "epoch": 0.4878420224376872, + "grad_norm": 0.4514859914779663, + "learning_rate": 1.6747719850415422e-05, + "loss": 0.0993, "step": 9610 }, { - "epoch": 0.24406650590176418, - "grad_norm": 0.8519596457481384, - "learning_rate": 1.8372889960654907e-05, - "loss": 0.1355, + "epoch": 0.4880958424285497, + "grad_norm": 0.7531886696815491, + "learning_rate": 1.6746027717143002e-05, + "loss": 0.099, "step": 9615 }, { - "epoch": 0.2441934255616195, - "grad_norm": 0.717745840549469, - "learning_rate": 1.8372043829589206e-05, - "loss": 0.1371, + "epoch": 0.48834966241941213, + "grad_norm": 0.5042106509208679, + "learning_rate": 1.674433558387059e-05, + "loss": 0.1125, "step": 9620 }, { - "epoch": 0.2443203452214748, - "grad_norm": 0.6815947890281677, - "learning_rate": 1.83711976985235e-05, - "loss": 0.1562, + "epoch": 0.4886034824102746, + "grad_norm": 0.6559919714927673, + "learning_rate": 1.674264345059817e-05, + "loss": 0.1166, "step": 9625 }, { - "epoch": 0.24444726488133012, - "grad_norm": 0.5920616388320923, - "learning_rate": 1.83703515674578e-05, - "loss": 0.1262, + "epoch": 0.4888573024011371, + "grad_norm": 0.4015331566333771, + "learning_rate": 1.6740951317325753e-05, + "loss": 0.1121, "step": 9630 }, { - "epoch": 0.24457418454118543, - "grad_norm": 0.748652458190918, - "learning_rate": 1.8369505436392097e-05, - "loss": 0.125, + "epoch": 0.4891111223919996, + "grad_norm": 0.38791272044181824, + "learning_rate": 1.673925918405334e-05, + "loss": 0.1058, "step": 9635 }, { - "epoch": 0.24470110420104074, - "grad_norm": 0.7067185640335083, - "learning_rate": 1.8368659305326396e-05, - "loss": 0.1414, + "epoch": 0.4893649423828621, + "grad_norm": 0.6192878484725952, + "learning_rate": 1.673756705078092e-05, + "loss": 0.1029, "step": 9640 }, { - "epoch": 0.24482802386089605, - "grad_norm": 0.7397181987762451, - "learning_rate": 1.8367813174260694e-05, - "loss": 0.128, + "epoch": 0.48961876237372454, + "grad_norm": 0.3586585819721222, + "learning_rate": 1.6735874917508503e-05, + "loss": 0.1003, "step": 9645 }, { - "epoch": 0.24495494352075137, - "grad_norm": 0.7413289546966553, - "learning_rate": 1.8366967043194993e-05, - "loss": 0.1297, + "epoch": 0.48987258236458703, + "grad_norm": 0.5473580956459045, + "learning_rate": 1.6734182784236087e-05, + "loss": 0.102, "step": 9650 }, { - "epoch": 0.24508186318060668, - "grad_norm": 1.1668275594711304, - "learning_rate": 1.836612091212929e-05, - "loss": 0.1443, + "epoch": 0.4901264023554495, + "grad_norm": 0.4406992495059967, + "learning_rate": 1.673249065096367e-05, + "loss": 0.1019, "step": 9655 }, { - "epoch": 0.245208782840462, - "grad_norm": 0.925081193447113, - "learning_rate": 1.836527478106359e-05, - "loss": 0.1264, + "epoch": 0.490380222346312, + "grad_norm": 0.6175484657287598, + "learning_rate": 1.6730798517691257e-05, + "loss": 0.0987, "step": 9660 }, { - "epoch": 0.2453357025003173, - "grad_norm": 1.082558274269104, - "learning_rate": 1.8364428649997884e-05, - "loss": 0.1384, + "epoch": 0.4906340423371745, + "grad_norm": 0.5513244271278381, + "learning_rate": 1.6729106384418837e-05, + "loss": 0.094, "step": 9665 }, { - "epoch": 0.24546262216017262, - "grad_norm": 0.6193867921829224, - "learning_rate": 1.8363582518932183e-05, - "loss": 0.1209, + "epoch": 0.49088786232803694, + "grad_norm": 0.5684154033660889, + "learning_rate": 1.672741425114642e-05, + "loss": 0.0943, "step": 9670 }, { - "epoch": 0.24558954182002793, - "grad_norm": 0.6445197463035583, - "learning_rate": 1.836273638786648e-05, - "loss": 0.1398, + "epoch": 0.49114168231889943, + "grad_norm": 0.5040678381919861, + "learning_rate": 1.6725722117874004e-05, + "loss": 0.1, "step": 9675 }, { - "epoch": 0.24571646147988324, - "grad_norm": 0.7374609112739563, - "learning_rate": 1.836189025680078e-05, - "loss": 0.1459, + "epoch": 0.4913955023097619, + "grad_norm": 0.5477058291435242, + "learning_rate": 1.6724029984601588e-05, + "loss": 0.098, "step": 9680 }, { - "epoch": 0.24584338113973855, - "grad_norm": 0.5440840721130371, - "learning_rate": 1.8361044125735078e-05, - "loss": 0.1313, + "epoch": 0.4916493223006244, + "grad_norm": 0.5544368028640747, + "learning_rate": 1.672233785132917e-05, + "loss": 0.1033, "step": 9685 }, { - "epoch": 0.24597030079959387, - "grad_norm": 0.5820302367210388, - "learning_rate": 1.8360197994669376e-05, - "loss": 0.1364, + "epoch": 0.49190314229148685, + "grad_norm": 0.3991355001926422, + "learning_rate": 1.6720645718056755e-05, + "loss": 0.1013, "step": 9690 }, { - "epoch": 0.24609722045944918, - "grad_norm": 0.6991755366325378, - "learning_rate": 1.8359351863603675e-05, - "loss": 0.1424, + "epoch": 0.49215696228234934, + "grad_norm": 0.39113104343414307, + "learning_rate": 1.671895358478434e-05, + "loss": 0.0954, "step": 9695 }, { - "epoch": 0.2462241401193045, - "grad_norm": 0.6271185278892517, - "learning_rate": 1.8358505732537973e-05, - "loss": 0.1317, + "epoch": 0.49241078227321183, + "grad_norm": 0.5807805061340332, + "learning_rate": 1.6717261451511922e-05, + "loss": 0.0946, "step": 9700 }, { - "epoch": 0.2463510597791598, - "grad_norm": 0.9449744820594788, - "learning_rate": 1.8357659601472268e-05, - "loss": 0.1428, + "epoch": 0.4926646022640743, + "grad_norm": 0.6040493249893188, + "learning_rate": 1.6715569318239506e-05, + "loss": 0.1099, "step": 9705 }, { - "epoch": 0.24647797943901512, - "grad_norm": 0.7532095909118652, - "learning_rate": 1.8356813470406567e-05, - "loss": 0.1408, + "epoch": 0.4929184222549368, + "grad_norm": 0.8760645985603333, + "learning_rate": 1.671387718496709e-05, + "loss": 0.1111, "step": 9710 }, { - "epoch": 0.24660489909887043, - "grad_norm": 0.9332950711250305, - "learning_rate": 1.8355967339340865e-05, - "loss": 0.169, + "epoch": 0.49317224224579925, + "grad_norm": 0.7517929077148438, + "learning_rate": 1.6712185051694673e-05, + "loss": 0.1055, "step": 9715 }, { - "epoch": 0.2467318187587257, - "grad_norm": 0.5556686520576477, - "learning_rate": 1.8355121208275163e-05, - "loss": 0.1325, + "epoch": 0.49342606223666174, + "grad_norm": 0.5098727345466614, + "learning_rate": 1.6710492918422256e-05, + "loss": 0.0984, "step": 9720 }, { - "epoch": 0.24685873841858103, - "grad_norm": 1.2110345363616943, - "learning_rate": 1.8354275077209462e-05, - "loss": 0.1228, + "epoch": 0.49367988222752424, + "grad_norm": 0.7740623354911804, + "learning_rate": 1.670880078514984e-05, + "loss": 0.1011, "step": 9725 }, { - "epoch": 0.24698565807843634, - "grad_norm": 0.6278002262115479, - "learning_rate": 1.835342894614376e-05, - "loss": 0.1471, + "epoch": 0.4939337022183867, + "grad_norm": 0.621465802192688, + "learning_rate": 1.6707108651877423e-05, + "loss": 0.1139, "step": 9730 }, { - "epoch": 0.24711257773829165, - "grad_norm": 1.0397270917892456, - "learning_rate": 1.835258281507806e-05, - "loss": 0.152, + "epoch": 0.4941875222092492, + "grad_norm": 0.5187132358551025, + "learning_rate": 1.6705416518605007e-05, + "loss": 0.1043, "step": 9735 }, { - "epoch": 0.24723949739814696, - "grad_norm": 0.6188620924949646, - "learning_rate": 1.8351736684012357e-05, - "loss": 0.1328, + "epoch": 0.49444134220011166, + "grad_norm": 0.42478689551353455, + "learning_rate": 1.670372438533259e-05, + "loss": 0.1017, "step": 9740 }, { - "epoch": 0.24736641705800227, - "grad_norm": 0.8731056451797485, - "learning_rate": 1.8350890552946652e-05, - "loss": 0.1472, + "epoch": 0.49469516219097415, + "grad_norm": 0.45251190662384033, + "learning_rate": 1.6702032252060174e-05, + "loss": 0.114, "step": 9745 }, { - "epoch": 0.2474933367178576, - "grad_norm": 0.637445867061615, - "learning_rate": 1.835004442188095e-05, - "loss": 0.1386, + "epoch": 0.49494898218183664, + "grad_norm": 0.48776471614837646, + "learning_rate": 1.6700340118787758e-05, + "loss": 0.107, "step": 9750 }, { - "epoch": 0.2476202563777129, - "grad_norm": 0.6661638021469116, - "learning_rate": 1.834919829081525e-05, - "loss": 0.1241, + "epoch": 0.49520280217269913, + "grad_norm": 1.0470062494277954, + "learning_rate": 1.669864798551534e-05, + "loss": 0.0999, "step": 9755 }, { - "epoch": 0.2477471760375682, - "grad_norm": 0.6318964958190918, - "learning_rate": 1.8348352159749547e-05, - "loss": 0.1521, + "epoch": 0.4954566221635616, + "grad_norm": 0.5062408447265625, + "learning_rate": 1.6696955852242925e-05, + "loss": 0.1076, "step": 9760 }, { - "epoch": 0.24787409569742352, - "grad_norm": 0.5989259481430054, - "learning_rate": 1.8347506028683842e-05, - "loss": 0.1281, + "epoch": 0.49571044215442406, + "grad_norm": 0.35752901434898376, + "learning_rate": 1.6695263718970508e-05, + "loss": 0.1169, "step": 9765 }, { - "epoch": 0.24800101535727884, - "grad_norm": 1.3657300472259521, - "learning_rate": 1.834665989761814e-05, - "loss": 0.1372, + "epoch": 0.49596426214528655, + "grad_norm": 0.4572597146034241, + "learning_rate": 1.6693571585698092e-05, + "loss": 0.0932, "step": 9770 }, { - "epoch": 0.24812793501713415, - "grad_norm": 0.5878183841705322, - "learning_rate": 1.834581376655244e-05, - "loss": 0.1462, + "epoch": 0.49621808213614904, + "grad_norm": 1.1978650093078613, + "learning_rate": 1.6691879452425675e-05, + "loss": 0.1062, "step": 9775 }, { - "epoch": 0.24825485467698946, - "grad_norm": 0.6280167102813721, - "learning_rate": 1.8344967635486737e-05, - "loss": 0.1335, + "epoch": 0.49647190212701153, + "grad_norm": 0.5021398663520813, + "learning_rate": 1.6690187319153256e-05, + "loss": 0.1198, "step": 9780 }, { - "epoch": 0.24838177433684477, - "grad_norm": 0.5319559574127197, - "learning_rate": 1.8344121504421036e-05, - "loss": 0.1396, + "epoch": 0.496725722117874, + "grad_norm": 0.8845065236091614, + "learning_rate": 1.6688495185880842e-05, + "loss": 0.1021, "step": 9785 }, { - "epoch": 0.2485086939967001, - "grad_norm": 0.9989762902259827, - "learning_rate": 1.8343275373355334e-05, - "loss": 0.1364, + "epoch": 0.49697954210873646, + "grad_norm": 0.5153151750564575, + "learning_rate": 1.6686803052608426e-05, + "loss": 0.1053, "step": 9790 }, { - "epoch": 0.2486356136565554, - "grad_norm": 0.5605775117874146, - "learning_rate": 1.8342429242289633e-05, - "loss": 0.1705, + "epoch": 0.49723336209959895, + "grad_norm": 0.5133734941482544, + "learning_rate": 1.6685110919336006e-05, + "loss": 0.0964, "step": 9795 }, { - "epoch": 0.2487625333164107, - "grad_norm": 0.6116089820861816, - "learning_rate": 1.834158311122393e-05, - "loss": 0.1225, + "epoch": 0.49748718209046144, + "grad_norm": 0.4482670724391937, + "learning_rate": 1.6683418786063593e-05, + "loss": 0.1081, "step": 9800 }, { - "epoch": 0.24888945297626602, - "grad_norm": 0.7274653911590576, - "learning_rate": 1.8340736980158226e-05, - "loss": 0.115, + "epoch": 0.49774100208132394, + "grad_norm": 0.4168063998222351, + "learning_rate": 1.6681726652791173e-05, + "loss": 0.1145, "step": 9805 }, { - "epoch": 0.24901637263612134, - "grad_norm": 0.7828027009963989, - "learning_rate": 1.8339890849092524e-05, - "loss": 0.1439, + "epoch": 0.4979948220721864, + "grad_norm": 0.6683678030967712, + "learning_rate": 1.668003451951876e-05, + "loss": 0.0899, "step": 9810 }, { - "epoch": 0.24914329229597665, - "grad_norm": 0.6175337433815002, - "learning_rate": 1.8339044718026823e-05, - "loss": 0.1452, + "epoch": 0.49824864206304886, + "grad_norm": 0.46197474002838135, + "learning_rate": 1.6678342386246344e-05, + "loss": 0.1042, "step": 9815 }, { - "epoch": 0.24927021195583196, - "grad_norm": 0.9003787636756897, - "learning_rate": 1.833819858696112e-05, - "loss": 0.1194, + "epoch": 0.49850246205391135, + "grad_norm": 0.3457307517528534, + "learning_rate": 1.6676650252973924e-05, + "loss": 0.1019, "step": 9820 }, { - "epoch": 0.24939713161568727, - "grad_norm": 0.9594404101371765, - "learning_rate": 1.833735245589542e-05, - "loss": 0.119, + "epoch": 0.49875628204477385, + "grad_norm": 0.41356801986694336, + "learning_rate": 1.667495811970151e-05, + "loss": 0.1043, "step": 9825 }, { - "epoch": 0.24952405127554259, - "grad_norm": 0.8379500508308411, - "learning_rate": 1.8336506324829718e-05, - "loss": 0.1471, + "epoch": 0.49901010203563634, + "grad_norm": 0.43680256605148315, + "learning_rate": 1.667326598642909e-05, + "loss": 0.1083, "step": 9830 }, { - "epoch": 0.2496509709353979, - "grad_norm": 1.0844719409942627, - "learning_rate": 1.8335660193764016e-05, - "loss": 0.1366, + "epoch": 0.49926392202649883, + "grad_norm": 0.48716604709625244, + "learning_rate": 1.6671573853156675e-05, + "loss": 0.1028, "step": 9835 }, { - "epoch": 0.2497778905952532, - "grad_norm": 0.6275160908699036, - "learning_rate": 1.8334814062698315e-05, - "loss": 0.1699, + "epoch": 0.49951774201736127, + "grad_norm": 0.5266230702400208, + "learning_rate": 1.666988171988426e-05, + "loss": 0.1009, "step": 9840 }, { - "epoch": 0.24990481025510852, - "grad_norm": 0.6083460450172424, - "learning_rate": 1.8333967931632613e-05, - "loss": 0.1335, + "epoch": 0.49977156200822376, + "grad_norm": 0.4376629889011383, + "learning_rate": 1.666818958661184e-05, + "loss": 0.1078, "step": 9845 }, { - "epoch": 0.2500317299149638, - "grad_norm": 0.747376561164856, - "learning_rate": 1.8333121800566908e-05, - "loss": 0.1429, + "epoch": 0.5000253819990862, + "grad_norm": 0.48102936148643494, + "learning_rate": 1.666649745333943e-05, + "loss": 0.092, "step": 9850 }, { - "epoch": 0.25015864957481915, - "grad_norm": 1.0108275413513184, - "learning_rate": 1.8332275669501207e-05, - "loss": 0.1458, + "epoch": 0.5002792019899487, + "grad_norm": 0.4479629099369049, + "learning_rate": 1.666480532006701e-05, + "loss": 0.0984, "step": 9855 }, { - "epoch": 0.25028556923467443, - "grad_norm": 0.49117717146873474, - "learning_rate": 1.8331429538435505e-05, - "loss": 0.1615, + "epoch": 0.5005330219808112, + "grad_norm": 0.429806649684906, + "learning_rate": 1.6663113186794592e-05, + "loss": 0.09, "step": 9860 }, { - "epoch": 0.25041248889452977, - "grad_norm": 0.8129422664642334, - "learning_rate": 1.8330583407369804e-05, - "loss": 0.1364, + "epoch": 0.5007868419716737, + "grad_norm": 0.4415090084075928, + "learning_rate": 1.666142105352218e-05, + "loss": 0.0989, "step": 9865 }, { - "epoch": 0.25053940855438506, - "grad_norm": 0.5040650963783264, - "learning_rate": 1.8329737276304102e-05, - "loss": 0.1068, + "epoch": 0.5010406619625362, + "grad_norm": 0.4832078516483307, + "learning_rate": 1.665972892024976e-05, + "loss": 0.1061, "step": 9870 }, { - "epoch": 0.2506663282142404, - "grad_norm": 0.7198666334152222, - "learning_rate": 1.83288911452384e-05, - "loss": 0.1539, + "epoch": 0.5012944819533987, + "grad_norm": 0.4944320619106293, + "learning_rate": 1.6658036786977343e-05, + "loss": 0.1129, "step": 9875 }, { - "epoch": 0.2507932478740957, - "grad_norm": 0.7848442792892456, - "learning_rate": 1.83280450141727e-05, - "loss": 0.1447, + "epoch": 0.5015483019442611, + "grad_norm": 0.4951033592224121, + "learning_rate": 1.6656344653704926e-05, + "loss": 0.1057, "step": 9880 }, { - "epoch": 0.250920167533951, - "grad_norm": 0.8687480092048645, - "learning_rate": 1.8327198883106997e-05, - "loss": 0.1395, + "epoch": 0.5018021219351236, + "grad_norm": 0.46183741092681885, + "learning_rate": 1.665465252043251e-05, + "loss": 0.103, "step": 9885 }, { - "epoch": 0.2510470871938063, - "grad_norm": 0.8864397406578064, - "learning_rate": 1.8326352752041292e-05, - "loss": 0.1358, + "epoch": 0.5020559419259861, + "grad_norm": 0.4078376293182373, + "learning_rate": 1.6652960387160093e-05, + "loss": 0.1023, "step": 9890 }, { - "epoch": 0.25117400685366165, - "grad_norm": 2.0247440338134766, - "learning_rate": 1.832550662097559e-05, - "loss": 0.1307, + "epoch": 0.5023097619168486, + "grad_norm": 0.4910680949687958, + "learning_rate": 1.6651268253887677e-05, + "loss": 0.1045, "step": 9895 }, { - "epoch": 0.25130092651351693, - "grad_norm": 1.2370307445526123, - "learning_rate": 1.832466048990989e-05, - "loss": 0.1737, + "epoch": 0.502563581907711, + "grad_norm": 0.5417612791061401, + "learning_rate": 1.664957612061526e-05, + "loss": 0.098, "step": 9900 }, { - "epoch": 0.25142784617337227, - "grad_norm": 1.3794323205947876, - "learning_rate": 1.8323814358844187e-05, - "loss": 0.1495, + "epoch": 0.5028174018985735, + "grad_norm": 0.44090452790260315, + "learning_rate": 1.6647883987342844e-05, + "loss": 0.1098, "step": 9905 }, { - "epoch": 0.25155476583322756, - "grad_norm": 0.9459143280982971, - "learning_rate": 1.8322968227778482e-05, - "loss": 0.1291, + "epoch": 0.503071221889436, + "grad_norm": 0.707319438457489, + "learning_rate": 1.6646191854070428e-05, + "loss": 0.0929, "step": 9910 }, { - "epoch": 0.2516816854930829, - "grad_norm": 0.6629770994186401, - "learning_rate": 1.832212209671278e-05, - "loss": 0.1395, + "epoch": 0.5033250418802985, + "grad_norm": 0.4322894513607025, + "learning_rate": 1.664449972079801e-05, + "loss": 0.0897, "step": 9915 }, { - "epoch": 0.2518086051529382, - "grad_norm": 0.5657610297203064, - "learning_rate": 1.832127596564708e-05, - "loss": 0.1614, + "epoch": 0.503578861871161, + "grad_norm": 0.44595420360565186, + "learning_rate": 1.6642807587525595e-05, + "loss": 0.1098, "step": 9920 }, { - "epoch": 0.2519355248127935, - "grad_norm": 0.6587680578231812, - "learning_rate": 1.8320429834581378e-05, - "loss": 0.1577, + "epoch": 0.5038326818620235, + "grad_norm": 0.47478431463241577, + "learning_rate": 1.664111545425318e-05, + "loss": 0.1152, "step": 9925 }, { - "epoch": 0.2520624444726488, - "grad_norm": 0.9601367712020874, - "learning_rate": 1.8319583703515676e-05, - "loss": 0.1387, + "epoch": 0.504086501852886, + "grad_norm": 0.4155206084251404, + "learning_rate": 1.6639423320980762e-05, + "loss": 0.101, "step": 9930 }, { - "epoch": 0.25218936413250415, - "grad_norm": 0.9832651019096375, - "learning_rate": 1.8318737572449974e-05, - "loss": 0.1676, + "epoch": 0.5043403218437484, + "grad_norm": 0.6963584423065186, + "learning_rate": 1.6637731187708345e-05, + "loss": 0.0959, "step": 9935 }, { - "epoch": 0.25231628379235943, - "grad_norm": 0.5629587173461914, - "learning_rate": 1.8317891441384273e-05, - "loss": 0.1365, + "epoch": 0.5045941418346109, + "grad_norm": 0.4272857904434204, + "learning_rate": 1.663603905443593e-05, + "loss": 0.0983, "step": 9940 }, { - "epoch": 0.25244320345221477, - "grad_norm": 0.5733922719955444, - "learning_rate": 1.831704531031857e-05, - "loss": 0.1613, + "epoch": 0.5048479618254734, + "grad_norm": 0.4493730962276459, + "learning_rate": 1.6634346921163512e-05, + "loss": 0.1028, "step": 9945 }, { - "epoch": 0.25257012311207006, - "grad_norm": 0.7614062428474426, - "learning_rate": 1.8316199179252866e-05, - "loss": 0.1301, + "epoch": 0.5051017818163358, + "grad_norm": 0.5067510008811951, + "learning_rate": 1.6632654787891096e-05, + "loss": 0.0942, "step": 9950 }, { - "epoch": 0.2526970427719254, - "grad_norm": 0.5763667225837708, - "learning_rate": 1.8315353048187165e-05, - "loss": 0.1617, + "epoch": 0.5053556018071983, + "grad_norm": 0.5519894361495972, + "learning_rate": 1.663096265461868e-05, + "loss": 0.0949, "step": 9955 }, { - "epoch": 0.2528239624317807, - "grad_norm": 1.0582488775253296, - "learning_rate": 1.8314506917121463e-05, - "loss": 0.1344, + "epoch": 0.5056094217980608, + "grad_norm": 0.4868788719177246, + "learning_rate": 1.6629270521346263e-05, + "loss": 0.1023, "step": 9960 }, { - "epoch": 0.252950882091636, - "grad_norm": 0.6799361705780029, - "learning_rate": 1.831366078605576e-05, - "loss": 0.1652, + "epoch": 0.5058632417889233, + "grad_norm": 0.40864047408103943, + "learning_rate": 1.6627578388073847e-05, + "loss": 0.1036, "step": 9965 }, { - "epoch": 0.2530778017514913, - "grad_norm": 0.6597838401794434, - "learning_rate": 1.831281465499006e-05, - "loss": 0.1289, + "epoch": 0.5061170617797858, + "grad_norm": 0.33604058623313904, + "learning_rate": 1.662588625480143e-05, + "loss": 0.0921, "step": 9970 }, { - "epoch": 0.25320472141134664, - "grad_norm": 1.0884172916412354, - "learning_rate": 1.8311968523924358e-05, - "loss": 0.1529, + "epoch": 0.5063708817706483, + "grad_norm": 0.39679473638534546, + "learning_rate": 1.6624194121529014e-05, + "loss": 0.1007, "step": 9975 }, { - "epoch": 0.25333164107120193, - "grad_norm": 0.7332661747932434, - "learning_rate": 1.8311122392858657e-05, - "loss": 0.1253, + "epoch": 0.5066247017615108, + "grad_norm": 0.4948192238807678, + "learning_rate": 1.6622501988256597e-05, + "loss": 0.104, "step": 9980 }, { - "epoch": 0.2534585607310572, - "grad_norm": 0.5978658199310303, - "learning_rate": 1.8310276261792955e-05, - "loss": 0.1543, + "epoch": 0.5068785217523732, + "grad_norm": 0.4956167936325073, + "learning_rate": 1.6620809854984177e-05, + "loss": 0.1078, "step": 9985 }, { - "epoch": 0.25358548039091255, - "grad_norm": 0.5437586307525635, - "learning_rate": 1.830943013072725e-05, - "loss": 0.1846, + "epoch": 0.5071323417432357, + "grad_norm": 1.0531851053237915, + "learning_rate": 1.6619117721711764e-05, + "loss": 0.102, "step": 9990 }, { - "epoch": 0.25371240005076784, - "grad_norm": 0.5461845397949219, - "learning_rate": 1.830858399966155e-05, - "loss": 0.163, + "epoch": 0.5073861617340982, + "grad_norm": 0.3761901557445526, + "learning_rate": 1.6617425588439348e-05, + "loss": 0.108, "step": 9995 }, { - "epoch": 0.2538393197106232, - "grad_norm": 0.7532410621643066, - "learning_rate": 1.8307737868595847e-05, - "loss": 0.1676, + "epoch": 0.5076399817249606, + "grad_norm": 0.42027950286865234, + "learning_rate": 1.661573345516693e-05, + "loss": 0.0946, "step": 10000 }, { - "epoch": 0.25396623937047846, - "grad_norm": 0.5898687839508057, - "learning_rate": 1.8306891737530145e-05, - "loss": 0.1613, + "epoch": 0.5078938017158231, + "grad_norm": 0.44332537055015564, + "learning_rate": 1.6614041321894515e-05, + "loss": 0.1023, "step": 10005 }, { - "epoch": 0.2540931590303338, - "grad_norm": 0.612015426158905, - "learning_rate": 1.8306045606464444e-05, - "loss": 0.1412, + "epoch": 0.5081476217066856, + "grad_norm": 0.35272926092147827, + "learning_rate": 1.6612349188622095e-05, + "loss": 0.0892, "step": 10010 }, { - "epoch": 0.2542200786901891, - "grad_norm": 0.793147623538971, - "learning_rate": 1.8305199475398742e-05, - "loss": 0.1692, + "epoch": 0.5084014416975481, + "grad_norm": 0.4161962866783142, + "learning_rate": 1.6610657055349682e-05, + "loss": 0.0938, "step": 10015 }, { - "epoch": 0.25434699835004443, - "grad_norm": 0.8468564748764038, - "learning_rate": 1.830435334433304e-05, - "loss": 0.1493, + "epoch": 0.5086552616884106, + "grad_norm": 1.0567114353179932, + "learning_rate": 1.6608964922077266e-05, + "loss": 0.1041, "step": 10020 }, { - "epoch": 0.2544739180098997, - "grad_norm": 0.49984389543533325, - "learning_rate": 1.830350721326734e-05, - "loss": 0.1303, + "epoch": 0.5089090816792731, + "grad_norm": 0.40777215361595154, + "learning_rate": 1.6607272788804846e-05, + "loss": 0.103, "step": 10025 }, { - "epoch": 0.25460083766975505, - "grad_norm": 0.7265325784683228, - "learning_rate": 1.8302661082201634e-05, - "loss": 0.1602, + "epoch": 0.5091629016701356, + "grad_norm": 0.6142024993896484, + "learning_rate": 1.6605580655532433e-05, + "loss": 0.1047, "step": 10030 }, { - "epoch": 0.25472775732961034, - "grad_norm": 0.6857850551605225, - "learning_rate": 1.8301814951135932e-05, - "loss": 0.1221, + "epoch": 0.509416721660998, + "grad_norm": 0.6380098462104797, + "learning_rate": 1.6603888522260013e-05, + "loss": 0.1104, "step": 10035 }, { - "epoch": 0.2548546769894657, - "grad_norm": 0.728740394115448, - "learning_rate": 1.830096882007023e-05, - "loss": 0.1673, + "epoch": 0.5096705416518605, + "grad_norm": 0.5984404683113098, + "learning_rate": 1.6602196388987596e-05, + "loss": 0.097, "step": 10040 }, { - "epoch": 0.25498159664932096, - "grad_norm": 0.52281653881073, - "learning_rate": 1.830012268900453e-05, - "loss": 0.1319, + "epoch": 0.509924361642723, + "grad_norm": 0.4230678081512451, + "learning_rate": 1.6600504255715183e-05, + "loss": 0.1015, "step": 10045 }, { - "epoch": 0.2551085163091763, - "grad_norm": 0.8147508502006531, - "learning_rate": 1.8299276557938824e-05, - "loss": 0.1345, + "epoch": 0.5101781816335854, + "grad_norm": 0.38381847739219666, + "learning_rate": 1.6598812122442764e-05, + "loss": 0.0914, "step": 10050 }, { - "epoch": 0.2552354359690316, - "grad_norm": 0.7111371755599976, - "learning_rate": 1.8298430426873122e-05, - "loss": 0.1541, + "epoch": 0.5104320016244479, + "grad_norm": 0.44329530000686646, + "learning_rate": 1.659711998917035e-05, + "loss": 0.1188, "step": 10055 }, { - "epoch": 0.25536235562888693, - "grad_norm": 0.7603732943534851, - "learning_rate": 1.829758429580742e-05, - "loss": 0.1478, + "epoch": 0.5106858216153104, + "grad_norm": 0.351794570684433, + "learning_rate": 1.659542785589793e-05, + "loss": 0.1054, "step": 10060 }, { - "epoch": 0.2554892752887422, - "grad_norm": 0.6881338953971863, - "learning_rate": 1.829673816474172e-05, - "loss": 0.1669, + "epoch": 0.5109396416061729, + "grad_norm": 0.596990704536438, + "learning_rate": 1.6593735722625514e-05, + "loss": 0.1058, "step": 10065 }, { - "epoch": 0.25561619494859755, - "grad_norm": 0.543867290019989, - "learning_rate": 1.8295892033676018e-05, - "loss": 0.1298, + "epoch": 0.5111934615970354, + "grad_norm": 0.53257155418396, + "learning_rate": 1.65920435893531e-05, + "loss": 0.0999, "step": 10070 }, { - "epoch": 0.25574311460845284, - "grad_norm": 0.6130603551864624, - "learning_rate": 1.8295045902610316e-05, - "loss": 0.1491, + "epoch": 0.5114472815878979, + "grad_norm": 0.768983781337738, + "learning_rate": 1.659035145608068e-05, + "loss": 0.0938, "step": 10075 }, { - "epoch": 0.2558700342683082, - "grad_norm": 0.6841226816177368, - "learning_rate": 1.8294199771544614e-05, - "loss": 0.1306, + "epoch": 0.5117011015787604, + "grad_norm": 0.6798129081726074, + "learning_rate": 1.6588659322808265e-05, + "loss": 0.0976, "step": 10080 }, { - "epoch": 0.25599695392816346, - "grad_norm": 0.500636637210846, - "learning_rate": 1.8293353640478913e-05, - "loss": 0.137, + "epoch": 0.5119549215696229, + "grad_norm": 0.4623749256134033, + "learning_rate": 1.658696718953585e-05, + "loss": 0.1173, "step": 10085 }, { - "epoch": 0.2561238735880188, - "grad_norm": 0.7968870997428894, - "learning_rate": 1.8292507509413208e-05, - "loss": 0.1156, + "epoch": 0.5122087415604853, + "grad_norm": 0.3781088590621948, + "learning_rate": 1.6585275056263432e-05, + "loss": 0.0992, "step": 10090 }, { - "epoch": 0.2562507932478741, - "grad_norm": 0.7917343974113464, - "learning_rate": 1.8291661378347506e-05, - "loss": 0.1185, + "epoch": 0.5124625615513478, + "grad_norm": 0.5837762951850891, + "learning_rate": 1.6583582922991015e-05, + "loss": 0.0996, "step": 10095 }, { - "epoch": 0.2563777129077294, - "grad_norm": 1.3896543979644775, - "learning_rate": 1.8290815247281805e-05, - "loss": 0.1394, + "epoch": 0.5127163815422102, + "grad_norm": 0.5101218819618225, + "learning_rate": 1.65818907897186e-05, + "loss": 0.0951, "step": 10100 }, { - "epoch": 0.2565046325675847, - "grad_norm": 1.0082969665527344, - "learning_rate": 1.8289969116216103e-05, - "loss": 0.1378, + "epoch": 0.5129702015330727, + "grad_norm": 0.5419501066207886, + "learning_rate": 1.6580198656446183e-05, + "loss": 0.1077, "step": 10105 }, { - "epoch": 0.25663155222744005, - "grad_norm": 1.8145171403884888, - "learning_rate": 1.82891229851504e-05, - "loss": 0.1526, + "epoch": 0.5132240215239352, + "grad_norm": 0.28744372725486755, + "learning_rate": 1.6578506523173766e-05, + "loss": 0.0928, "step": 10110 }, { - "epoch": 0.25675847188729534, - "grad_norm": 1.498610496520996, - "learning_rate": 1.82882768540847e-05, - "loss": 0.1215, + "epoch": 0.5134778415147977, + "grad_norm": 0.43558162450790405, + "learning_rate": 1.657681438990135e-05, + "loss": 0.0963, "step": 10115 }, { - "epoch": 0.2568853915471507, - "grad_norm": 0.9029565453529358, - "learning_rate": 1.8287430723018998e-05, - "loss": 0.1515, + "epoch": 0.5137316615056602, + "grad_norm": 0.35775113105773926, + "learning_rate": 1.6575122256628933e-05, + "loss": 0.0956, "step": 10120 }, { - "epoch": 0.25701231120700596, - "grad_norm": 0.6305819153785706, - "learning_rate": 1.8286584591953297e-05, - "loss": 0.1701, + "epoch": 0.5139854814965227, + "grad_norm": 0.7168741822242737, + "learning_rate": 1.6573430123356517e-05, + "loss": 0.0902, "step": 10125 }, { - "epoch": 0.2571392308668613, - "grad_norm": 0.625966489315033, - "learning_rate": 1.8285738460887592e-05, - "loss": 0.1412, + "epoch": 0.5142393014873852, + "grad_norm": 0.3916659355163574, + "learning_rate": 1.65717379900841e-05, + "loss": 0.105, "step": 10130 }, { - "epoch": 0.2572661505267166, - "grad_norm": 5.035688877105713, - "learning_rate": 1.828489232982189e-05, - "loss": 0.1586, + "epoch": 0.5144931214782477, + "grad_norm": 0.418158620595932, + "learning_rate": 1.6570045856811684e-05, + "loss": 0.0916, "step": 10135 }, { - "epoch": 0.2573930701865719, - "grad_norm": 0.9185628890991211, - "learning_rate": 1.828404619875619e-05, - "loss": 0.1685, + "epoch": 0.5147469414691102, + "grad_norm": 0.678088903427124, + "learning_rate": 1.6568353723539267e-05, + "loss": 0.1082, "step": 10140 }, { - "epoch": 0.2575199898464272, - "grad_norm": 0.5896447896957397, - "learning_rate": 1.8283200067690487e-05, - "loss": 0.1125, + "epoch": 0.5150007614599725, + "grad_norm": 0.42723122239112854, + "learning_rate": 1.656666159026685e-05, + "loss": 0.1036, "step": 10145 }, { - "epoch": 0.2576469095062825, - "grad_norm": 0.41062963008880615, - "learning_rate": 1.8282353936624785e-05, - "loss": 0.1399, + "epoch": 0.515254581450835, + "grad_norm": 0.6436286568641663, + "learning_rate": 1.6564969456994434e-05, + "loss": 0.109, "step": 10150 }, { - "epoch": 0.25777382916613784, - "grad_norm": 0.5125820636749268, - "learning_rate": 1.8281507805559084e-05, - "loss": 0.1215, + "epoch": 0.5155084014416975, + "grad_norm": 0.7051729559898376, + "learning_rate": 1.6563277323722018e-05, + "loss": 0.1164, "step": 10155 }, { - "epoch": 0.2579007488259931, - "grad_norm": 1.0335094928741455, - "learning_rate": 1.8280661674493382e-05, - "loss": 0.1337, + "epoch": 0.51576222143256, + "grad_norm": 0.5349159836769104, + "learning_rate": 1.65615851904496e-05, + "loss": 0.1171, "step": 10160 }, { - "epoch": 0.25802766848584846, - "grad_norm": 0.6157042384147644, - "learning_rate": 1.827981554342768e-05, - "loss": 0.1242, + "epoch": 0.5160160414234225, + "grad_norm": 0.6342788338661194, + "learning_rate": 1.6559893057177185e-05, + "loss": 0.114, "step": 10165 }, { - "epoch": 0.25815458814570375, - "grad_norm": 0.7246564030647278, - "learning_rate": 1.8278969412361976e-05, - "loss": 0.1021, + "epoch": 0.516269861414285, + "grad_norm": 0.4344607889652252, + "learning_rate": 1.655820092390477e-05, + "loss": 0.0888, "step": 10170 }, { - "epoch": 0.2582815078055591, - "grad_norm": 0.7480441927909851, - "learning_rate": 1.8278123281296274e-05, - "loss": 0.1601, + "epoch": 0.5165236814051475, + "grad_norm": 0.6470787525177002, + "learning_rate": 1.6556508790632352e-05, + "loss": 0.0952, "step": 10175 }, { - "epoch": 0.25840842746541437, - "grad_norm": 1.0739176273345947, - "learning_rate": 1.8277277150230572e-05, - "loss": 0.1478, + "epoch": 0.51677750139601, + "grad_norm": 0.6785343289375305, + "learning_rate": 1.6554816657359936e-05, + "loss": 0.111, "step": 10180 }, { - "epoch": 0.2585353471252697, - "grad_norm": 0.9894225001335144, - "learning_rate": 1.827643101916487e-05, - "loss": 0.1637, + "epoch": 0.5170313213868725, + "grad_norm": 0.41532400250434875, + "learning_rate": 1.655312452408752e-05, + "loss": 0.0911, "step": 10185 }, { - "epoch": 0.258662266785125, - "grad_norm": 0.52119380235672, - "learning_rate": 1.8275584888099166e-05, - "loss": 0.1384, + "epoch": 0.517285141377735, + "grad_norm": 0.4153634309768677, + "learning_rate": 1.65514323908151e-05, + "loss": 0.0986, "step": 10190 }, { - "epoch": 0.25878918644498033, - "grad_norm": 0.6088096499443054, - "learning_rate": 1.8274738757033464e-05, - "loss": 0.1399, + "epoch": 0.5175389613685973, + "grad_norm": 0.49320197105407715, + "learning_rate": 1.6549740257542686e-05, + "loss": 0.1052, "step": 10195 }, { - "epoch": 0.2589161061048356, - "grad_norm": 1.0353361368179321, - "learning_rate": 1.8273892625967763e-05, - "loss": 0.1715, + "epoch": 0.5177927813594598, + "grad_norm": 0.44718942046165466, + "learning_rate": 1.654804812427027e-05, + "loss": 0.1064, "step": 10200 }, { - "epoch": 0.25904302576469096, - "grad_norm": 0.8402993083000183, - "learning_rate": 1.827304649490206e-05, - "loss": 0.1427, + "epoch": 0.5180466013503223, + "grad_norm": 0.49730628728866577, + "learning_rate": 1.6546355990997853e-05, + "loss": 0.0981, "step": 10205 }, { - "epoch": 0.25916994542454624, - "grad_norm": 1.0035673379898071, - "learning_rate": 1.827220036383636e-05, - "loss": 0.1389, + "epoch": 0.5183004213411848, + "grad_norm": 0.7655092477798462, + "learning_rate": 1.6544663857725437e-05, + "loss": 0.1057, "step": 10210 }, { - "epoch": 0.2592968650844016, - "grad_norm": 0.7869465351104736, - "learning_rate": 1.8271354232770658e-05, - "loss": 0.1268, + "epoch": 0.5185542413320473, + "grad_norm": 0.3895924389362335, + "learning_rate": 1.6542971724453017e-05, + "loss": 0.0978, "step": 10215 }, { - "epoch": 0.25942378474425687, - "grad_norm": 0.8216532468795776, - "learning_rate": 1.8270508101704956e-05, - "loss": 0.125, + "epoch": 0.5188080613229098, + "grad_norm": 0.5672686696052551, + "learning_rate": 1.6541279591180604e-05, + "loss": 0.1043, "step": 10220 }, { - "epoch": 0.2595507044041122, - "grad_norm": 0.841312050819397, - "learning_rate": 1.8269661970639255e-05, - "loss": 0.1381, + "epoch": 0.5190618813137723, + "grad_norm": 0.5215070843696594, + "learning_rate": 1.6539587457908188e-05, + "loss": 0.1016, "step": 10225 }, { - "epoch": 0.2596776240639675, - "grad_norm": 1.3083573579788208, - "learning_rate": 1.826881583957355e-05, - "loss": 0.1176, + "epoch": 0.5193157013046348, + "grad_norm": 0.5295329689979553, + "learning_rate": 1.6537895324635768e-05, + "loss": 0.0954, "step": 10230 }, { - "epoch": 0.25980454372382283, - "grad_norm": 0.7557424902915955, - "learning_rate": 1.8267969708507848e-05, - "loss": 0.1822, + "epoch": 0.5195695212954973, + "grad_norm": 0.4239153563976288, + "learning_rate": 1.6536203191363355e-05, + "loss": 0.1025, "step": 10235 }, { - "epoch": 0.2599314633836781, - "grad_norm": 0.9737616777420044, - "learning_rate": 1.8267123577442146e-05, - "loss": 0.1183, + "epoch": 0.5198233412863598, + "grad_norm": 0.597144365310669, + "learning_rate": 1.6534511058090935e-05, + "loss": 0.0936, "step": 10240 }, { - "epoch": 0.26005838304353346, - "grad_norm": 1.03396737575531, - "learning_rate": 1.8266277446376445e-05, - "loss": 0.1617, + "epoch": 0.5200771612772221, + "grad_norm": 0.4462467133998871, + "learning_rate": 1.6532818924818522e-05, + "loss": 0.1048, "step": 10245 }, { - "epoch": 0.26018530270338874, - "grad_norm": 1.3512555360794067, - "learning_rate": 1.8265431315310743e-05, - "loss": 0.1605, + "epoch": 0.5203309812680846, + "grad_norm": 0.7833221554756165, + "learning_rate": 1.6531126791546105e-05, + "loss": 0.1123, "step": 10250 }, { - "epoch": 0.2603122223632441, - "grad_norm": 0.580971360206604, - "learning_rate": 1.826458518424504e-05, - "loss": 0.1484, + "epoch": 0.5205848012589471, + "grad_norm": 0.7217886447906494, + "learning_rate": 1.6529434658273685e-05, + "loss": 0.1007, "step": 10255 }, { - "epoch": 0.26043914202309937, - "grad_norm": 0.6328538060188293, - "learning_rate": 1.826373905317934e-05, - "loss": 0.1198, + "epoch": 0.5208386212498096, + "grad_norm": 0.6208773255348206, + "learning_rate": 1.6527742525001272e-05, + "loss": 0.088, "step": 10260 }, { - "epoch": 0.2605660616829547, - "grad_norm": 0.4139742851257324, - "learning_rate": 1.826289292211364e-05, - "loss": 0.1382, + "epoch": 0.5210924412406721, + "grad_norm": 0.5365100502967834, + "learning_rate": 1.6526050391728853e-05, + "loss": 0.1052, "step": 10265 }, { - "epoch": 0.26069298134281, - "grad_norm": 0.849651038646698, - "learning_rate": 1.8262046791047933e-05, - "loss": 0.1096, + "epoch": 0.5213462612315346, + "grad_norm": 0.4037196636199951, + "learning_rate": 1.6524358258456436e-05, + "loss": 0.0958, "step": 10270 }, { - "epoch": 0.26081990100266533, - "grad_norm": 0.656310498714447, - "learning_rate": 1.8261200659982232e-05, - "loss": 0.1489, + "epoch": 0.5216000812223971, + "grad_norm": 0.367753267288208, + "learning_rate": 1.6522666125184023e-05, + "loss": 0.0873, "step": 10275 }, { - "epoch": 0.2609468206625206, - "grad_norm": 0.6063727140426636, - "learning_rate": 1.826035452891653e-05, - "loss": 0.1435, + "epoch": 0.5218539012132596, + "grad_norm": 0.5729825496673584, + "learning_rate": 1.6520973991911603e-05, + "loss": 0.1014, "step": 10280 }, { - "epoch": 0.26107374032237596, - "grad_norm": 1.0413060188293457, - "learning_rate": 1.825950839785083e-05, - "loss": 0.1582, + "epoch": 0.5221077212041221, + "grad_norm": 0.3999881446361542, + "learning_rate": 1.6519281858639187e-05, + "loss": 0.0905, "step": 10285 }, { - "epoch": 0.26120065998223124, - "grad_norm": 0.5931706428527832, - "learning_rate": 1.8258662266785127e-05, - "loss": 0.1314, + "epoch": 0.5223615411949846, + "grad_norm": 0.5186746716499329, + "learning_rate": 1.651758972536677e-05, + "loss": 0.0934, "step": 10290 }, { - "epoch": 0.2613275796420866, - "grad_norm": 1.0698857307434082, - "learning_rate": 1.8257816135719425e-05, - "loss": 0.1498, + "epoch": 0.522615361185847, + "grad_norm": 0.4636486768722534, + "learning_rate": 1.6515897592094354e-05, + "loss": 0.0943, "step": 10295 }, { - "epoch": 0.26145449930194187, - "grad_norm": 0.9084089994430542, - "learning_rate": 1.8256970004653724e-05, - "loss": 0.1423, + "epoch": 0.5228691811767094, + "grad_norm": 0.5422764420509338, + "learning_rate": 1.651420545882194e-05, + "loss": 0.0939, "step": 10300 }, { - "epoch": 0.2615814189617972, - "grad_norm": 0.6530733704566956, - "learning_rate": 1.8256123873588022e-05, - "loss": 0.1487, + "epoch": 0.5231230011675719, + "grad_norm": 0.4960278570652008, + "learning_rate": 1.651251332554952e-05, + "loss": 0.089, "step": 10305 }, { - "epoch": 0.2617083386216525, - "grad_norm": 0.7484983801841736, - "learning_rate": 1.8255277742522317e-05, - "loss": 0.1391, + "epoch": 0.5233768211584344, + "grad_norm": 0.47158360481262207, + "learning_rate": 1.6510821192277104e-05, + "loss": 0.1025, "step": 10310 }, { - "epoch": 0.26183525828150783, - "grad_norm": 0.5956051349639893, - "learning_rate": 1.8254431611456616e-05, - "loss": 0.1445, + "epoch": 0.5236306411492969, + "grad_norm": 0.5370834469795227, + "learning_rate": 1.6509129059004688e-05, + "loss": 0.0955, "step": 10315 }, { - "epoch": 0.2619621779413631, - "grad_norm": 0.8198218941688538, - "learning_rate": 1.8253585480390914e-05, - "loss": 0.118, + "epoch": 0.5238844611401594, + "grad_norm": 1.4522969722747803, + "learning_rate": 1.650743692573227e-05, + "loss": 0.1046, "step": 10320 }, { - "epoch": 0.2620890976012184, - "grad_norm": 0.602367639541626, - "learning_rate": 1.8252739349325212e-05, - "loss": 0.1602, + "epoch": 0.5241382811310219, + "grad_norm": 0.5088288187980652, + "learning_rate": 1.6505744792459855e-05, + "loss": 0.1002, "step": 10325 }, { - "epoch": 0.26221601726107374, - "grad_norm": 0.7677584290504456, - "learning_rate": 1.825189321825951e-05, - "loss": 0.1207, + "epoch": 0.5243921011218844, + "grad_norm": 0.6046572327613831, + "learning_rate": 1.650405265918744e-05, + "loss": 0.0896, "step": 10330 }, { - "epoch": 0.262342936920929, - "grad_norm": 0.7836244106292725, - "learning_rate": 1.8251047087193806e-05, - "loss": 0.1171, + "epoch": 0.5246459211127469, + "grad_norm": 0.4464103877544403, + "learning_rate": 1.6502360525915022e-05, + "loss": 0.1115, "step": 10335 }, { - "epoch": 0.26246985658078437, - "grad_norm": 0.9748008251190186, - "learning_rate": 1.8250200956128104e-05, - "loss": 0.1386, + "epoch": 0.5248997411036094, + "grad_norm": 0.42552125453948975, + "learning_rate": 1.6500668392642606e-05, + "loss": 0.0953, "step": 10340 }, { - "epoch": 0.26259677624063965, - "grad_norm": 0.657301127910614, - "learning_rate": 1.8249354825062403e-05, - "loss": 0.129, + "epoch": 0.5251535610944718, + "grad_norm": 0.3787493407726288, + "learning_rate": 1.649897625937019e-05, + "loss": 0.0923, "step": 10345 }, { - "epoch": 0.262723695900495, - "grad_norm": 0.5656751394271851, - "learning_rate": 1.82485086939967e-05, - "loss": 0.1312, + "epoch": 0.5254073810853342, + "grad_norm": 0.7609786987304688, + "learning_rate": 1.6497284126097773e-05, + "loss": 0.0985, "step": 10350 }, { - "epoch": 0.2628506155603503, - "grad_norm": 0.7234407067298889, - "learning_rate": 1.8247662562931e-05, - "loss": 0.1623, + "epoch": 0.5256612010761967, + "grad_norm": 0.41064974665641785, + "learning_rate": 1.6495591992825356e-05, + "loss": 0.1105, "step": 10355 }, { - "epoch": 0.2629775352202056, - "grad_norm": 0.5996893644332886, - "learning_rate": 1.8246816431865298e-05, - "loss": 0.1429, + "epoch": 0.5259150210670592, + "grad_norm": 0.4209999740123749, + "learning_rate": 1.649389985955294e-05, + "loss": 0.0995, "step": 10360 }, { - "epoch": 0.2631044548800609, - "grad_norm": 0.6663133502006531, - "learning_rate": 1.8245970300799596e-05, - "loss": 0.1131, + "epoch": 0.5261688410579217, + "grad_norm": 0.405991792678833, + "learning_rate": 1.6492207726280523e-05, + "loss": 0.0927, "step": 10365 }, { - "epoch": 0.26323137453991624, - "grad_norm": 1.0403087139129639, - "learning_rate": 1.8245124169733895e-05, - "loss": 0.1646, + "epoch": 0.5264226610487842, + "grad_norm": 0.47690391540527344, + "learning_rate": 1.6490515593008107e-05, + "loss": 0.1119, "step": 10370 }, { - "epoch": 0.2633582941997715, - "grad_norm": 0.6069133877754211, - "learning_rate": 1.824427803866819e-05, - "loss": 0.1462, + "epoch": 0.5266764810396467, + "grad_norm": 0.3825083076953888, + "learning_rate": 1.648882345973569e-05, + "loss": 0.1069, "step": 10375 }, { - "epoch": 0.26348521385962687, - "grad_norm": 0.7852567434310913, - "learning_rate": 1.8243431907602488e-05, - "loss": 0.1497, + "epoch": 0.5269303010305092, + "grad_norm": 0.6007245779037476, + "learning_rate": 1.6487131326463274e-05, + "loss": 0.0949, "step": 10380 }, { - "epoch": 0.26361213351948215, - "grad_norm": 0.6449419260025024, - "learning_rate": 1.8242585776536786e-05, - "loss": 0.1507, + "epoch": 0.5271841210213717, + "grad_norm": 0.41400671005249023, + "learning_rate": 1.6485439193190858e-05, + "loss": 0.0959, "step": 10385 }, { - "epoch": 0.2637390531793375, - "grad_norm": 0.6475593447685242, - "learning_rate": 1.8241739645471085e-05, - "loss": 0.1294, + "epoch": 0.5274379410122341, + "grad_norm": 0.8201514482498169, + "learning_rate": 1.648374705991844e-05, + "loss": 0.0948, "step": 10390 }, { - "epoch": 0.2638659728391928, - "grad_norm": 0.5379044413566589, - "learning_rate": 1.8240893514405383e-05, - "loss": 0.1561, + "epoch": 0.5276917610030966, + "grad_norm": 0.4177849292755127, + "learning_rate": 1.6482054926646025e-05, + "loss": 0.1071, "step": 10395 }, { - "epoch": 0.2639928924990481, - "grad_norm": 0.7231510281562805, - "learning_rate": 1.8240047383339682e-05, - "loss": 0.1186, + "epoch": 0.527945580993959, + "grad_norm": 0.45180994272232056, + "learning_rate": 1.6480362793373608e-05, + "loss": 0.1038, "step": 10400 }, { - "epoch": 0.2641198121589034, - "grad_norm": 0.6444408893585205, - "learning_rate": 1.823920125227398e-05, - "loss": 0.1453, + "epoch": 0.5281994009848215, + "grad_norm": 0.5408563613891602, + "learning_rate": 1.6478670660101192e-05, + "loss": 0.1203, "step": 10405 }, { - "epoch": 0.26424673181875874, - "grad_norm": 0.6092085242271423, - "learning_rate": 1.823835512120828e-05, - "loss": 0.1288, + "epoch": 0.528453220975684, + "grad_norm": 0.4275490939617157, + "learning_rate": 1.6476978526828775e-05, + "loss": 0.1016, "step": 10410 }, { - "epoch": 0.264373651478614, - "grad_norm": 0.5316492319107056, - "learning_rate": 1.8237508990142574e-05, - "loss": 0.1299, + "epoch": 0.5287070409665465, + "grad_norm": 0.4530520439147949, + "learning_rate": 1.647528639355636e-05, + "loss": 0.1036, "step": 10415 }, { - "epoch": 0.26450057113846936, - "grad_norm": 0.5390154719352722, - "learning_rate": 1.8236662859076872e-05, - "loss": 0.1264, + "epoch": 0.528960860957409, + "grad_norm": 0.49070650339126587, + "learning_rate": 1.647359426028394e-05, + "loss": 0.0977, "step": 10420 }, { - "epoch": 0.26462749079832465, - "grad_norm": 1.282063364982605, - "learning_rate": 1.823581672801117e-05, - "loss": 0.1208, + "epoch": 0.5292146809482715, + "grad_norm": 0.4170216917991638, + "learning_rate": 1.6471902127011526e-05, + "loss": 0.1227, "step": 10425 }, { - "epoch": 0.26475441045818, - "grad_norm": 0.7379654049873352, - "learning_rate": 1.823497059694547e-05, - "loss": 0.1135, + "epoch": 0.529468500939134, + "grad_norm": 0.3593534827232361, + "learning_rate": 1.647020999373911e-05, + "loss": 0.1125, "step": 10430 }, { - "epoch": 0.2648813301180353, - "grad_norm": 0.6593842506408691, - "learning_rate": 1.8234124465879767e-05, - "loss": 0.0933, + "epoch": 0.5297223209299965, + "grad_norm": 0.49653005599975586, + "learning_rate": 1.646851786046669e-05, + "loss": 0.0916, "step": 10435 }, { - "epoch": 0.2650082497778906, - "grad_norm": 0.8443021774291992, - "learning_rate": 1.8233278334814066e-05, - "loss": 0.1211, + "epoch": 0.5299761409208589, + "grad_norm": 0.536085844039917, + "learning_rate": 1.6466825727194277e-05, + "loss": 0.0895, "step": 10440 }, { - "epoch": 0.2651351694377459, - "grad_norm": 0.5529556274414062, - "learning_rate": 1.8232432203748364e-05, - "loss": 0.1613, + "epoch": 0.5302299609117214, + "grad_norm": 0.4254559278488159, + "learning_rate": 1.6465133593921857e-05, + "loss": 0.0948, "step": 10445 }, { - "epoch": 0.26526208909760124, - "grad_norm": 0.8107729554176331, - "learning_rate": 1.8231586072682662e-05, - "loss": 0.1571, + "epoch": 0.5304837809025839, + "grad_norm": 0.3751010000705719, + "learning_rate": 1.6463441460649444e-05, + "loss": 0.0907, "step": 10450 }, { - "epoch": 0.2653890087574565, - "grad_norm": 0.6544068455696106, - "learning_rate": 1.8230739941616957e-05, - "loss": 0.1481, + "epoch": 0.5307376008934463, + "grad_norm": 0.4354502558708191, + "learning_rate": 1.6461749327377027e-05, + "loss": 0.1001, "step": 10455 }, { - "epoch": 0.26551592841731186, - "grad_norm": 0.9253925681114197, - "learning_rate": 1.8229893810551256e-05, - "loss": 0.134, + "epoch": 0.5309914208843088, + "grad_norm": 0.3564659357070923, + "learning_rate": 1.6460057194104607e-05, + "loss": 0.0916, "step": 10460 }, { - "epoch": 0.26564284807716715, - "grad_norm": 1.0547356605529785, - "learning_rate": 1.8229047679485554e-05, - "loss": 0.1374, + "epoch": 0.5312452408751713, + "grad_norm": 0.47137919068336487, + "learning_rate": 1.6458365060832194e-05, + "loss": 0.1, "step": 10465 }, { - "epoch": 0.2657697677370225, - "grad_norm": 0.7195497751235962, - "learning_rate": 1.8228201548419853e-05, - "loss": 0.1545, + "epoch": 0.5314990608660338, + "grad_norm": 0.32844093441963196, + "learning_rate": 1.6456672927559775e-05, + "loss": 0.0874, "step": 10470 }, { - "epoch": 0.2658966873968778, - "grad_norm": 0.8537266850471497, - "learning_rate": 1.8227355417354148e-05, - "loss": 0.143, + "epoch": 0.5317528808568963, + "grad_norm": 0.521373450756073, + "learning_rate": 1.6454980794287358e-05, + "loss": 0.1061, "step": 10475 }, { - "epoch": 0.2660236070567331, - "grad_norm": 0.8939195275306702, - "learning_rate": 1.8226509286288446e-05, - "loss": 0.1272, + "epoch": 0.5320067008477588, + "grad_norm": 0.36201703548431396, + "learning_rate": 1.6453288661014945e-05, + "loss": 0.1006, "step": 10480 }, { - "epoch": 0.2661505267165884, - "grad_norm": 0.4694000780582428, - "learning_rate": 1.8225663155222744e-05, - "loss": 0.124, + "epoch": 0.5322605208386213, + "grad_norm": 0.5359712839126587, + "learning_rate": 1.6451596527742525e-05, + "loss": 0.092, "step": 10485 }, { - "epoch": 0.26627744637644374, - "grad_norm": 0.7413976192474365, - "learning_rate": 1.8224817024157043e-05, - "loss": 0.156, + "epoch": 0.5325143408294837, + "grad_norm": 0.38301995396614075, + "learning_rate": 1.6449904394470112e-05, + "loss": 0.0994, "step": 10490 }, { - "epoch": 0.266404366036299, - "grad_norm": 0.7617392539978027, - "learning_rate": 1.822397089309134e-05, - "loss": 0.1685, + "epoch": 0.5327681608203462, + "grad_norm": 0.3559681475162506, + "learning_rate": 1.6448212261197692e-05, + "loss": 0.0872, "step": 10495 }, { - "epoch": 0.2665312856961543, - "grad_norm": 0.7957344651222229, - "learning_rate": 1.822312476202564e-05, - "loss": 0.1269, + "epoch": 0.5330219808112087, + "grad_norm": 0.4657198488712311, + "learning_rate": 1.6446520127925276e-05, + "loss": 0.0962, "step": 10500 }, { - "epoch": 0.26665820535600965, - "grad_norm": 0.8991373777389526, - "learning_rate": 1.8222278630959938e-05, - "loss": 0.1427, + "epoch": 0.5332758008020712, + "grad_norm": 0.8297700881958008, + "learning_rate": 1.6444827994652863e-05, + "loss": 0.1149, "step": 10505 }, { - "epoch": 0.26678512501586493, - "grad_norm": 0.7226929664611816, - "learning_rate": 1.8221432499894236e-05, - "loss": 0.1403, + "epoch": 0.5335296207929336, + "grad_norm": 0.4492545425891876, + "learning_rate": 1.6443135861380443e-05, + "loss": 0.0958, "step": 10510 }, { - "epoch": 0.2669120446757203, - "grad_norm": 0.640259325504303, - "learning_rate": 1.822058636882853e-05, - "loss": 0.1571, + "epoch": 0.5337834407837961, + "grad_norm": 0.41303345561027527, + "learning_rate": 1.6441443728108026e-05, + "loss": 0.0971, "step": 10515 }, { - "epoch": 0.26703896433557556, - "grad_norm": 0.8531690835952759, - "learning_rate": 1.821974023776283e-05, - "loss": 0.1361, + "epoch": 0.5340372607746586, + "grad_norm": 0.4111636281013489, + "learning_rate": 1.643975159483561e-05, + "loss": 0.0831, "step": 10520 }, { - "epoch": 0.2671658839954309, - "grad_norm": 1.3328566551208496, - "learning_rate": 1.8218894106697128e-05, - "loss": 0.1513, + "epoch": 0.5342910807655211, + "grad_norm": 0.5072298049926758, + "learning_rate": 1.6438059461563193e-05, + "loss": 0.0935, "step": 10525 }, { - "epoch": 0.2672928036552862, - "grad_norm": 0.8149065971374512, - "learning_rate": 1.8218047975631427e-05, - "loss": 0.1419, + "epoch": 0.5345449007563836, + "grad_norm": 0.43646836280822754, + "learning_rate": 1.6436367328290777e-05, + "loss": 0.1003, "step": 10530 }, { - "epoch": 0.2674197233151415, - "grad_norm": 1.1008367538452148, - "learning_rate": 1.8217201844565725e-05, - "loss": 0.162, + "epoch": 0.5347987207472461, + "grad_norm": 0.5552557110786438, + "learning_rate": 1.643467519501836e-05, + "loss": 0.0961, "step": 10535 }, { - "epoch": 0.2675466429749968, - "grad_norm": 1.2584394216537476, - "learning_rate": 1.8216355713500023e-05, - "loss": 0.135, + "epoch": 0.5350525407381085, + "grad_norm": 0.398117333650589, + "learning_rate": 1.6432983061745944e-05, + "loss": 0.1038, "step": 10540 }, { - "epoch": 0.26767356263485215, - "grad_norm": 0.6744344830513, - "learning_rate": 1.8215509582434322e-05, - "loss": 0.1242, + "epoch": 0.535306360728971, + "grad_norm": 0.301830530166626, + "learning_rate": 1.6431290928473528e-05, + "loss": 0.0828, "step": 10545 }, { - "epoch": 0.26780048229470743, - "grad_norm": 0.8094856142997742, - "learning_rate": 1.821466345136862e-05, - "loss": 0.1401, + "epoch": 0.5355601807198335, + "grad_norm": 0.43275371193885803, + "learning_rate": 1.642959879520111e-05, + "loss": 0.1077, "step": 10550 }, { - "epoch": 0.26792740195456277, - "grad_norm": 0.5045608282089233, - "learning_rate": 1.8213817320302915e-05, - "loss": 0.1509, + "epoch": 0.535814000710696, + "grad_norm": 0.3594335913658142, + "learning_rate": 1.6427906661928695e-05, + "loss": 0.0974, "step": 10555 }, { - "epoch": 0.26805432161441806, - "grad_norm": 0.7787953615188599, - "learning_rate": 1.8212971189237214e-05, - "loss": 0.1377, + "epoch": 0.5360678207015585, + "grad_norm": 0.3689029812812805, + "learning_rate": 1.6426214528656278e-05, + "loss": 0.0762, "step": 10560 }, { - "epoch": 0.2681812412742734, - "grad_norm": 0.81280118227005, - "learning_rate": 1.8212125058171512e-05, - "loss": 0.1257, + "epoch": 0.5363216406924209, + "grad_norm": 0.7268462777137756, + "learning_rate": 1.6424522395383862e-05, + "loss": 0.0943, "step": 10565 }, { - "epoch": 0.2683081609341287, - "grad_norm": 0.9612415432929993, - "learning_rate": 1.821127892710581e-05, - "loss": 0.1324, + "epoch": 0.5365754606832834, + "grad_norm": 0.5907272696495056, + "learning_rate": 1.6422830262111445e-05, + "loss": 0.1044, "step": 10570 }, { - "epoch": 0.268435080593984, - "grad_norm": 0.8268859386444092, - "learning_rate": 1.821043279604011e-05, - "loss": 0.135, + "epoch": 0.5368292806741459, + "grad_norm": 0.640363335609436, + "learning_rate": 1.642113812883903e-05, + "loss": 0.0887, "step": 10575 }, { - "epoch": 0.2685620002538393, - "grad_norm": 0.9991202354431152, - "learning_rate": 1.8209586664974407e-05, - "loss": 0.1303, + "epoch": 0.5370831006650084, + "grad_norm": 0.9156871438026428, + "learning_rate": 1.6419445995566612e-05, + "loss": 0.1107, "step": 10580 }, { - "epoch": 0.26868891991369465, - "grad_norm": 0.7082025408744812, - "learning_rate": 1.8208740533908706e-05, - "loss": 0.1416, + "epoch": 0.5373369206558709, + "grad_norm": 0.449605792760849, + "learning_rate": 1.6417753862294196e-05, + "loss": 0.0975, "step": 10585 }, { - "epoch": 0.26881583957354993, - "grad_norm": 0.980776309967041, - "learning_rate": 1.8207894402843004e-05, - "loss": 0.1464, + "epoch": 0.5375907406467333, + "grad_norm": 0.4629546105861664, + "learning_rate": 1.641606172902178e-05, + "loss": 0.0946, "step": 10590 }, { - "epoch": 0.26894275923340527, - "grad_norm": 0.7157489657402039, - "learning_rate": 1.82070482717773e-05, - "loss": 0.1446, + "epoch": 0.5378445606375958, + "grad_norm": 0.7200871109962463, + "learning_rate": 1.6414369595749363e-05, + "loss": 0.1041, "step": 10595 }, { - "epoch": 0.26906967889326056, - "grad_norm": 0.632318913936615, - "learning_rate": 1.8206202140711597e-05, - "loss": 0.145, + "epoch": 0.5380983806284583, + "grad_norm": 0.4900858402252197, + "learning_rate": 1.6412677462476947e-05, + "loss": 0.1005, "step": 10600 }, { - "epoch": 0.2691965985531159, - "grad_norm": 0.792119026184082, - "learning_rate": 1.8205356009645896e-05, - "loss": 0.1325, + "epoch": 0.5383522006193208, + "grad_norm": 0.3713901937007904, + "learning_rate": 1.641098532920453e-05, + "loss": 0.0933, "step": 10605 }, { - "epoch": 0.2693235182129712, - "grad_norm": 0.532227635383606, - "learning_rate": 1.8204509878580194e-05, - "loss": 0.1162, + "epoch": 0.5386060206101833, + "grad_norm": 0.4475940763950348, + "learning_rate": 1.6409293195932114e-05, + "loss": 0.0977, "step": 10610 }, { - "epoch": 0.2694504378728265, - "grad_norm": 0.7298319339752197, - "learning_rate": 1.820366374751449e-05, - "loss": 0.1746, + "epoch": 0.5388598406010457, + "grad_norm": 0.5317751169204712, + "learning_rate": 1.6407601062659697e-05, + "loss": 0.109, "step": 10615 }, { - "epoch": 0.2695773575326818, - "grad_norm": 0.6765480041503906, - "learning_rate": 1.8202817616448788e-05, - "loss": 0.1245, + "epoch": 0.5391136605919082, + "grad_norm": 0.4223870635032654, + "learning_rate": 1.640590892938728e-05, + "loss": 0.0951, "step": 10620 }, { - "epoch": 0.26970427719253715, - "grad_norm": 0.6902404427528381, - "learning_rate": 1.8201971485383086e-05, - "loss": 0.1291, + "epoch": 0.5393674805827707, + "grad_norm": 0.4437798261642456, + "learning_rate": 1.640421679611486e-05, + "loss": 0.0971, "step": 10625 }, { - "epoch": 0.26983119685239243, - "grad_norm": 0.6430912017822266, - "learning_rate": 1.8201125354317384e-05, - "loss": 0.1124, + "epoch": 0.5396213005736332, + "grad_norm": 0.5337457060813904, + "learning_rate": 1.6402524662842448e-05, + "loss": 0.0893, "step": 10630 }, { - "epoch": 0.26995811651224777, - "grad_norm": 0.6160311102867126, - "learning_rate": 1.8200279223251683e-05, - "loss": 0.1494, + "epoch": 0.5398751205644957, + "grad_norm": 0.5861578583717346, + "learning_rate": 1.640083252957003e-05, + "loss": 0.1028, "step": 10635 }, { - "epoch": 0.27008503617210305, - "grad_norm": 0.5549115538597107, - "learning_rate": 1.819943309218598e-05, - "loss": 0.128, + "epoch": 0.5401289405553581, + "grad_norm": 0.46542319655418396, + "learning_rate": 1.6399140396297615e-05, + "loss": 0.0978, "step": 10640 }, { - "epoch": 0.2702119558319584, - "grad_norm": 0.7361851334571838, - "learning_rate": 1.819858696112028e-05, - "loss": 0.1319, + "epoch": 0.5403827605462206, + "grad_norm": 0.5477737188339233, + "learning_rate": 1.63974482630252e-05, + "loss": 0.0928, "step": 10645 }, { - "epoch": 0.2703388754918137, - "grad_norm": 0.5054565072059631, - "learning_rate": 1.8197740830054578e-05, - "loss": 0.1297, + "epoch": 0.5406365805370831, + "grad_norm": 0.38435491919517517, + "learning_rate": 1.639575612975278e-05, + "loss": 0.1031, "step": 10650 }, { - "epoch": 0.270465795151669, - "grad_norm": 0.6162994503974915, - "learning_rate": 1.8196894698988873e-05, - "loss": 0.1374, + "epoch": 0.5408904005279456, + "grad_norm": 0.632420003414154, + "learning_rate": 1.6394063996480366e-05, + "loss": 0.1089, "step": 10655 }, { - "epoch": 0.2705927148115243, - "grad_norm": 0.756897509098053, - "learning_rate": 1.819604856792317e-05, - "loss": 0.1264, + "epoch": 0.5411442205188081, + "grad_norm": 0.2502029240131378, + "learning_rate": 1.639237186320795e-05, + "loss": 0.1011, "step": 10660 }, { - "epoch": 0.27071963447137964, - "grad_norm": 0.9612749218940735, - "learning_rate": 1.819520243685747e-05, - "loss": 0.1399, + "epoch": 0.5413980405096706, + "grad_norm": 0.7792786955833435, + "learning_rate": 1.639067972993553e-05, + "loss": 0.1021, "step": 10665 }, { - "epoch": 0.27084655413123493, - "grad_norm": 0.6429823040962219, - "learning_rate": 1.819435630579177e-05, - "loss": 0.1507, + "epoch": 0.541651860500533, + "grad_norm": 0.36001554131507874, + "learning_rate": 1.6388987596663116e-05, + "loss": 0.1007, "step": 10670 }, { - "epoch": 0.2709734737910902, - "grad_norm": 1.0581218004226685, - "learning_rate": 1.8193510174726067e-05, - "loss": 0.1153, + "epoch": 0.5419056804913955, + "grad_norm": 0.6201646327972412, + "learning_rate": 1.6387295463390696e-05, + "loss": 0.1045, "step": 10675 }, { - "epoch": 0.27110039345094555, - "grad_norm": 0.7205058336257935, - "learning_rate": 1.8192664043660365e-05, - "loss": 0.1343, + "epoch": 0.542159500482258, + "grad_norm": 0.6190532445907593, + "learning_rate": 1.638560333011828e-05, + "loss": 0.0953, "step": 10680 }, { - "epoch": 0.27122731311080084, - "grad_norm": 1.312429666519165, - "learning_rate": 1.8191817912594664e-05, - "loss": 0.1543, + "epoch": 0.5424133204731204, + "grad_norm": 0.7013592720031738, + "learning_rate": 1.6383911196845867e-05, + "loss": 0.0991, "step": 10685 }, { - "epoch": 0.2713542327706562, - "grad_norm": 0.5877411365509033, - "learning_rate": 1.8190971781528962e-05, - "loss": 0.1465, + "epoch": 0.5426671404639829, + "grad_norm": 0.3520493507385254, + "learning_rate": 1.6382219063573447e-05, + "loss": 0.0828, "step": 10690 }, { - "epoch": 0.27148115243051146, - "grad_norm": 0.5379381775856018, - "learning_rate": 1.8190125650463257e-05, - "loss": 0.1438, + "epoch": 0.5429209604548454, + "grad_norm": 0.788502037525177, + "learning_rate": 1.6380526930301034e-05, + "loss": 0.093, "step": 10695 }, { - "epoch": 0.2716080720903668, - "grad_norm": 0.8152998685836792, - "learning_rate": 1.8189279519397555e-05, - "loss": 0.153, + "epoch": 0.5431747804457079, + "grad_norm": 0.38444724678993225, + "learning_rate": 1.6378834797028614e-05, + "loss": 0.0963, "step": 10700 }, { - "epoch": 0.2717349917502221, - "grad_norm": 1.305991291999817, - "learning_rate": 1.8188433388331854e-05, - "loss": 0.142, + "epoch": 0.5434286004365704, + "grad_norm": 0.6140927076339722, + "learning_rate": 1.6377142663756198e-05, + "loss": 0.103, "step": 10705 }, { - "epoch": 0.27186191141007743, - "grad_norm": 0.560907781124115, - "learning_rate": 1.8187587257266152e-05, - "loss": 0.1124, + "epoch": 0.5436824204274329, + "grad_norm": 0.5046095848083496, + "learning_rate": 1.6375450530483785e-05, + "loss": 0.1056, "step": 10710 }, { - "epoch": 0.2719888310699327, - "grad_norm": 0.4456802010536194, - "learning_rate": 1.818674112620045e-05, - "loss": 0.1116, + "epoch": 0.5439362404182954, + "grad_norm": 0.5487456917762756, + "learning_rate": 1.6373758397211365e-05, + "loss": 0.1221, "step": 10715 }, { - "epoch": 0.27211575072978805, - "grad_norm": 1.1398930549621582, - "learning_rate": 1.818589499513475e-05, - "loss": 0.1341, + "epoch": 0.5441900604091578, + "grad_norm": 0.49649783968925476, + "learning_rate": 1.637206626393895e-05, + "loss": 0.1, "step": 10720 }, { - "epoch": 0.27224267038964334, - "grad_norm": 0.8604961633682251, - "learning_rate": 1.8185048864069047e-05, - "loss": 0.1672, + "epoch": 0.5444438804000203, + "grad_norm": 0.3561123013496399, + "learning_rate": 1.6370374130666532e-05, + "loss": 0.0825, "step": 10725 }, { - "epoch": 0.2723695900494987, - "grad_norm": 0.7261795401573181, - "learning_rate": 1.8184202733003346e-05, - "loss": 0.14, + "epoch": 0.5446977003908828, + "grad_norm": 0.4641798138618469, + "learning_rate": 1.6368681997394115e-05, + "loss": 0.0995, "step": 10730 }, { - "epoch": 0.27249650970935396, - "grad_norm": 0.4452093839645386, - "learning_rate": 1.818335660193764e-05, - "loss": 0.1077, + "epoch": 0.5449515203817452, + "grad_norm": 0.43982285261154175, + "learning_rate": 1.63669898641217e-05, + "loss": 0.1151, "step": 10735 }, { - "epoch": 0.2726234293692093, - "grad_norm": 0.7117156982421875, - "learning_rate": 1.818251047087194e-05, - "loss": 0.1398, + "epoch": 0.5452053403726077, + "grad_norm": 0.3846445679664612, + "learning_rate": 1.6365297730849283e-05, + "loss": 0.1044, "step": 10740 }, { - "epoch": 0.2727503490290646, - "grad_norm": 0.9321088790893555, - "learning_rate": 1.8181664339806238e-05, - "loss": 0.1595, + "epoch": 0.5454591603634702, + "grad_norm": 0.46901729702949524, + "learning_rate": 1.6363605597576866e-05, + "loss": 0.1155, "step": 10745 }, { - "epoch": 0.2728772686889199, - "grad_norm": 1.6590540409088135, - "learning_rate": 1.8180818208740536e-05, - "loss": 0.1483, + "epoch": 0.5457129803543327, + "grad_norm": 0.3808036744594574, + "learning_rate": 1.636191346430445e-05, + "loss": 0.1055, "step": 10750 }, { - "epoch": 0.2730041883487752, - "grad_norm": 0.623633623123169, - "learning_rate": 1.817997207767483e-05, - "loss": 0.1254, + "epoch": 0.5459668003451952, + "grad_norm": 1.3655842542648315, + "learning_rate": 1.6360221331032033e-05, + "loss": 0.1032, "step": 10755 }, { - "epoch": 0.27313110800863055, - "grad_norm": 0.9871307015419006, - "learning_rate": 1.817912594660913e-05, - "loss": 0.1594, + "epoch": 0.5462206203360577, + "grad_norm": 0.4646155834197998, + "learning_rate": 1.6358529197759617e-05, + "loss": 0.0991, "step": 10760 }, { - "epoch": 0.27325802766848584, - "grad_norm": 0.5379174947738647, - "learning_rate": 1.8178279815543428e-05, - "loss": 0.1344, + "epoch": 0.5464744403269202, + "grad_norm": 0.6558862924575806, + "learning_rate": 1.63568370644872e-05, + "loss": 0.1017, "step": 10765 }, { - "epoch": 0.2733849473283412, - "grad_norm": 0.8160228133201599, - "learning_rate": 1.8177433684477726e-05, - "loss": 0.1314, + "epoch": 0.5467282603177827, + "grad_norm": 0.4608815610408783, + "learning_rate": 1.6355144931214784e-05, + "loss": 0.1032, "step": 10770 }, { - "epoch": 0.27351186698819646, - "grad_norm": 0.5404942035675049, - "learning_rate": 1.8176587553412025e-05, - "loss": 0.1485, + "epoch": 0.5469820803086451, + "grad_norm": 0.3188372254371643, + "learning_rate": 1.6353452797942367e-05, + "loss": 0.0907, "step": 10775 }, { - "epoch": 0.2736387866480518, - "grad_norm": 0.6171514391899109, - "learning_rate": 1.8175741422346323e-05, - "loss": 0.1083, + "epoch": 0.5472359002995076, + "grad_norm": 0.31106188893318176, + "learning_rate": 1.635176066466995e-05, + "loss": 0.1002, "step": 10780 }, { - "epoch": 0.2737657063079071, - "grad_norm": 0.6776657104492188, - "learning_rate": 1.817489529128062e-05, - "loss": 0.1211, + "epoch": 0.54748972029037, + "grad_norm": 0.37392106652259827, + "learning_rate": 1.6350068531397534e-05, + "loss": 0.0935, "step": 10785 }, { - "epoch": 0.2738926259677624, - "grad_norm": 1.0158299207687378, - "learning_rate": 1.817404916021492e-05, - "loss": 0.1439, + "epoch": 0.5477435402812325, + "grad_norm": 0.3712967038154602, + "learning_rate": 1.6348376398125118e-05, + "loss": 0.1068, "step": 10790 }, { - "epoch": 0.2740195456276177, - "grad_norm": 0.7957082390785217, - "learning_rate": 1.8173203029149215e-05, - "loss": 0.1368, + "epoch": 0.547997360272095, + "grad_norm": 1.323493242263794, + "learning_rate": 1.63466842648527e-05, + "loss": 0.1024, "step": 10795 }, { - "epoch": 0.27414646528747305, - "grad_norm": 0.44895777106285095, - "learning_rate": 1.8172356898083513e-05, - "loss": 0.1353, + "epoch": 0.5482511802629575, + "grad_norm": 0.3808216452598572, + "learning_rate": 1.6344992131580285e-05, + "loss": 0.0899, "step": 10800 }, { - "epoch": 0.27427338494732834, - "grad_norm": 0.5716232061386108, - "learning_rate": 1.817151076701781e-05, - "loss": 0.1162, + "epoch": 0.54850500025382, + "grad_norm": 0.7817515134811401, + "learning_rate": 1.634329999830787e-05, + "loss": 0.1175, "step": 10805 }, { - "epoch": 0.2744003046071837, - "grad_norm": 0.37966981530189514, - "learning_rate": 1.817066463595211e-05, - "loss": 0.1218, + "epoch": 0.5487588202446825, + "grad_norm": 0.9595544934272766, + "learning_rate": 1.6341607865035452e-05, + "loss": 0.0989, "step": 10810 }, { - "epoch": 0.27452722426703896, - "grad_norm": 0.676076352596283, - "learning_rate": 1.816981850488641e-05, - "loss": 0.1202, + "epoch": 0.549012640235545, + "grad_norm": 0.5686103701591492, + "learning_rate": 1.6339915731763036e-05, + "loss": 0.1023, "step": 10815 }, { - "epoch": 0.2746541439268943, - "grad_norm": 1.0266637802124023, - "learning_rate": 1.8168972373820707e-05, - "loss": 0.1583, + "epoch": 0.5492664602264075, + "grad_norm": 0.457376092672348, + "learning_rate": 1.633822359849062e-05, + "loss": 0.1002, "step": 10820 }, { - "epoch": 0.2747810635867496, - "grad_norm": 0.867236852645874, - "learning_rate": 1.8168126242755005e-05, - "loss": 0.1315, + "epoch": 0.54952028021727, + "grad_norm": 0.954951286315918, + "learning_rate": 1.6336531465218203e-05, + "loss": 0.1003, "step": 10825 }, { - "epoch": 0.2749079832466049, - "grad_norm": 0.7518088221549988, - "learning_rate": 1.8167280111689304e-05, - "loss": 0.1487, + "epoch": 0.5497741002081324, + "grad_norm": 0.40505141019821167, + "learning_rate": 1.6334839331945783e-05, + "loss": 0.0966, "step": 10830 }, { - "epoch": 0.2750349029064602, - "grad_norm": 0.5471534132957458, - "learning_rate": 1.81664339806236e-05, - "loss": 0.1131, + "epoch": 0.5500279201989948, + "grad_norm": 0.41799911856651306, + "learning_rate": 1.633314719867337e-05, + "loss": 0.1108, "step": 10835 }, { - "epoch": 0.2751618225663155, - "grad_norm": 0.7500672340393066, - "learning_rate": 1.8165587849557897e-05, - "loss": 0.1206, + "epoch": 0.5502817401898573, + "grad_norm": 0.5992372632026672, + "learning_rate": 1.6331455065400953e-05, + "loss": 0.1106, "step": 10840 }, { - "epoch": 0.27528874222617084, - "grad_norm": 0.7388617992401123, - "learning_rate": 1.8164741718492195e-05, - "loss": 0.1455, + "epoch": 0.5505355601807198, + "grad_norm": 0.8904902935028076, + "learning_rate": 1.6329762932128537e-05, + "loss": 0.1022, "step": 10845 }, { - "epoch": 0.2754156618860261, - "grad_norm": 0.7050172686576843, - "learning_rate": 1.8163895587426494e-05, - "loss": 0.1252, + "epoch": 0.5507893801715823, + "grad_norm": 0.5283609628677368, + "learning_rate": 1.632807079885612e-05, + "loss": 0.0975, "step": 10850 }, { - "epoch": 0.27554258154588146, - "grad_norm": 0.5725177526473999, - "learning_rate": 1.8163049456360792e-05, - "loss": 0.1371, + "epoch": 0.5510432001624448, + "grad_norm": 0.49641087651252747, + "learning_rate": 1.63263786655837e-05, + "loss": 0.099, "step": 10855 }, { - "epoch": 0.27566950120573674, - "grad_norm": 0.6277180910110474, - "learning_rate": 1.816220332529509e-05, - "loss": 0.1222, + "epoch": 0.5512970201533073, + "grad_norm": 0.5636726021766663, + "learning_rate": 1.6324686532311288e-05, + "loss": 0.0957, "step": 10860 }, { - "epoch": 0.2757964208655921, - "grad_norm": 0.7796297669410706, - "learning_rate": 1.816135719422939e-05, - "loss": 0.1497, + "epoch": 0.5515508401441698, + "grad_norm": 0.6105141043663025, + "learning_rate": 1.632299439903887e-05, + "loss": 0.1119, "step": 10865 }, { - "epoch": 0.27592334052544737, - "grad_norm": 1.0008755922317505, - "learning_rate": 1.8160511063163687e-05, - "loss": 0.1367, + "epoch": 0.5518046601350323, + "grad_norm": 0.43738439679145813, + "learning_rate": 1.632130226576645e-05, + "loss": 0.0902, "step": 10870 }, { - "epoch": 0.2760502601853027, - "grad_norm": 0.7615749835968018, - "learning_rate": 1.8159664932097986e-05, - "loss": 0.1072, + "epoch": 0.5520584801258948, + "grad_norm": 0.6451889872550964, + "learning_rate": 1.6319610132494038e-05, + "loss": 0.0906, "step": 10875 }, { - "epoch": 0.276177179845158, - "grad_norm": 0.9321630597114563, - "learning_rate": 1.815881880103228e-05, - "loss": 0.1611, + "epoch": 0.5523123001167572, + "grad_norm": 0.4356880187988281, + "learning_rate": 1.631791799922162e-05, + "loss": 0.1015, "step": 10880 }, { - "epoch": 0.27630409950501333, - "grad_norm": 0.5253831148147583, - "learning_rate": 1.815797266996658e-05, - "loss": 0.1308, + "epoch": 0.5525661201076196, + "grad_norm": 0.487354040145874, + "learning_rate": 1.6316225865949205e-05, + "loss": 0.1072, "step": 10885 }, { - "epoch": 0.2764310191648686, - "grad_norm": 0.6507368683815002, - "learning_rate": 1.8157126538900878e-05, - "loss": 0.134, + "epoch": 0.5528199400984821, + "grad_norm": 0.41686418652534485, + "learning_rate": 1.631453373267679e-05, + "loss": 0.0994, "step": 10890 }, { - "epoch": 0.27655793882472396, - "grad_norm": 0.6041866540908813, - "learning_rate": 1.8156280407835176e-05, - "loss": 0.1411, + "epoch": 0.5530737600893446, + "grad_norm": 0.5132306218147278, + "learning_rate": 1.631284159940437e-05, + "loss": 0.0994, "step": 10895 }, { - "epoch": 0.27668485848457924, - "grad_norm": 1.8006521463394165, - "learning_rate": 1.815543427676947e-05, - "loss": 0.1203, + "epoch": 0.5533275800802071, + "grad_norm": 0.4513254463672638, + "learning_rate": 1.6311149466131956e-05, + "loss": 0.0963, "step": 10900 }, { - "epoch": 0.2768117781444346, - "grad_norm": 0.6021510362625122, - "learning_rate": 1.815458814570377e-05, - "loss": 0.1229, + "epoch": 0.5535814000710696, + "grad_norm": 0.38187047839164734, + "learning_rate": 1.6309457332859536e-05, + "loss": 0.0969, "step": 10905 }, { - "epoch": 0.27693869780428987, - "grad_norm": 0.5405517816543579, - "learning_rate": 1.8153742014638068e-05, - "loss": 0.1184, + "epoch": 0.5538352200619321, + "grad_norm": 0.5190130472183228, + "learning_rate": 1.630776519958712e-05, + "loss": 0.1029, "step": 10910 }, { - "epoch": 0.2770656174641452, - "grad_norm": 0.8995214700698853, - "learning_rate": 1.8152895883572366e-05, - "loss": 0.1292, + "epoch": 0.5540890400527946, + "grad_norm": 0.3652910888195038, + "learning_rate": 1.6306073066314703e-05, + "loss": 0.0953, "step": 10915 }, { - "epoch": 0.2771925371240005, - "grad_norm": 1.0038641691207886, - "learning_rate": 1.8152049752506665e-05, - "loss": 0.0964, + "epoch": 0.5543428600436571, + "grad_norm": 0.4218839406967163, + "learning_rate": 1.6304380933042287e-05, + "loss": 0.0997, "step": 10920 }, { - "epoch": 0.27731945678385583, - "grad_norm": 5.893551826477051, - "learning_rate": 1.8151203621440963e-05, - "loss": 0.1609, + "epoch": 0.5545966800345196, + "grad_norm": 0.4914858043193817, + "learning_rate": 1.630268879976987e-05, + "loss": 0.1099, "step": 10925 }, { - "epoch": 0.2774463764437111, - "grad_norm": 0.5537265539169312, - "learning_rate": 1.815035749037526e-05, - "loss": 0.1614, + "epoch": 0.554850500025382, + "grad_norm": 0.3577395975589752, + "learning_rate": 1.6300996666497454e-05, + "loss": 0.1083, "step": 10930 }, { - "epoch": 0.27757329610356646, - "grad_norm": 0.4616085886955261, - "learning_rate": 1.814951135930956e-05, - "loss": 0.0998, + "epoch": 0.5551043200162444, + "grad_norm": 0.32556432485580444, + "learning_rate": 1.6299304533225037e-05, + "loss": 0.0865, "step": 10935 }, { - "epoch": 0.27770021576342174, - "grad_norm": 1.0577502250671387, - "learning_rate": 1.8148665228243855e-05, - "loss": 0.1557, + "epoch": 0.5553581400071069, + "grad_norm": 0.5093722939491272, + "learning_rate": 1.629761239995262e-05, + "loss": 0.0913, "step": 10940 }, { - "epoch": 0.2778271354232771, - "grad_norm": 0.8485143184661865, - "learning_rate": 1.8147819097178153e-05, - "loss": 0.1446, + "epoch": 0.5556119599979694, + "grad_norm": 0.5503299832344055, + "learning_rate": 1.6295920266680204e-05, + "loss": 0.1075, "step": 10945 }, { - "epoch": 0.27795405508313237, - "grad_norm": 0.672253429889679, - "learning_rate": 1.8146972966112452e-05, - "loss": 0.1057, + "epoch": 0.5558657799888319, + "grad_norm": 0.6354576945304871, + "learning_rate": 1.6294228133407788e-05, + "loss": 0.103, "step": 10950 }, { - "epoch": 0.2780809747429877, - "grad_norm": 0.8017942309379578, - "learning_rate": 1.814612683504675e-05, - "loss": 0.1514, + "epoch": 0.5561195999796944, + "grad_norm": 0.43695926666259766, + "learning_rate": 1.629253600013537e-05, + "loss": 0.0993, "step": 10955 }, { - "epoch": 0.278207894402843, - "grad_norm": 0.6683960556983948, - "learning_rate": 1.814528070398105e-05, - "loss": 0.1155, + "epoch": 0.5563734199705569, + "grad_norm": 0.36351871490478516, + "learning_rate": 1.6290843866862955e-05, + "loss": 0.0972, "step": 10960 }, { - "epoch": 0.27833481406269833, - "grad_norm": 0.4990769922733307, - "learning_rate": 1.8144434572915347e-05, - "loss": 0.1221, + "epoch": 0.5566272399614194, + "grad_norm": 0.6152383685112, + "learning_rate": 1.628915173359054e-05, + "loss": 0.1052, "step": 10965 }, { - "epoch": 0.2784617337225536, - "grad_norm": 1.5576382875442505, - "learning_rate": 1.8143588441849645e-05, - "loss": 0.1588, + "epoch": 0.5568810599522819, + "grad_norm": 0.7461835145950317, + "learning_rate": 1.6287459600318122e-05, + "loss": 0.0893, "step": 10970 }, { - "epoch": 0.27858865338240896, - "grad_norm": 0.6433332562446594, - "learning_rate": 1.8142742310783944e-05, - "loss": 0.1204, + "epoch": 0.5571348799431444, + "grad_norm": 0.45629680156707764, + "learning_rate": 1.6285767467045706e-05, + "loss": 0.0984, "step": 10975 }, { - "epoch": 0.27871557304226424, - "grad_norm": 0.5335119962692261, - "learning_rate": 1.814189617971824e-05, - "loss": 0.1321, + "epoch": 0.5573886999340067, + "grad_norm": 0.4399794340133667, + "learning_rate": 1.628407533377329e-05, + "loss": 0.1036, "step": 10980 }, { - "epoch": 0.2788424927021196, - "grad_norm": 0.5914715528488159, - "learning_rate": 1.8141050048652537e-05, - "loss": 0.1359, + "epoch": 0.5576425199248692, + "grad_norm": 0.4887857437133789, + "learning_rate": 1.6282383200500873e-05, + "loss": 0.1022, "step": 10985 }, { - "epoch": 0.27896941236197487, - "grad_norm": 1.378893256187439, - "learning_rate": 1.8140203917586836e-05, - "loss": 0.1288, + "epoch": 0.5578963399157317, + "grad_norm": 0.5037972927093506, + "learning_rate": 1.6280691067228456e-05, + "loss": 0.0961, "step": 10990 }, { - "epoch": 0.2790963320218302, - "grad_norm": 0.5665779709815979, - "learning_rate": 1.8139357786521134e-05, - "loss": 0.1381, + "epoch": 0.5581501599065942, + "grad_norm": 0.45002323389053345, + "learning_rate": 1.627899893395604e-05, + "loss": 0.0973, "step": 10995 }, { - "epoch": 0.2792232516816855, - "grad_norm": 0.7179095149040222, - "learning_rate": 1.8138511655455432e-05, - "loss": 0.153, + "epoch": 0.5584039798974567, + "grad_norm": 0.6315869688987732, + "learning_rate": 1.6277306800683623e-05, + "loss": 0.0963, "step": 11000 }, { - "epoch": 0.27935017134154083, - "grad_norm": 0.5445453524589539, - "learning_rate": 1.813766552438973e-05, - "loss": 0.139, + "epoch": 0.5586577998883192, + "grad_norm": 0.3851071000099182, + "learning_rate": 1.6275614667411207e-05, + "loss": 0.0852, "step": 11005 }, { - "epoch": 0.2794770910013961, - "grad_norm": 0.5175193548202515, - "learning_rate": 1.813681939332403e-05, - "loss": 0.1302, + "epoch": 0.5589116198791817, + "grad_norm": 0.6421680450439453, + "learning_rate": 1.627392253413879e-05, + "loss": 0.1016, "step": 11010 }, { - "epoch": 0.2796040106612514, - "grad_norm": 0.5770663022994995, - "learning_rate": 1.8135973262258328e-05, - "loss": 0.1305, + "epoch": 0.5591654398700442, + "grad_norm": 0.7815170884132385, + "learning_rate": 1.6272230400866374e-05, + "loss": 0.0833, "step": 11015 }, { - "epoch": 0.27973093032110674, - "grad_norm": 0.8919943571090698, - "learning_rate": 1.8135127131192623e-05, - "loss": 0.1309, + "epoch": 0.5594192598609067, + "grad_norm": 0.6230599880218506, + "learning_rate": 1.6270538267593958e-05, + "loss": 0.0952, "step": 11020 }, { - "epoch": 0.279857849980962, - "grad_norm": 0.5534716844558716, - "learning_rate": 1.813428100012692e-05, - "loss": 0.1349, + "epoch": 0.5596730798517692, + "grad_norm": 0.6255953311920166, + "learning_rate": 1.626884613432154e-05, + "loss": 0.0935, "step": 11025 }, { - "epoch": 0.27998476964081737, - "grad_norm": 0.4583420157432556, - "learning_rate": 1.813343486906122e-05, - "loss": 0.1249, + "epoch": 0.5599268998426316, + "grad_norm": 0.3688150644302368, + "learning_rate": 1.6267154001049125e-05, + "loss": 0.1016, "step": 11030 }, { - "epoch": 0.28011168930067265, - "grad_norm": 0.7272845506668091, - "learning_rate": 1.8132588737995518e-05, - "loss": 0.1497, + "epoch": 0.560180719833494, + "grad_norm": 0.41703271865844727, + "learning_rate": 1.6265461867776708e-05, + "loss": 0.0937, "step": 11035 }, { - "epoch": 0.280238608960528, - "grad_norm": 1.1202971935272217, - "learning_rate": 1.8131742606929813e-05, - "loss": 0.1136, + "epoch": 0.5604345398243565, + "grad_norm": 0.6567605137825012, + "learning_rate": 1.6263769734504292e-05, + "loss": 0.0937, "step": 11040 }, { - "epoch": 0.2803655286203833, - "grad_norm": 0.7007968425750732, - "learning_rate": 1.813089647586411e-05, - "loss": 0.132, + "epoch": 0.560688359815219, + "grad_norm": 0.5505645275115967, + "learning_rate": 1.6262077601231875e-05, + "loss": 0.1043, "step": 11045 }, { - "epoch": 0.2804924482802386, - "grad_norm": 0.5623154044151306, - "learning_rate": 1.813005034479841e-05, - "loss": 0.115, + "epoch": 0.5609421798060815, + "grad_norm": 0.42129865288734436, + "learning_rate": 1.626038546795946e-05, + "loss": 0.1049, "step": 11050 }, { - "epoch": 0.2806193679400939, - "grad_norm": 0.5715023875236511, - "learning_rate": 1.8129204213732708e-05, - "loss": 0.1527, + "epoch": 0.561195999796944, + "grad_norm": 0.5019480586051941, + "learning_rate": 1.6258693334687042e-05, + "loss": 0.0966, "step": 11055 }, { - "epoch": 0.28074628759994924, - "grad_norm": 0.6222023367881775, - "learning_rate": 1.8128358082667006e-05, - "loss": 0.1381, + "epoch": 0.5614498197878065, + "grad_norm": 0.44651737809181213, + "learning_rate": 1.6257001201414623e-05, + "loss": 0.1014, "step": 11060 }, { - "epoch": 0.2808732072598045, - "grad_norm": 0.7128495573997498, - "learning_rate": 1.8127511951601305e-05, - "loss": 0.1262, + "epoch": 0.561703639778669, + "grad_norm": 0.4271222651004791, + "learning_rate": 1.625530906814221e-05, + "loss": 0.105, "step": 11065 }, { - "epoch": 0.28100012691965986, - "grad_norm": 0.6524057388305664, - "learning_rate": 1.8126665820535603e-05, - "loss": 0.1337, + "epoch": 0.5619574597695315, + "grad_norm": 0.354856938123703, + "learning_rate": 1.6253616934869793e-05, + "loss": 0.0862, "step": 11070 }, { - "epoch": 0.28112704657951515, - "grad_norm": 0.5519865155220032, - "learning_rate": 1.81258196894699e-05, - "loss": 0.1323, + "epoch": 0.562211279760394, + "grad_norm": 0.4536794424057007, + "learning_rate": 1.6251924801597373e-05, + "loss": 0.0963, "step": 11075 }, { - "epoch": 0.2812539662393705, - "grad_norm": 0.673793613910675, - "learning_rate": 1.8124973558404197e-05, - "loss": 0.1296, + "epoch": 0.5624650997512564, + "grad_norm": 0.4090806245803833, + "learning_rate": 1.625023266832496e-05, + "loss": 0.0874, "step": 11080 }, { - "epoch": 0.2813808858992258, - "grad_norm": 0.5444685816764832, - "learning_rate": 1.8124127427338495e-05, - "loss": 0.1218, + "epoch": 0.5627189197421189, + "grad_norm": 0.7183617353439331, + "learning_rate": 1.624854053505254e-05, + "loss": 0.1116, "step": 11085 }, { - "epoch": 0.2815078055590811, - "grad_norm": 1.082297444343567, - "learning_rate": 1.8123281296272793e-05, - "loss": 0.1725, + "epoch": 0.5629727397329813, + "grad_norm": 0.8949313759803772, + "learning_rate": 1.6246848401780127e-05, + "loss": 0.1, "step": 11090 }, { - "epoch": 0.2816347252189364, - "grad_norm": 0.5500984787940979, - "learning_rate": 1.8122435165207092e-05, - "loss": 0.1096, + "epoch": 0.5632265597238438, + "grad_norm": 0.47640174627304077, + "learning_rate": 1.624515626850771e-05, + "loss": 0.0952, "step": 11095 }, { - "epoch": 0.28176164487879174, - "grad_norm": 0.8038409948348999, - "learning_rate": 1.812158903414139e-05, - "loss": 0.1296, + "epoch": 0.5634803797147063, + "grad_norm": 0.4236536920070648, + "learning_rate": 1.624346413523529e-05, + "loss": 0.0888, "step": 11100 }, { - "epoch": 0.281888564538647, - "grad_norm": 0.7072268128395081, - "learning_rate": 1.812074290307569e-05, - "loss": 0.1218, + "epoch": 0.5637341997055688, + "grad_norm": 0.4696132242679596, + "learning_rate": 1.6241772001962878e-05, + "loss": 0.0986, "step": 11105 }, { - "epoch": 0.28201548419850236, - "grad_norm": 0.5456904768943787, - "learning_rate": 1.8119896772009987e-05, - "loss": 0.1139, + "epoch": 0.5639880196964313, + "grad_norm": 0.48761463165283203, + "learning_rate": 1.6240079868690458e-05, + "loss": 0.086, "step": 11110 }, { - "epoch": 0.28214240385835765, - "grad_norm": 0.5357866287231445, - "learning_rate": 1.8119050640944285e-05, - "loss": 0.1341, + "epoch": 0.5642418396872938, + "grad_norm": 0.42676812410354614, + "learning_rate": 1.623838773541804e-05, + "loss": 0.0887, "step": 11115 }, { - "epoch": 0.282269323518213, - "grad_norm": 0.5507974624633789, - "learning_rate": 1.811820450987858e-05, - "loss": 0.1327, + "epoch": 0.5644956596781563, + "grad_norm": 0.5835752487182617, + "learning_rate": 1.6236695602145625e-05, + "loss": 0.0797, "step": 11120 }, { - "epoch": 0.2823962431780683, - "grad_norm": 0.40798458456993103, - "learning_rate": 1.811735837881288e-05, - "loss": 0.1244, + "epoch": 0.5647494796690188, + "grad_norm": 0.39226260781288147, + "learning_rate": 1.623500346887321e-05, + "loss": 0.1086, "step": 11125 }, { - "epoch": 0.2825231628379236, - "grad_norm": 0.9678627252578735, - "learning_rate": 1.8116512247747177e-05, - "loss": 0.1369, + "epoch": 0.5650032996598812, + "grad_norm": 0.4616701006889343, + "learning_rate": 1.6233311335600796e-05, + "loss": 0.0845, "step": 11130 }, { - "epoch": 0.2826500824977789, - "grad_norm": 0.8094459772109985, - "learning_rate": 1.8115666116681476e-05, - "loss": 0.1389, + "epoch": 0.5652571196507437, + "grad_norm": 0.6852196455001831, + "learning_rate": 1.6231619202328376e-05, + "loss": 0.0953, "step": 11135 }, { - "epoch": 0.28277700215763424, - "grad_norm": 0.8788360953330994, - "learning_rate": 1.8114819985615774e-05, - "loss": 0.1413, + "epoch": 0.5655109396416061, + "grad_norm": 0.5007418990135193, + "learning_rate": 1.622992706905596e-05, + "loss": 0.1014, "step": 11140 }, { - "epoch": 0.2829039218174895, - "grad_norm": 0.5808648467063904, - "learning_rate": 1.8113973854550072e-05, - "loss": 0.1566, + "epoch": 0.5657647596324686, + "grad_norm": 0.43455320596694946, + "learning_rate": 1.6228234935783543e-05, + "loss": 0.0925, "step": 11145 }, { - "epoch": 0.28303084147734486, - "grad_norm": 0.5569543242454529, - "learning_rate": 1.811312772348437e-05, - "loss": 0.1602, + "epoch": 0.5660185796233311, + "grad_norm": 0.41454216837882996, + "learning_rate": 1.6226542802511126e-05, + "loss": 0.0963, "step": 11150 }, { - "epoch": 0.28315776113720015, - "grad_norm": 1.1065337657928467, - "learning_rate": 1.811228159241867e-05, - "loss": 0.1393, + "epoch": 0.5662723996141936, + "grad_norm": 0.5282880663871765, + "learning_rate": 1.622485066923871e-05, + "loss": 0.0926, "step": 11155 }, { - "epoch": 0.2832846807970555, - "grad_norm": 0.6778880953788757, - "learning_rate": 1.8111435461352964e-05, - "loss": 0.137, + "epoch": 0.5665262196050561, + "grad_norm": 0.33719152212142944, + "learning_rate": 1.6223158535966293e-05, + "loss": 0.0983, "step": 11160 }, { - "epoch": 0.2834116004569108, - "grad_norm": 0.7852742075920105, - "learning_rate": 1.8110589330287263e-05, - "loss": 0.1216, + "epoch": 0.5667800395959186, + "grad_norm": 0.4163464605808258, + "learning_rate": 1.6221466402693877e-05, + "loss": 0.0993, "step": 11165 }, { - "epoch": 0.2835385201167661, - "grad_norm": 0.7558225989341736, - "learning_rate": 1.810974319922156e-05, - "loss": 0.1176, + "epoch": 0.5670338595867811, + "grad_norm": 0.40644052624702454, + "learning_rate": 1.621977426942146e-05, + "loss": 0.0798, "step": 11170 }, { - "epoch": 0.2836654397766214, - "grad_norm": 0.7527660131454468, - "learning_rate": 1.810889706815586e-05, - "loss": 0.1609, + "epoch": 0.5672876795776436, + "grad_norm": 0.42944255471229553, + "learning_rate": 1.6218082136149044e-05, + "loss": 0.1043, "step": 11175 }, { - "epoch": 0.28379235943647674, - "grad_norm": 0.6236631870269775, - "learning_rate": 1.8108050937090154e-05, - "loss": 0.136, + "epoch": 0.567541499568506, + "grad_norm": 0.3915778398513794, + "learning_rate": 1.6216390002876628e-05, + "loss": 0.0828, "step": 11180 }, { - "epoch": 0.283919279096332, - "grad_norm": 0.859935998916626, - "learning_rate": 1.8107204806024453e-05, - "loss": 0.1275, + "epoch": 0.5677953195593685, + "grad_norm": 0.5200676321983337, + "learning_rate": 1.621469786960421e-05, + "loss": 0.0916, "step": 11185 }, { - "epoch": 0.2840461987561873, - "grad_norm": 0.6942567825317383, - "learning_rate": 1.810635867495875e-05, - "loss": 0.1298, + "epoch": 0.568049139550231, + "grad_norm": 0.5898273587226868, + "learning_rate": 1.6213005736331795e-05, + "loss": 0.1157, "step": 11190 }, { - "epoch": 0.28417311841604265, - "grad_norm": 0.8881015777587891, - "learning_rate": 1.810551254389305e-05, - "loss": 0.1311, + "epoch": 0.5683029595410934, + "grad_norm": 0.4564743638038635, + "learning_rate": 1.6211313603059378e-05, + "loss": 0.1059, "step": 11195 }, { - "epoch": 0.28430003807589793, - "grad_norm": 0.7394446730613708, - "learning_rate": 1.8104666412827348e-05, - "loss": 0.1321, + "epoch": 0.5685567795319559, + "grad_norm": 0.42315754294395447, + "learning_rate": 1.6209621469786962e-05, + "loss": 0.1021, "step": 11200 }, { - "epoch": 0.28442695773575327, - "grad_norm": 0.5201545357704163, - "learning_rate": 1.8103820281761646e-05, - "loss": 0.1322, + "epoch": 0.5688105995228184, + "grad_norm": 0.4259279668331146, + "learning_rate": 1.6207929336514545e-05, + "loss": 0.0893, "step": 11205 }, { - "epoch": 0.28455387739560856, - "grad_norm": 0.9029053449630737, - "learning_rate": 1.8102974150695945e-05, - "loss": 0.1069, + "epoch": 0.5690644195136809, + "grad_norm": 0.4402656555175781, + "learning_rate": 1.620623720324213e-05, + "loss": 0.085, "step": 11210 }, { - "epoch": 0.2846807970554639, - "grad_norm": 1.014359951019287, - "learning_rate": 1.8102128019630243e-05, - "loss": 0.1251, + "epoch": 0.5693182395045434, + "grad_norm": 0.3866107165813446, + "learning_rate": 1.6204545069969712e-05, + "loss": 0.09, "step": 11215 }, { - "epoch": 0.2848077167153192, - "grad_norm": 0.5858657956123352, - "learning_rate": 1.810128188856454e-05, - "loss": 0.1218, + "epoch": 0.5695720594954059, + "grad_norm": 0.4657836854457855, + "learning_rate": 1.6202852936697296e-05, + "loss": 0.0994, "step": 11220 }, { - "epoch": 0.2849346363751745, - "grad_norm": 0.5967556834220886, - "learning_rate": 1.8100435757498837e-05, - "loss": 0.1285, + "epoch": 0.5698258794862683, + "grad_norm": 0.40453943610191345, + "learning_rate": 1.620116080342488e-05, + "loss": 0.0987, "step": 11225 }, { - "epoch": 0.2850615560350298, - "grad_norm": 1.4924256801605225, - "learning_rate": 1.8099589626433135e-05, - "loss": 0.1113, + "epoch": 0.5700796994771308, + "grad_norm": 0.401993989944458, + "learning_rate": 1.6199468670152463e-05, + "loss": 0.0847, "step": 11230 }, { - "epoch": 0.28518847569488515, - "grad_norm": 0.7331765294075012, - "learning_rate": 1.8098743495367434e-05, - "loss": 0.1235, + "epoch": 0.5703335194679933, + "grad_norm": 0.5438871383666992, + "learning_rate": 1.6197776536880047e-05, + "loss": 0.0964, "step": 11235 }, { - "epoch": 0.28531539535474043, - "grad_norm": 1.5182048082351685, - "learning_rate": 1.8097897364301732e-05, - "loss": 0.1276, + "epoch": 0.5705873394588558, + "grad_norm": 0.5712496638298035, + "learning_rate": 1.619608440360763e-05, + "loss": 0.0966, "step": 11240 }, { - "epoch": 0.28544231501459577, - "grad_norm": 0.7885397672653198, - "learning_rate": 1.809705123323603e-05, - "loss": 0.1217, + "epoch": 0.5708411594497182, + "grad_norm": 0.3064565658569336, + "learning_rate": 1.6194392270335214e-05, + "loss": 0.0909, "step": 11245 }, { - "epoch": 0.28556923467445106, - "grad_norm": 0.45543771982192993, - "learning_rate": 1.809620510217033e-05, - "loss": 0.1204, + "epoch": 0.5710949794405807, + "grad_norm": 1.4858416318893433, + "learning_rate": 1.6192700137062797e-05, + "loss": 0.1, "step": 11250 }, { - "epoch": 0.2856961543343064, - "grad_norm": 0.8032432794570923, - "learning_rate": 1.8095358971104627e-05, - "loss": 0.1205, + "epoch": 0.5713487994314432, + "grad_norm": 0.36369451880455017, + "learning_rate": 1.619100800379038e-05, + "loss": 0.0975, "step": 11255 }, { - "epoch": 0.2858230739941617, - "grad_norm": 1.0962108373641968, - "learning_rate": 1.8094512840038922e-05, - "loss": 0.1093, + "epoch": 0.5716026194223057, + "grad_norm": 0.48280322551727295, + "learning_rate": 1.6189315870517964e-05, + "loss": 0.0852, "step": 11260 }, { - "epoch": 0.285949993654017, - "grad_norm": 0.3912820816040039, - "learning_rate": 1.809366670897322e-05, - "loss": 0.1292, + "epoch": 0.5718564394131682, + "grad_norm": 0.708537220954895, + "learning_rate": 1.6187623737245545e-05, + "loss": 0.1065, "step": 11265 }, { - "epoch": 0.2860769133138723, - "grad_norm": 0.8758546113967896, - "learning_rate": 1.809282057790752e-05, - "loss": 0.1426, + "epoch": 0.5721102594040307, + "grad_norm": 0.5387185215950012, + "learning_rate": 1.618593160397313e-05, + "loss": 0.0971, "step": 11270 }, { - "epoch": 0.28620383297372765, - "grad_norm": 0.6096153259277344, - "learning_rate": 1.8091974446841817e-05, - "loss": 0.1446, + "epoch": 0.5723640793948931, + "grad_norm": 0.5026307106018066, + "learning_rate": 1.6184239470700715e-05, + "loss": 0.0887, "step": 11275 }, { - "epoch": 0.28633075263358293, - "grad_norm": 0.5418789982795715, - "learning_rate": 1.8091128315776116e-05, - "loss": 0.1363, + "epoch": 0.5726178993857556, + "grad_norm": 0.40268126130104065, + "learning_rate": 1.61825473374283e-05, + "loss": 0.0892, "step": 11280 }, { - "epoch": 0.28645767229343827, - "grad_norm": 1.6113145351409912, - "learning_rate": 1.8090282184710414e-05, - "loss": 0.1294, + "epoch": 0.5728717193766181, + "grad_norm": 0.5645183324813843, + "learning_rate": 1.6180855204155882e-05, + "loss": 0.0984, "step": 11285 }, { - "epoch": 0.28658459195329355, - "grad_norm": 0.722954273223877, - "learning_rate": 1.8089436053644713e-05, - "loss": 0.1, + "epoch": 0.5731255393674806, + "grad_norm": 0.35205450654029846, + "learning_rate": 1.6179163070883462e-05, + "loss": 0.1013, "step": 11290 }, { - "epoch": 0.2867115116131489, - "grad_norm": 0.7617412209510803, - "learning_rate": 1.808858992257901e-05, - "loss": 0.1392, + "epoch": 0.573379359358343, + "grad_norm": 0.42374926805496216, + "learning_rate": 1.617747093761105e-05, + "loss": 0.0954, "step": 11295 }, { - "epoch": 0.2868384312730042, - "grad_norm": 0.5482261180877686, - "learning_rate": 1.8087743791513306e-05, - "loss": 0.1309, + "epoch": 0.5736331793492055, + "grad_norm": 0.438722163438797, + "learning_rate": 1.617577880433863e-05, + "loss": 0.0961, "step": 11300 }, { - "epoch": 0.2869653509328595, - "grad_norm": 1.1423382759094238, - "learning_rate": 1.8086897660447604e-05, - "loss": 0.1354, + "epoch": 0.573886999340068, + "grad_norm": 0.6137442588806152, + "learning_rate": 1.6174086671066213e-05, + "loss": 0.0918, "step": 11305 }, { - "epoch": 0.2870922705927148, - "grad_norm": 1.0530189275741577, - "learning_rate": 1.8086051529381903e-05, - "loss": 0.1581, + "epoch": 0.5741408193309305, + "grad_norm": 0.6508979201316833, + "learning_rate": 1.61723945377938e-05, + "loss": 0.0934, "step": 11310 }, { - "epoch": 0.28721919025257014, - "grad_norm": 0.9346684813499451, - "learning_rate": 1.80852053983162e-05, - "loss": 0.135, + "epoch": 0.574394639321793, + "grad_norm": 0.6937998533248901, + "learning_rate": 1.617070240452138e-05, + "loss": 0.104, "step": 11315 }, { - "epoch": 0.28734610991242543, - "grad_norm": 0.6507686376571655, - "learning_rate": 1.8084359267250496e-05, - "loss": 0.1303, + "epoch": 0.5746484593126555, + "grad_norm": 0.44844573736190796, + "learning_rate": 1.6169010271248964e-05, + "loss": 0.1024, "step": 11320 }, { - "epoch": 0.28747302957228077, - "grad_norm": 0.5052119493484497, - "learning_rate": 1.8083513136184795e-05, - "loss": 0.1224, + "epoch": 0.5749022793035179, + "grad_norm": 0.5491892695426941, + "learning_rate": 1.6167318137976547e-05, + "loss": 0.1098, "step": 11325 }, { - "epoch": 0.28759994923213605, - "grad_norm": 0.5499473214149475, - "learning_rate": 1.8082667005119093e-05, - "loss": 0.1179, + "epoch": 0.5751560992943804, + "grad_norm": 0.43779659271240234, + "learning_rate": 1.616562600470413e-05, + "loss": 0.0993, "step": 11330 }, { - "epoch": 0.2877268688919914, - "grad_norm": 0.46656566858291626, - "learning_rate": 1.808182087405339e-05, - "loss": 0.1192, + "epoch": 0.5754099192852429, + "grad_norm": 0.3295450508594513, + "learning_rate": 1.6163933871431718e-05, + "loss": 0.0859, "step": 11335 }, { - "epoch": 0.2878537885518467, - "grad_norm": 0.6849450469017029, - "learning_rate": 1.808097474298769e-05, - "loss": 0.1363, + "epoch": 0.5756637392761054, + "grad_norm": 0.43296727538108826, + "learning_rate": 1.6162241738159298e-05, + "loss": 0.0926, "step": 11340 }, { - "epoch": 0.287980708211702, - "grad_norm": 0.7887417674064636, - "learning_rate": 1.8080128611921988e-05, - "loss": 0.1607, + "epoch": 0.5759175592669679, + "grad_norm": 0.5013937950134277, + "learning_rate": 1.616054960488688e-05, + "loss": 0.0917, "step": 11345 }, { - "epoch": 0.2881076278715573, - "grad_norm": 0.7072952389717102, - "learning_rate": 1.8079282480856287e-05, - "loss": 0.1461, + "epoch": 0.5761713792578304, + "grad_norm": 0.4258970022201538, + "learning_rate": 1.6158857471614465e-05, + "loss": 0.1014, "step": 11350 }, { - "epoch": 0.2882345475314126, - "grad_norm": 0.6509044170379639, - "learning_rate": 1.8078436349790585e-05, - "loss": 0.1238, + "epoch": 0.5764251992486928, + "grad_norm": 0.4371802806854248, + "learning_rate": 1.615716533834205e-05, + "loss": 0.0894, "step": 11355 }, { - "epoch": 0.28836146719126793, - "grad_norm": 0.6719843149185181, - "learning_rate": 1.8077590218724883e-05, - "loss": 0.1262, + "epoch": 0.5766790192395553, + "grad_norm": 0.34599819779396057, + "learning_rate": 1.6155473205069632e-05, + "loss": 0.0877, "step": 11360 }, { - "epoch": 0.2884883868511232, - "grad_norm": 0.7463476061820984, - "learning_rate": 1.807674408765918e-05, - "loss": 0.1257, + "epoch": 0.5769328392304178, + "grad_norm": 0.3687082827091217, + "learning_rate": 1.6153781071797215e-05, + "loss": 0.0881, "step": 11365 }, { - "epoch": 0.28861530651097855, - "grad_norm": 0.46227023005485535, - "learning_rate": 1.8075897956593477e-05, - "loss": 0.1188, + "epoch": 0.5771866592212803, + "grad_norm": 0.45477935671806335, + "learning_rate": 1.61520889385248e-05, + "loss": 0.0896, "step": 11370 }, { - "epoch": 0.28874222617083384, - "grad_norm": 0.8195514678955078, - "learning_rate": 1.8075051825527775e-05, - "loss": 0.1279, + "epoch": 0.5774404792121427, + "grad_norm": 0.4007332921028137, + "learning_rate": 1.6150396805252383e-05, + "loss": 0.0888, "step": 11375 }, { - "epoch": 0.2888691458306892, - "grad_norm": 0.635254442691803, - "learning_rate": 1.8074205694462074e-05, - "loss": 0.1282, + "epoch": 0.5776942992030052, + "grad_norm": 0.44187676906585693, + "learning_rate": 1.6148704671979966e-05, + "loss": 0.0953, "step": 11380 }, { - "epoch": 0.28899606549054446, - "grad_norm": 0.4992779493331909, - "learning_rate": 1.8073359563396372e-05, - "loss": 0.1205, + "epoch": 0.5779481191938677, + "grad_norm": 0.3938285708427429, + "learning_rate": 1.614701253870755e-05, + "loss": 0.0935, "step": 11385 }, { - "epoch": 0.2891229851503998, - "grad_norm": 1.2572362422943115, - "learning_rate": 1.807251343233067e-05, - "loss": 0.1244, + "epoch": 0.5782019391847302, + "grad_norm": 0.4192904531955719, + "learning_rate": 1.6145320405435133e-05, + "loss": 0.0982, "step": 11390 }, { - "epoch": 0.2892499048102551, - "grad_norm": 0.7118340730667114, - "learning_rate": 1.807166730126497e-05, - "loss": 0.1298, + "epoch": 0.5784557591755927, + "grad_norm": 0.3519038259983063, + "learning_rate": 1.6143628272162717e-05, + "loss": 0.095, "step": 11395 }, { - "epoch": 0.2893768244701104, - "grad_norm": 0.9458000659942627, - "learning_rate": 1.8070821170199267e-05, - "loss": 0.1708, + "epoch": 0.5787095791664552, + "grad_norm": 0.5053529143333435, + "learning_rate": 1.61419361388903e-05, + "loss": 0.1037, "step": 11400 }, { - "epoch": 0.2895037441299657, - "grad_norm": 0.5459735989570618, - "learning_rate": 1.8069975039133562e-05, - "loss": 0.1109, + "epoch": 0.5789633991573176, + "grad_norm": 0.5747759938240051, + "learning_rate": 1.6140244005617884e-05, + "loss": 0.0861, "step": 11405 }, { - "epoch": 0.28963066378982105, - "grad_norm": 0.5949194431304932, - "learning_rate": 1.806912890806786e-05, - "loss": 0.1231, + "epoch": 0.5792172191481801, + "grad_norm": 0.5155202150344849, + "learning_rate": 1.6138551872345467e-05, + "loss": 0.0859, "step": 11410 }, { - "epoch": 0.28975758344967634, - "grad_norm": 0.807267427444458, - "learning_rate": 1.806828277700216e-05, - "loss": 0.1571, + "epoch": 0.5794710391390426, + "grad_norm": 0.799940288066864, + "learning_rate": 1.613685973907305e-05, + "loss": 0.0922, "step": 11415 }, { - "epoch": 0.2898845031095317, - "grad_norm": 0.6182039380073547, - "learning_rate": 1.8067436645936457e-05, - "loss": 0.1391, + "epoch": 0.5797248591299051, + "grad_norm": 0.29105183482170105, + "learning_rate": 1.6135167605800634e-05, + "loss": 0.0794, "step": 11420 }, { - "epoch": 0.29001142276938696, - "grad_norm": 0.3898719847202301, - "learning_rate": 1.8066590514870756e-05, - "loss": 0.1304, + "epoch": 0.5799786791207675, + "grad_norm": 0.43824175000190735, + "learning_rate": 1.6133475472528218e-05, + "loss": 0.1006, "step": 11425 }, { - "epoch": 0.2901383424292423, - "grad_norm": 0.8015116453170776, - "learning_rate": 1.8065744383805054e-05, - "loss": 0.1237, + "epoch": 0.58023249911163, + "grad_norm": 0.8796586990356445, + "learning_rate": 1.61317833392558e-05, + "loss": 0.0999, "step": 11430 }, { - "epoch": 0.2902652620890976, - "grad_norm": 0.9739649295806885, - "learning_rate": 1.8064898252739353e-05, - "loss": 0.1481, + "epoch": 0.5804863191024925, + "grad_norm": 0.4385398328304291, + "learning_rate": 1.6130091205983385e-05, + "loss": 0.0962, "step": 11435 }, { - "epoch": 0.2903921817489529, - "grad_norm": 0.7331280708312988, - "learning_rate": 1.806405212167365e-05, - "loss": 0.1174, + "epoch": 0.580740139093355, + "grad_norm": 0.4407016336917877, + "learning_rate": 1.612839907271097e-05, + "loss": 0.0962, "step": 11440 }, { - "epoch": 0.2905191014088082, - "grad_norm": 0.576487123966217, - "learning_rate": 1.8063205990607946e-05, - "loss": 0.1192, + "epoch": 0.5809939590842175, + "grad_norm": 0.37328964471817017, + "learning_rate": 1.6126706939438552e-05, + "loss": 0.0866, "step": 11445 }, { - "epoch": 0.29064602106866355, - "grad_norm": 0.6380681395530701, - "learning_rate": 1.8062359859542244e-05, - "loss": 0.1284, + "epoch": 0.58124777907508, + "grad_norm": 0.42281779646873474, + "learning_rate": 1.6125014806166136e-05, + "loss": 0.1012, "step": 11450 }, { - "epoch": 0.29077294072851884, - "grad_norm": 0.7383301854133606, - "learning_rate": 1.8061513728476543e-05, - "loss": 0.1246, + "epoch": 0.5815015990659425, + "grad_norm": 0.459942102432251, + "learning_rate": 1.612332267289372e-05, + "loss": 0.1061, "step": 11455 }, { - "epoch": 0.2908998603883742, - "grad_norm": 0.5548836588859558, - "learning_rate": 1.806066759741084e-05, - "loss": 0.127, + "epoch": 0.581755419056805, + "grad_norm": 0.6190848350524902, + "learning_rate": 1.6121630539621303e-05, + "loss": 0.1014, "step": 11460 }, { - "epoch": 0.29102678004822946, - "grad_norm": 0.8128247261047363, - "learning_rate": 1.8059821466345136e-05, - "loss": 0.1292, + "epoch": 0.5820092390476674, + "grad_norm": 0.6544129252433777, + "learning_rate": 1.6119938406348886e-05, + "loss": 0.0861, "step": 11465 }, { - "epoch": 0.2911536997080848, - "grad_norm": 0.6800500154495239, - "learning_rate": 1.8058975335279435e-05, - "loss": 0.1124, + "epoch": 0.5822630590385299, + "grad_norm": 0.32813870906829834, + "learning_rate": 1.6118246273076466e-05, + "loss": 0.0926, "step": 11470 }, { - "epoch": 0.2912806193679401, - "grad_norm": 0.6074063777923584, - "learning_rate": 1.8058129204213733e-05, - "loss": 0.1495, + "epoch": 0.5825168790293923, + "grad_norm": 0.4258780777454376, + "learning_rate": 1.6116554139804053e-05, + "loss": 0.0933, "step": 11475 }, { - "epoch": 0.2914075390277954, - "grad_norm": 1.125908374786377, - "learning_rate": 1.805728307314803e-05, - "loss": 0.1319, + "epoch": 0.5827706990202548, + "grad_norm": 0.6019533276557922, + "learning_rate": 1.6114862006531634e-05, + "loss": 0.0953, "step": 11480 }, { - "epoch": 0.2915344586876507, - "grad_norm": 0.7935991287231445, - "learning_rate": 1.805643694208233e-05, - "loss": 0.1143, + "epoch": 0.5830245190111173, + "grad_norm": 0.4072204828262329, + "learning_rate": 1.611316987325922e-05, + "loss": 0.0951, "step": 11485 }, { - "epoch": 0.29166137834750605, - "grad_norm": 0.611835777759552, - "learning_rate": 1.805559081101663e-05, - "loss": 0.1213, + "epoch": 0.5832783390019798, + "grad_norm": 0.3429082930088043, + "learning_rate": 1.6111477739986804e-05, + "loss": 0.0879, "step": 11490 }, { - "epoch": 0.29178829800736134, - "grad_norm": 0.4945152997970581, - "learning_rate": 1.8054744679950927e-05, - "loss": 0.1147, + "epoch": 0.5835321589928423, + "grad_norm": 0.4086165428161621, + "learning_rate": 1.6109785606714384e-05, + "loss": 0.0996, "step": 11495 }, { - "epoch": 0.2919152176672167, - "grad_norm": 0.5169728398323059, - "learning_rate": 1.8053898548885225e-05, - "loss": 0.1271, + "epoch": 0.5837859789837048, + "grad_norm": 0.3771967589855194, + "learning_rate": 1.610809347344197e-05, + "loss": 0.0959, "step": 11500 }, { - "epoch": 0.29204213732707196, - "grad_norm": 2.5562379360198975, - "learning_rate": 1.805305241781952e-05, - "loss": 0.1251, + "epoch": 0.5840397989745673, + "grad_norm": 0.4078262746334076, + "learning_rate": 1.610640134016955e-05, + "loss": 0.0953, "step": 11505 }, { - "epoch": 0.2921690569869273, - "grad_norm": 0.64995276927948, - "learning_rate": 1.805220628675382e-05, - "loss": 0.1435, + "epoch": 0.5842936189654298, + "grad_norm": 1.961870789527893, + "learning_rate": 1.6104709206897135e-05, + "loss": 0.0964, "step": 11510 }, { - "epoch": 0.2922959766467826, - "grad_norm": 0.804287850856781, - "learning_rate": 1.8051360155688117e-05, - "loss": 0.1645, + "epoch": 0.5845474389562922, + "grad_norm": 0.4825584888458252, + "learning_rate": 1.6103017073624722e-05, + "loss": 0.0995, "step": 11515 }, { - "epoch": 0.2924228963066379, - "grad_norm": 0.6376997232437134, - "learning_rate": 1.8050514024622415e-05, - "loss": 0.1362, + "epoch": 0.5848012589471546, + "grad_norm": 0.46092280745506287, + "learning_rate": 1.6101324940352302e-05, + "loss": 0.085, "step": 11520 }, { - "epoch": 0.2925498159664932, - "grad_norm": 0.5797489881515503, - "learning_rate": 1.8049667893556714e-05, - "loss": 0.1335, + "epoch": 0.5850550789380171, + "grad_norm": 0.5379409193992615, + "learning_rate": 1.609963280707989e-05, + "loss": 0.1011, "step": 11525 }, { - "epoch": 0.2926767356263485, - "grad_norm": 0.5686189532279968, - "learning_rate": 1.8048821762491012e-05, - "loss": 0.1478, + "epoch": 0.5853088989288796, + "grad_norm": 0.9948629140853882, + "learning_rate": 1.609794067380747e-05, + "loss": 0.0914, "step": 11530 }, { - "epoch": 0.29280365528620383, - "grad_norm": 0.5861384868621826, - "learning_rate": 1.804797563142531e-05, - "loss": 0.1058, + "epoch": 0.5855627189197421, + "grad_norm": 0.39759644865989685, + "learning_rate": 1.6096248540535053e-05, + "loss": 0.0939, "step": 11535 }, { - "epoch": 0.2929305749460591, - "grad_norm": 1.0974847078323364, - "learning_rate": 1.804712950035961e-05, - "loss": 0.135, + "epoch": 0.5858165389106046, + "grad_norm": 0.4798009693622589, + "learning_rate": 1.609455640726264e-05, + "loss": 0.0927, "step": 11540 }, { - "epoch": 0.29305749460591446, - "grad_norm": 0.7453882694244385, - "learning_rate": 1.8046283369293904e-05, - "loss": 0.1019, + "epoch": 0.5860703589014671, + "grad_norm": 0.3862641453742981, + "learning_rate": 1.609286427399022e-05, + "loss": 0.0899, "step": 11545 }, { - "epoch": 0.29318441426576974, - "grad_norm": 0.5430159568786621, - "learning_rate": 1.8045437238228202e-05, - "loss": 0.1007, + "epoch": 0.5863241788923296, + "grad_norm": 0.38452666997909546, + "learning_rate": 1.6091172140717803e-05, + "loss": 0.0959, "step": 11550 }, { - "epoch": 0.2933113339256251, - "grad_norm": 0.9931712746620178, - "learning_rate": 1.80445911071625e-05, - "loss": 0.2502, + "epoch": 0.5865779988831921, + "grad_norm": 0.3630549907684326, + "learning_rate": 1.6089480007445387e-05, + "loss": 0.0995, "step": 11555 }, { - "epoch": 0.29343825358548037, - "grad_norm": 1.350375771522522, - "learning_rate": 1.80437449760968e-05, - "loss": 0.1326, + "epoch": 0.5868318188740546, + "grad_norm": 0.8909419178962708, + "learning_rate": 1.608778787417297e-05, + "loss": 0.1095, "step": 11560 }, { - "epoch": 0.2935651732453357, - "grad_norm": 0.5595411658287048, - "learning_rate": 1.8042898845031098e-05, - "loss": 0.1285, + "epoch": 0.587085638864917, + "grad_norm": 0.3947525918483734, + "learning_rate": 1.6086095740900554e-05, + "loss": 0.099, "step": 11565 }, { - "epoch": 0.293692092905191, - "grad_norm": 0.9209237694740295, - "learning_rate": 1.8042052713965396e-05, - "loss": 0.1372, + "epoch": 0.5873394588557794, + "grad_norm": 0.3510633409023285, + "learning_rate": 1.6084403607628137e-05, + "loss": 0.0885, "step": 11570 }, { - "epoch": 0.29381901256504633, - "grad_norm": 0.476377010345459, - "learning_rate": 1.8041206582899694e-05, - "loss": 0.1433, + "epoch": 0.5875932788466419, + "grad_norm": 0.49123838543891907, + "learning_rate": 1.608271147435572e-05, + "loss": 0.0867, "step": 11575 }, { - "epoch": 0.2939459322249016, - "grad_norm": 0.8645185232162476, - "learning_rate": 1.8040360451833993e-05, - "loss": 0.1414, + "epoch": 0.5878470988375044, + "grad_norm": 0.3366958200931549, + "learning_rate": 1.6081019341083304e-05, + "loss": 0.0974, "step": 11580 }, { - "epoch": 0.29407285188475696, - "grad_norm": 0.5758728384971619, - "learning_rate": 1.8039514320768288e-05, - "loss": 0.1066, + "epoch": 0.5881009188283669, + "grad_norm": 0.7465676665306091, + "learning_rate": 1.6079327207810888e-05, + "loss": 0.1032, "step": 11585 }, { - "epoch": 0.29419977154461224, - "grad_norm": 0.860349178314209, - "learning_rate": 1.8038668189702586e-05, - "loss": 0.1232, + "epoch": 0.5883547388192294, + "grad_norm": 0.38815176486968994, + "learning_rate": 1.607763507453847e-05, + "loss": 0.1028, "step": 11590 }, { - "epoch": 0.2943266912044676, - "grad_norm": 0.712425708770752, - "learning_rate": 1.8037822058636885e-05, - "loss": 0.1473, + "epoch": 0.5886085588100919, + "grad_norm": 0.42240670323371887, + "learning_rate": 1.6075942941266055e-05, + "loss": 0.086, "step": 11595 }, { - "epoch": 0.29445361086432287, - "grad_norm": 0.6702398657798767, - "learning_rate": 1.8036975927571183e-05, - "loss": 0.1275, + "epoch": 0.5888623788009544, + "grad_norm": 0.35436907410621643, + "learning_rate": 1.607425080799364e-05, + "loss": 0.095, "step": 11600 }, { - "epoch": 0.2945805305241782, - "grad_norm": 0.46198904514312744, - "learning_rate": 1.8036129796505478e-05, - "loss": 0.1396, + "epoch": 0.5891161987918169, + "grad_norm": 0.3987736999988556, + "learning_rate": 1.6072558674721222e-05, + "loss": 0.0994, "step": 11605 }, { - "epoch": 0.2947074501840335, - "grad_norm": 0.7908966541290283, - "learning_rate": 1.8035283665439776e-05, - "loss": 0.1414, + "epoch": 0.5893700187826794, + "grad_norm": 0.5091561079025269, + "learning_rate": 1.6070866541448806e-05, + "loss": 0.0942, "step": 11610 }, { - "epoch": 0.29483436984388883, - "grad_norm": 0.5825008749961853, - "learning_rate": 1.8034437534374075e-05, - "loss": 0.1066, + "epoch": 0.5896238387735419, + "grad_norm": 0.3973734974861145, + "learning_rate": 1.606917440817639e-05, + "loss": 0.0852, "step": 11615 }, { - "epoch": 0.2949612895037441, - "grad_norm": 0.8061119914054871, - "learning_rate": 1.8033591403308373e-05, - "loss": 0.113, + "epoch": 0.5898776587644042, + "grad_norm": 0.38369306921958923, + "learning_rate": 1.6067482274903973e-05, + "loss": 0.0904, "step": 11620 }, { - "epoch": 0.29508820916359946, - "grad_norm": 0.5902580618858337, - "learning_rate": 1.803274527224267e-05, - "loss": 0.1194, + "epoch": 0.5901314787552667, + "grad_norm": 0.41540855169296265, + "learning_rate": 1.6065790141631556e-05, + "loss": 0.083, "step": 11625 }, { - "epoch": 0.29521512882345474, - "grad_norm": 0.47920557856559753, - "learning_rate": 1.803189914117697e-05, - "loss": 0.1163, + "epoch": 0.5903852987461292, + "grad_norm": 0.468839168548584, + "learning_rate": 1.606409800835914e-05, + "loss": 0.1061, "step": 11630 }, { - "epoch": 0.2953420484833101, - "grad_norm": 0.7212051153182983, - "learning_rate": 1.803105301011127e-05, - "loss": 0.1372, + "epoch": 0.5906391187369917, + "grad_norm": 0.5892307162284851, + "learning_rate": 1.6062405875086723e-05, + "loss": 0.0913, "step": 11635 }, { - "epoch": 0.29546896814316537, - "grad_norm": 0.5490466356277466, - "learning_rate": 1.8030206879045567e-05, - "loss": 0.1102, + "epoch": 0.5908929387278542, + "grad_norm": 0.3928740322589874, + "learning_rate": 1.6060713741814307e-05, + "loss": 0.0921, "step": 11640 }, { - "epoch": 0.2955958878030207, - "grad_norm": 0.5807163715362549, - "learning_rate": 1.8029360747979862e-05, - "loss": 0.144, + "epoch": 0.5911467587187167, + "grad_norm": 0.36110663414001465, + "learning_rate": 1.605902160854189e-05, + "loss": 0.0927, "step": 11645 }, { - "epoch": 0.295722807462876, - "grad_norm": 0.8678895235061646, - "learning_rate": 1.802851461691416e-05, - "loss": 0.1215, + "epoch": 0.5914005787095792, + "grad_norm": 0.699163019657135, + "learning_rate": 1.6057329475269474e-05, + "loss": 0.0997, "step": 11650 }, { - "epoch": 0.29584972712273133, - "grad_norm": 0.44988247752189636, - "learning_rate": 1.802766848584846e-05, - "loss": 0.1184, + "epoch": 0.5916543987004417, + "grad_norm": 0.43201932311058044, + "learning_rate": 1.6055637341997058e-05, + "loss": 0.0863, "step": 11655 }, { - "epoch": 0.2959766467825866, - "grad_norm": 0.9166194200515747, - "learning_rate": 1.8026822354782757e-05, - "loss": 0.1024, + "epoch": 0.5919082186913042, + "grad_norm": 1.3616560697555542, + "learning_rate": 1.6053945208724638e-05, + "loss": 0.0943, "step": 11660 }, { - "epoch": 0.29610356644244196, - "grad_norm": 0.5664634704589844, - "learning_rate": 1.8025976223717055e-05, - "loss": 0.1228, + "epoch": 0.5921620386821667, + "grad_norm": 0.44065532088279724, + "learning_rate": 1.6052253075452225e-05, + "loss": 0.1008, "step": 11665 }, { - "epoch": 0.29623048610229724, - "grad_norm": 0.49146413803100586, - "learning_rate": 1.8025130092651354e-05, - "loss": 0.126, + "epoch": 0.592415858673029, + "grad_norm": 0.48787134885787964, + "learning_rate": 1.6050560942179808e-05, + "loss": 0.0988, "step": 11670 }, { - "epoch": 0.2963574057621526, - "grad_norm": 0.6591022610664368, - "learning_rate": 1.8024283961585652e-05, - "loss": 0.1329, + "epoch": 0.5926696786638915, + "grad_norm": 0.502856969833374, + "learning_rate": 1.6048868808907392e-05, + "loss": 0.093, "step": 11675 }, { - "epoch": 0.29648432542200787, - "grad_norm": 0.4846183955669403, - "learning_rate": 1.802343783051995e-05, - "loss": 0.1078, + "epoch": 0.592923498654754, + "grad_norm": 0.46943750977516174, + "learning_rate": 1.6047176675634975e-05, + "loss": 0.0912, "step": 11680 }, { - "epoch": 0.2966112450818632, - "grad_norm": 0.463806688785553, - "learning_rate": 1.8022591699454246e-05, - "loss": 0.1259, + "epoch": 0.5931773186456165, + "grad_norm": 0.37175118923187256, + "learning_rate": 1.6045484542362555e-05, + "loss": 0.0861, "step": 11685 }, { - "epoch": 0.2967381647417185, - "grad_norm": 0.6903710961341858, - "learning_rate": 1.8021745568388544e-05, - "loss": 0.1343, + "epoch": 0.593431138636479, + "grad_norm": 0.5893447995185852, + "learning_rate": 1.6043792409090142e-05, + "loss": 0.0894, "step": 11690 }, { - "epoch": 0.29686508440157383, - "grad_norm": 0.6096500158309937, - "learning_rate": 1.8020899437322842e-05, - "loss": 0.1189, + "epoch": 0.5936849586273415, + "grad_norm": 0.4342266619205475, + "learning_rate": 1.6042100275817726e-05, + "loss": 0.0919, "step": 11695 }, { - "epoch": 0.2969920040614291, - "grad_norm": 0.8069438338279724, - "learning_rate": 1.802005330625714e-05, - "loss": 0.1337, + "epoch": 0.593938778618204, + "grad_norm": 0.3848216235637665, + "learning_rate": 1.6040408142545306e-05, + "loss": 0.081, "step": 11700 }, { - "epoch": 0.2971189237212844, - "grad_norm": 1.0294148921966553, - "learning_rate": 1.801920717519144e-05, - "loss": 0.1334, + "epoch": 0.5941925986090665, + "grad_norm": 0.4635235667228699, + "learning_rate": 1.6038716009272893e-05, + "loss": 0.0899, "step": 11705 }, { - "epoch": 0.29724584338113974, - "grad_norm": 0.7532293200492859, - "learning_rate": 1.8018361044125738e-05, - "loss": 0.1305, + "epoch": 0.594446418599929, + "grad_norm": 0.5500293374061584, + "learning_rate": 1.6037023876000473e-05, + "loss": 0.0902, "step": 11710 }, { - "epoch": 0.297372763040995, - "grad_norm": 0.5494889616966248, - "learning_rate": 1.8017514913060036e-05, - "loss": 0.1327, + "epoch": 0.5947002385907915, + "grad_norm": 0.3731316924095154, + "learning_rate": 1.6035331742728057e-05, + "loss": 0.0846, "step": 11715 }, { - "epoch": 0.29749968270085037, - "grad_norm": 0.6636677980422974, - "learning_rate": 1.8016668781994334e-05, - "loss": 0.1204, + "epoch": 0.5949540585816538, + "grad_norm": 0.3542387783527374, + "learning_rate": 1.6033639609455644e-05, + "loss": 0.0959, "step": 11720 }, { - "epoch": 0.29762660236070565, - "grad_norm": 0.750522255897522, - "learning_rate": 1.801582265092863e-05, - "loss": 0.1379, + "epoch": 0.5952078785725163, + "grad_norm": 0.4823991656303406, + "learning_rate": 1.6031947476183224e-05, + "loss": 0.0959, "step": 11725 }, { - "epoch": 0.297753522020561, - "grad_norm": 0.7156482338905334, - "learning_rate": 1.8014976519862928e-05, - "loss": 0.1034, + "epoch": 0.5954616985633788, + "grad_norm": 0.4015558063983917, + "learning_rate": 1.603025534291081e-05, + "loss": 0.0983, "step": 11730 }, { - "epoch": 0.2978804416804163, - "grad_norm": 0.6520010232925415, - "learning_rate": 1.8014130388797226e-05, - "loss": 0.1278, + "epoch": 0.5957155185542413, + "grad_norm": 0.4608984589576721, + "learning_rate": 1.602856320963839e-05, + "loss": 0.0885, "step": 11735 }, { - "epoch": 0.2980073613402716, - "grad_norm": 0.6006286144256592, - "learning_rate": 1.8013284257731525e-05, - "loss": 0.1275, + "epoch": 0.5959693385451038, + "grad_norm": 0.4496300220489502, + "learning_rate": 1.6026871076365974e-05, + "loss": 0.0854, "step": 11740 }, { - "epoch": 0.2981342810001269, - "grad_norm": 0.8607048392295837, - "learning_rate": 1.801243812666582e-05, - "loss": 0.1432, + "epoch": 0.5962231585359663, + "grad_norm": 0.41843149065971375, + "learning_rate": 1.602517894309356e-05, + "loss": 0.0882, "step": 11745 }, { - "epoch": 0.29826120065998224, - "grad_norm": 0.5984954237937927, - "learning_rate": 1.8011591995600118e-05, - "loss": 0.122, + "epoch": 0.5964769785268288, + "grad_norm": 0.3742343783378601, + "learning_rate": 1.602348680982114e-05, + "loss": 0.0829, "step": 11750 }, { - "epoch": 0.2983881203198375, - "grad_norm": 0.7678459882736206, - "learning_rate": 1.8010745864534417e-05, - "loss": 0.1278, + "epoch": 0.5967307985176913, + "grad_norm": 0.5669666528701782, + "learning_rate": 1.6021794676548725e-05, + "loss": 0.0933, "step": 11755 }, { - "epoch": 0.29851503997969286, - "grad_norm": 0.49324142932891846, - "learning_rate": 1.8009899733468715e-05, - "loss": 0.1056, + "epoch": 0.5969846185085538, + "grad_norm": 0.37627196311950684, + "learning_rate": 1.602010254327631e-05, + "loss": 0.0797, "step": 11760 }, { - "epoch": 0.29864195963954815, - "grad_norm": 0.8456659317016602, - "learning_rate": 1.8009053602403013e-05, - "loss": 0.132, + "epoch": 0.5972384384994163, + "grad_norm": 0.4670741856098175, + "learning_rate": 1.6018410410003892e-05, + "loss": 0.0964, "step": 11765 }, { - "epoch": 0.2987688792994035, - "grad_norm": 0.9185765385627747, - "learning_rate": 1.8008207471337312e-05, - "loss": 0.125, + "epoch": 0.5974922584902786, + "grad_norm": 0.5946935415267944, + "learning_rate": 1.601671827673148e-05, + "loss": 0.0848, "step": 11770 }, { - "epoch": 0.2988957989592588, - "grad_norm": 0.5867887735366821, - "learning_rate": 1.800736134027161e-05, - "loss": 0.1266, + "epoch": 0.5977460784811411, + "grad_norm": 0.3806981146335602, + "learning_rate": 1.601502614345906e-05, + "loss": 0.0901, "step": 11775 }, { - "epoch": 0.2990227186191141, - "grad_norm": 0.7779586315155029, - "learning_rate": 1.800651520920591e-05, - "loss": 0.1288, + "epoch": 0.5979998984720036, + "grad_norm": 0.4813790023326874, + "learning_rate": 1.6013334010186643e-05, + "loss": 0.0897, "step": 11780 }, { - "epoch": 0.2991496382789694, - "grad_norm": 0.5797708630561829, - "learning_rate": 1.8005669078140204e-05, - "loss": 0.1653, + "epoch": 0.5982537184628661, + "grad_norm": 0.33864712715148926, + "learning_rate": 1.6011641876914226e-05, + "loss": 0.0869, "step": 11785 }, { - "epoch": 0.29927655793882474, - "grad_norm": 0.4221962094306946, - "learning_rate": 1.8004822947074502e-05, - "loss": 0.1159, + "epoch": 0.5985075384537286, + "grad_norm": 0.4564129114151001, + "learning_rate": 1.600994974364181e-05, + "loss": 0.089, "step": 11790 }, { - "epoch": 0.29940347759868, - "grad_norm": 0.5996963381767273, - "learning_rate": 1.80039768160088e-05, - "loss": 0.1181, + "epoch": 0.5987613584445911, + "grad_norm": 0.3900724947452545, + "learning_rate": 1.6008257610369393e-05, + "loss": 0.0748, "step": 11795 }, { - "epoch": 0.29953039725853536, - "grad_norm": 0.7568656802177429, - "learning_rate": 1.80031306849431e-05, - "loss": 0.1074, + "epoch": 0.5990151784354536, + "grad_norm": 0.34131884574890137, + "learning_rate": 1.6006565477096977e-05, + "loss": 0.0985, "step": 11800 }, { - "epoch": 0.29965731691839065, - "grad_norm": 0.9006831049919128, - "learning_rate": 1.8002284553877397e-05, - "loss": 0.1216, + "epoch": 0.5992689984263161, + "grad_norm": 0.6740922331809998, + "learning_rate": 1.600487334382456e-05, + "loss": 0.0969, "step": 11805 }, { - "epoch": 0.299784236578246, - "grad_norm": 0.9933466911315918, - "learning_rate": 1.8001438422811696e-05, - "loss": 0.103, + "epoch": 0.5995228184171786, + "grad_norm": 0.41970062255859375, + "learning_rate": 1.6003181210552144e-05, + "loss": 0.0924, "step": 11810 }, { - "epoch": 0.2999111562381013, - "grad_norm": 0.6819742918014526, - "learning_rate": 1.8000592291745994e-05, - "loss": 0.1161, + "epoch": 0.599776638408041, + "grad_norm": 0.40490224957466125, + "learning_rate": 1.6001489077279728e-05, + "loss": 0.0926, "step": 11815 }, { - "epoch": 0.3000380758979566, - "grad_norm": 0.6438451409339905, - "learning_rate": 1.7999746160680292e-05, - "loss": 0.1342, + "epoch": 0.6000304583989035, + "grad_norm": 0.9713094234466553, + "learning_rate": 1.599979694400731e-05, + "loss": 0.1022, "step": 11820 }, { - "epoch": 0.3001649955578119, - "grad_norm": 0.8554864525794983, - "learning_rate": 1.7998900029614587e-05, - "loss": 0.117, + "epoch": 0.600284278389766, + "grad_norm": 0.3870560824871063, + "learning_rate": 1.5998104810734895e-05, + "loss": 0.0827, "step": 11825 }, { - "epoch": 0.30029191521766724, - "grad_norm": 0.8430250883102417, - "learning_rate": 1.7998053898548886e-05, - "loss": 0.1511, + "epoch": 0.6005380983806284, + "grad_norm": 0.44513827562332153, + "learning_rate": 1.5996412677462478e-05, + "loss": 0.0932, "step": 11830 }, { - "epoch": 0.3004188348775225, - "grad_norm": 0.7267314791679382, - "learning_rate": 1.7997207767483184e-05, - "loss": 0.1334, + "epoch": 0.6007919183714909, + "grad_norm": 0.42367058992385864, + "learning_rate": 1.5994720544190062e-05, + "loss": 0.0917, "step": 11835 }, { - "epoch": 0.30054575453737786, - "grad_norm": 1.0597418546676636, - "learning_rate": 1.7996361636417483e-05, - "loss": 0.1438, + "epoch": 0.6010457383623534, + "grad_norm": 0.5602151155471802, + "learning_rate": 1.5993028410917645e-05, + "loss": 0.099, "step": 11840 }, { - "epoch": 0.30067267419723315, - "grad_norm": 0.5338455438613892, - "learning_rate": 1.799551550535178e-05, - "loss": 0.1408, + "epoch": 0.6012995583532159, + "grad_norm": 0.43050816655158997, + "learning_rate": 1.599133627764523e-05, + "loss": 0.0836, "step": 11845 }, { - "epoch": 0.3007995938570885, - "grad_norm": 0.5664796829223633, - "learning_rate": 1.799466937428608e-05, - "loss": 0.1368, + "epoch": 0.6015533783440784, + "grad_norm": 0.34673821926116943, + "learning_rate": 1.5989644144372812e-05, + "loss": 0.0992, "step": 11850 }, { - "epoch": 0.30092651351694377, - "grad_norm": 0.6056821346282959, - "learning_rate": 1.7993823243220378e-05, - "loss": 0.1331, + "epoch": 0.6018071983349409, + "grad_norm": 0.3608914911746979, + "learning_rate": 1.5987952011100396e-05, + "loss": 0.0941, "step": 11855 }, { - "epoch": 0.3010534331767991, - "grad_norm": 0.7925805449485779, - "learning_rate": 1.7992977112154676e-05, - "loss": 0.1256, + "epoch": 0.6020610183258034, + "grad_norm": 0.4284670352935791, + "learning_rate": 1.598625987782798e-05, + "loss": 0.0925, "step": 11860 }, { - "epoch": 0.3011803528366544, - "grad_norm": 0.6671765446662903, - "learning_rate": 1.799213098108897e-05, - "loss": 0.1118, + "epoch": 0.6023148383166658, + "grad_norm": 0.5360268950462341, + "learning_rate": 1.5984567744555563e-05, + "loss": 0.0936, "step": 11865 }, { - "epoch": 0.30130727249650974, - "grad_norm": 0.8314164280891418, - "learning_rate": 1.799128485002327e-05, - "loss": 0.1291, + "epoch": 0.6025686583075283, + "grad_norm": 0.34779566526412964, + "learning_rate": 1.5982875611283147e-05, + "loss": 0.0871, "step": 11870 }, { - "epoch": 0.301434192156365, - "grad_norm": 0.49427008628845215, - "learning_rate": 1.7990438718957568e-05, - "loss": 0.132, + "epoch": 0.6028224782983908, + "grad_norm": 0.43765851855278015, + "learning_rate": 1.598118347801073e-05, + "loss": 0.0914, "step": 11875 }, { - "epoch": 0.3015611118162203, - "grad_norm": 0.608447790145874, - "learning_rate": 1.7989592587891866e-05, - "loss": 0.1221, + "epoch": 0.6030762982892532, + "grad_norm": 0.5663272142410278, + "learning_rate": 1.5979491344738314e-05, + "loss": 0.084, "step": 11880 }, { - "epoch": 0.30168803147607565, - "grad_norm": 0.8913175463676453, - "learning_rate": 1.7988746456826165e-05, - "loss": 0.1294, + "epoch": 0.6033301182801157, + "grad_norm": 0.4468323290348053, + "learning_rate": 1.5977799211465897e-05, + "loss": 0.0927, "step": 11885 }, { - "epoch": 0.30181495113593093, - "grad_norm": 0.6121737360954285, - "learning_rate": 1.798790032576046e-05, - "loss": 0.1238, + "epoch": 0.6035839382709782, + "grad_norm": 0.6487022042274475, + "learning_rate": 1.5976107078193477e-05, + "loss": 0.0985, "step": 11890 }, { - "epoch": 0.30194187079578627, - "grad_norm": 0.509806215763092, - "learning_rate": 1.7987054194694758e-05, - "loss": 0.1378, + "epoch": 0.6038377582618407, + "grad_norm": 0.5686355233192444, + "learning_rate": 1.5974414944921064e-05, + "loss": 0.1071, "step": 11895 }, { - "epoch": 0.30206879045564156, - "grad_norm": 1.6384482383728027, - "learning_rate": 1.7986208063629057e-05, - "loss": 0.1332, + "epoch": 0.6040915782527032, + "grad_norm": 0.4146348237991333, + "learning_rate": 1.5972722811648648e-05, + "loss": 0.0844, "step": 11900 }, { - "epoch": 0.3021957101154969, - "grad_norm": 0.5924645066261292, - "learning_rate": 1.7985361932563355e-05, - "loss": 0.1224, + "epoch": 0.6043453982435657, + "grad_norm": 0.37118101119995117, + "learning_rate": 1.5971030678376228e-05, + "loss": 0.0891, "step": 11905 }, { - "epoch": 0.3023226297753522, - "grad_norm": 0.5011929273605347, - "learning_rate": 1.7984515801497653e-05, - "loss": 0.1201, + "epoch": 0.6045992182344282, + "grad_norm": 0.4417383670806885, + "learning_rate": 1.5969338545103815e-05, + "loss": 0.0932, "step": 11910 }, { - "epoch": 0.3024495494352075, - "grad_norm": 0.6407788991928101, - "learning_rate": 1.7983669670431952e-05, - "loss": 0.1231, + "epoch": 0.6048530382252906, + "grad_norm": 0.4875818192958832, + "learning_rate": 1.5967646411831395e-05, + "loss": 0.0851, "step": 11915 }, { - "epoch": 0.3025764690950628, - "grad_norm": 0.6021493077278137, - "learning_rate": 1.798282353936625e-05, - "loss": 0.1109, + "epoch": 0.6051068582161531, + "grad_norm": 0.4297184944152832, + "learning_rate": 1.5965954278558982e-05, + "loss": 0.0887, "step": 11920 }, { - "epoch": 0.30270338875491815, - "grad_norm": 0.8985568284988403, - "learning_rate": 1.798197740830055e-05, - "loss": 0.1122, + "epoch": 0.6053606782070156, + "grad_norm": 0.5882421135902405, + "learning_rate": 1.5964262145286566e-05, + "loss": 0.0828, "step": 11925 }, { - "epoch": 0.30283030841477343, - "grad_norm": 1.045918583869934, - "learning_rate": 1.7981131277234844e-05, - "loss": 0.0964, + "epoch": 0.605614498197878, + "grad_norm": 0.4121566712856293, + "learning_rate": 1.5962570012014146e-05, + "loss": 0.092, "step": 11930 }, { - "epoch": 0.30295722807462877, - "grad_norm": 0.6242228150367737, - "learning_rate": 1.7980285146169142e-05, - "loss": 0.1401, + "epoch": 0.6058683181887405, + "grad_norm": 0.4164383113384247, + "learning_rate": 1.5960877878741733e-05, + "loss": 0.0916, "step": 11935 }, { - "epoch": 0.30308414773448406, - "grad_norm": 0.6083391904830933, - "learning_rate": 1.797943901510344e-05, - "loss": 0.1213, + "epoch": 0.606122138179603, + "grad_norm": 0.7164746522903442, + "learning_rate": 1.5959185745469313e-05, + "loss": 0.0945, "step": 11940 }, { - "epoch": 0.3032110673943394, - "grad_norm": 0.4603676497936249, - "learning_rate": 1.797859288403774e-05, - "loss": 0.1501, + "epoch": 0.6063759581704655, + "grad_norm": 0.3977768123149872, + "learning_rate": 1.5957493612196896e-05, + "loss": 0.0871, "step": 11945 }, { - "epoch": 0.3033379870541947, - "grad_norm": 0.9958428144454956, - "learning_rate": 1.7977746752972037e-05, - "loss": 0.1415, + "epoch": 0.606629778161328, + "grad_norm": 0.5656052827835083, + "learning_rate": 1.5955801478924483e-05, + "loss": 0.0728, "step": 11950 }, { - "epoch": 0.30346490671405, - "grad_norm": 1.5161710977554321, - "learning_rate": 1.7976900621906336e-05, - "loss": 0.1386, + "epoch": 0.6068835981521905, + "grad_norm": 0.6886550188064575, + "learning_rate": 1.5954109345652064e-05, + "loss": 0.0971, "step": 11955 }, { - "epoch": 0.3035918263739053, - "grad_norm": 0.6791282892227173, - "learning_rate": 1.7976054490840634e-05, - "loss": 0.14, + "epoch": 0.607137418143053, + "grad_norm": 0.30135658383369446, + "learning_rate": 1.5952417212379647e-05, + "loss": 0.097, "step": 11960 }, { - "epoch": 0.30371874603376064, - "grad_norm": 0.6133077144622803, - "learning_rate": 1.7975208359774932e-05, - "loss": 0.1111, + "epoch": 0.6073912381339154, + "grad_norm": 0.6407579779624939, + "learning_rate": 1.595072507910723e-05, + "loss": 0.0938, "step": 11965 }, { - "epoch": 0.30384566569361593, - "grad_norm": 0.5090328454971313, - "learning_rate": 1.7974362228709227e-05, - "loss": 0.1198, + "epoch": 0.6076450581247779, + "grad_norm": 0.3917185068130493, + "learning_rate": 1.5949032945834814e-05, + "loss": 0.0985, "step": 11970 }, { - "epoch": 0.30397258535347127, - "grad_norm": 0.8747912049293518, - "learning_rate": 1.7973516097643526e-05, - "loss": 0.117, + "epoch": 0.6078988781156404, + "grad_norm": 0.38503170013427734, + "learning_rate": 1.59473408125624e-05, + "loss": 0.0808, "step": 11975 }, { - "epoch": 0.30409950501332655, - "grad_norm": 0.6504578590393066, - "learning_rate": 1.7972669966577824e-05, - "loss": 0.1376, + "epoch": 0.6081526981065029, + "grad_norm": 0.856484591960907, + "learning_rate": 1.594564867928998e-05, + "loss": 0.092, "step": 11980 }, { - "epoch": 0.3042264246731819, - "grad_norm": 0.9833356142044067, - "learning_rate": 1.7971823835512123e-05, - "loss": 0.1115, + "epoch": 0.6084065180973653, + "grad_norm": 0.538896381855011, + "learning_rate": 1.5943956546017565e-05, + "loss": 0.0954, "step": 11985 }, { - "epoch": 0.3043533443330372, - "grad_norm": 0.8059728145599365, - "learning_rate": 1.797097770444642e-05, - "loss": 0.114, + "epoch": 0.6086603380882278, + "grad_norm": 0.5355377793312073, + "learning_rate": 1.594226441274515e-05, + "loss": 0.0904, "step": 11990 }, { - "epoch": 0.3044802639928925, - "grad_norm": 1.1578737497329712, - "learning_rate": 1.797013157338072e-05, - "loss": 0.106, + "epoch": 0.6089141580790903, + "grad_norm": 0.5098680853843689, + "learning_rate": 1.5940572279472732e-05, + "loss": 0.0823, "step": 11995 }, { - "epoch": 0.3046071836527478, - "grad_norm": 0.507258951663971, - "learning_rate": 1.7969285442315018e-05, - "loss": 0.1223, + "epoch": 0.6091679780699528, + "grad_norm": 0.4084436595439911, + "learning_rate": 1.5938880146200315e-05, + "loss": 0.082, "step": 12000 }, { - "epoch": 0.30473410331260314, - "grad_norm": 0.8611864447593689, - "learning_rate": 1.7968439311249316e-05, - "loss": 0.1434, + "epoch": 0.6094217980608153, + "grad_norm": 0.3876837193965912, + "learning_rate": 1.59371880129279e-05, + "loss": 0.1021, "step": 12005 }, { - "epoch": 0.30486102297245843, - "grad_norm": 0.6664344072341919, - "learning_rate": 1.796759318018361e-05, - "loss": 0.1473, + "epoch": 0.6096756180516778, + "grad_norm": 0.4292503297328949, + "learning_rate": 1.5935495879655483e-05, + "loss": 0.079, "step": 12010 }, { - "epoch": 0.30498794263231377, - "grad_norm": 0.9392525553703308, - "learning_rate": 1.796674704911791e-05, - "loss": 0.1365, + "epoch": 0.6099294380425402, + "grad_norm": 0.533645510673523, + "learning_rate": 1.5933803746383066e-05, + "loss": 0.0962, "step": 12015 }, { - "epoch": 0.30511486229216905, - "grad_norm": 0.6864877343177795, - "learning_rate": 1.7965900918052208e-05, - "loss": 0.1385, + "epoch": 0.6101832580334027, + "grad_norm": 0.36521783471107483, + "learning_rate": 1.593211161311065e-05, + "loss": 0.0956, "step": 12020 }, { - "epoch": 0.3052417819520244, - "grad_norm": 0.6563825011253357, - "learning_rate": 1.7965054786986507e-05, - "loss": 0.1205, + "epoch": 0.6104370780242652, + "grad_norm": 0.3349459767341614, + "learning_rate": 1.5930419479838233e-05, + "loss": 0.0904, "step": 12025 }, { - "epoch": 0.3053687016118797, - "grad_norm": 0.837984561920166, - "learning_rate": 1.79642086559208e-05, - "loss": 0.1182, + "epoch": 0.6106908980151277, + "grad_norm": 0.43317875266075134, + "learning_rate": 1.5928727346565817e-05, + "loss": 0.0913, "step": 12030 }, { - "epoch": 0.305495621271735, - "grad_norm": 0.7060363292694092, - "learning_rate": 1.79633625248551e-05, - "loss": 0.123, + "epoch": 0.6109447180059902, + "grad_norm": 0.4873206317424774, + "learning_rate": 1.59270352132934e-05, + "loss": 0.0916, "step": 12035 }, { - "epoch": 0.3056225409315903, - "grad_norm": 0.4740082323551178, - "learning_rate": 1.79625163937894e-05, - "loss": 0.1396, + "epoch": 0.6111985379968526, + "grad_norm": 0.38802623748779297, + "learning_rate": 1.5925343080020984e-05, + "loss": 0.0906, "step": 12040 }, { - "epoch": 0.3057494605914456, - "grad_norm": 0.6573173403739929, - "learning_rate": 1.7961670262723697e-05, - "loss": 0.1106, + "epoch": 0.6114523579877151, + "grad_norm": 0.5826549530029297, + "learning_rate": 1.5923650946748567e-05, + "loss": 0.0934, "step": 12045 }, { - "epoch": 0.30587638025130093, - "grad_norm": 0.6028203964233398, - "learning_rate": 1.7960824131657995e-05, - "loss": 0.1412, + "epoch": 0.6117061779785776, + "grad_norm": 0.5934155583381653, + "learning_rate": 1.592195881347615e-05, + "loss": 0.0948, "step": 12050 }, { - "epoch": 0.3060032999111562, - "grad_norm": 1.0297577381134033, - "learning_rate": 1.7959978000592294e-05, - "loss": 0.1183, + "epoch": 0.6119599979694401, + "grad_norm": 0.5034151077270508, + "learning_rate": 1.5920266680203734e-05, + "loss": 0.0793, "step": 12055 }, { - "epoch": 0.30613021957101155, - "grad_norm": 0.5358377695083618, - "learning_rate": 1.7959131869526592e-05, - "loss": 0.096, + "epoch": 0.6122138179603026, + "grad_norm": 0.5423274040222168, + "learning_rate": 1.5918574546931318e-05, + "loss": 0.0899, "step": 12060 }, { - "epoch": 0.30625713923086684, - "grad_norm": 0.8961896300315857, - "learning_rate": 1.795828573846089e-05, - "loss": 0.1331, + "epoch": 0.612467637951165, + "grad_norm": 0.6164039373397827, + "learning_rate": 1.59168824136589e-05, + "loss": 0.0932, "step": 12065 }, { - "epoch": 0.3063840588907222, - "grad_norm": 0.5976610779762268, - "learning_rate": 1.7957439607395185e-05, - "loss": 0.1182, + "epoch": 0.6127214579420275, + "grad_norm": 0.5863872766494751, + "learning_rate": 1.5915190280386485e-05, + "loss": 0.0858, "step": 12070 }, { - "epoch": 0.30651097855057746, - "grad_norm": 0.5689316987991333, - "learning_rate": 1.7956593476329484e-05, - "loss": 0.1286, + "epoch": 0.61297527793289, + "grad_norm": 0.3876388967037201, + "learning_rate": 1.591349814711407e-05, + "loss": 0.0785, "step": 12075 }, { - "epoch": 0.3066378982104328, - "grad_norm": 0.7775274515151978, - "learning_rate": 1.7955747345263782e-05, - "loss": 0.1432, + "epoch": 0.6132290979237525, + "grad_norm": 0.7438451051712036, + "learning_rate": 1.5911806013841652e-05, + "loss": 0.098, "step": 12080 }, { - "epoch": 0.3067648178702881, - "grad_norm": 0.5504811406135559, - "learning_rate": 1.795490121419808e-05, - "loss": 0.1244, + "epoch": 0.613482917914615, + "grad_norm": 0.48748913407325745, + "learning_rate": 1.5910113880569236e-05, + "loss": 0.0984, "step": 12085 }, { - "epoch": 0.3068917375301434, - "grad_norm": 0.6400503516197205, - "learning_rate": 1.795405508313238e-05, - "loss": 0.1043, + "epoch": 0.6137367379054774, + "grad_norm": 1.2986968755722046, + "learning_rate": 1.590842174729682e-05, + "loss": 0.0888, "step": 12090 }, { - "epoch": 0.3070186571899987, - "grad_norm": 0.8005660176277161, - "learning_rate": 1.7953208952066677e-05, - "loss": 0.1327, + "epoch": 0.6139905578963399, + "grad_norm": 0.525091826915741, + "learning_rate": 1.59067296140244e-05, + "loss": 0.0947, "step": 12095 }, { - "epoch": 0.30714557684985405, - "grad_norm": 0.7215431332588196, - "learning_rate": 1.7952362821000976e-05, - "loss": 0.1257, + "epoch": 0.6142443778872024, + "grad_norm": 0.42398321628570557, + "learning_rate": 1.5905037480751986e-05, + "loss": 0.0882, "step": 12100 }, { - "epoch": 0.30727249650970934, - "grad_norm": 0.793637216091156, - "learning_rate": 1.7951516689935274e-05, - "loss": 0.1251, + "epoch": 0.6144981978780649, + "grad_norm": 0.5521324276924133, + "learning_rate": 1.590334534747957e-05, + "loss": 0.0885, "step": 12105 }, { - "epoch": 0.3073994161695647, - "grad_norm": 0.682940661907196, - "learning_rate": 1.795067055886957e-05, - "loss": 0.1284, + "epoch": 0.6147520178689273, + "grad_norm": 0.5781256556510925, + "learning_rate": 1.5901653214207153e-05, + "loss": 0.1041, "step": 12110 }, { - "epoch": 0.30752633582941996, - "grad_norm": 0.5513983368873596, - "learning_rate": 1.7949824427803868e-05, - "loss": 0.1302, + "epoch": 0.6150058378597898, + "grad_norm": 0.8404202461242676, + "learning_rate": 1.5899961080934737e-05, + "loss": 0.0966, "step": 12115 }, { - "epoch": 0.3076532554892753, - "grad_norm": 0.621599555015564, - "learning_rate": 1.7948978296738166e-05, - "loss": 0.1258, + "epoch": 0.6152596578506523, + "grad_norm": 0.5177925229072571, + "learning_rate": 1.5898268947662317e-05, + "loss": 0.0846, "step": 12120 }, { - "epoch": 0.3077801751491306, - "grad_norm": 0.6562413573265076, - "learning_rate": 1.7948132165672464e-05, - "loss": 0.1385, + "epoch": 0.6155134778415148, + "grad_norm": 0.4168604910373688, + "learning_rate": 1.5896576814389904e-05, + "loss": 0.0937, "step": 12125 }, { - "epoch": 0.3079070948089859, - "grad_norm": 0.44536256790161133, - "learning_rate": 1.7947286034606763e-05, - "loss": 0.1644, + "epoch": 0.6157672978323773, + "grad_norm": 0.3698541820049286, + "learning_rate": 1.5894884681117488e-05, + "loss": 0.0905, "step": 12130 }, { - "epoch": 0.3080340144688412, - "grad_norm": 0.5406820178031921, - "learning_rate": 1.794643990354106e-05, - "loss": 0.1256, + "epoch": 0.6160211178232398, + "grad_norm": 0.43006083369255066, + "learning_rate": 1.5893192547845068e-05, + "loss": 0.0896, "step": 12135 }, { - "epoch": 0.30816093412869655, - "grad_norm": 0.8436091542243958, - "learning_rate": 1.794559377247536e-05, - "loss": 0.1287, + "epoch": 0.6162749378141023, + "grad_norm": 0.7461796402931213, + "learning_rate": 1.5891500414572655e-05, + "loss": 0.0853, "step": 12140 }, { - "epoch": 0.30828785378855184, - "grad_norm": 1.1192821264266968, - "learning_rate": 1.7944747641409658e-05, - "loss": 0.157, + "epoch": 0.6165287578049647, + "grad_norm": 0.3485049307346344, + "learning_rate": 1.5889808281300235e-05, + "loss": 0.0941, "step": 12145 }, { - "epoch": 0.3084147734484072, - "grad_norm": 0.6577200293540955, - "learning_rate": 1.7943901510343953e-05, - "loss": 0.1238, + "epoch": 0.6167825777958272, + "grad_norm": 0.3577573299407959, + "learning_rate": 1.588811614802782e-05, + "loss": 0.0934, "step": 12150 }, { - "epoch": 0.30854169310826246, - "grad_norm": 0.8020147085189819, - "learning_rate": 1.794305537927825e-05, - "loss": 0.1243, + "epoch": 0.6170363977866897, + "grad_norm": 0.43190285563468933, + "learning_rate": 1.5886424014755405e-05, + "loss": 0.0791, "step": 12155 }, { - "epoch": 0.3086686127681178, - "grad_norm": 0.5331521034240723, - "learning_rate": 1.794220924821255e-05, - "loss": 0.1491, + "epoch": 0.6172902177775521, + "grad_norm": 0.3519067168235779, + "learning_rate": 1.5884731881482985e-05, + "loss": 0.0942, "step": 12160 }, { - "epoch": 0.3087955324279731, - "grad_norm": 0.7375391721725464, - "learning_rate": 1.7941363117146848e-05, - "loss": 0.1371, + "epoch": 0.6175440377684146, + "grad_norm": 0.35261034965515137, + "learning_rate": 1.5883039748210572e-05, + "loss": 0.0882, "step": 12165 }, { - "epoch": 0.3089224520878284, - "grad_norm": 0.6245969533920288, - "learning_rate": 1.7940516986081143e-05, - "loss": 0.1282, + "epoch": 0.6177978577592771, + "grad_norm": 0.39754313230514526, + "learning_rate": 1.5881347614938153e-05, + "loss": 0.0867, "step": 12170 }, { - "epoch": 0.3090493717476837, - "grad_norm": 0.7958798408508301, - "learning_rate": 1.793967085501544e-05, - "loss": 0.1383, + "epoch": 0.6180516777501396, + "grad_norm": 0.5548686981201172, + "learning_rate": 1.5879655481665736e-05, + "loss": 0.099, "step": 12175 }, { - "epoch": 0.30917629140753905, - "grad_norm": 0.71240234375, - "learning_rate": 1.793882472394974e-05, - "loss": 0.1269, + "epoch": 0.6183054977410021, + "grad_norm": 0.526914656162262, + "learning_rate": 1.5877963348393323e-05, + "loss": 0.087, "step": 12180 }, { - "epoch": 0.30930321106739433, - "grad_norm": 0.9235799312591553, - "learning_rate": 1.793797859288404e-05, - "loss": 0.1281, + "epoch": 0.6185593177318646, + "grad_norm": 0.6457729339599609, + "learning_rate": 1.5876271215120903e-05, + "loss": 0.0894, "step": 12185 }, { - "epoch": 0.3094301307272497, - "grad_norm": 0.5237361788749695, - "learning_rate": 1.7937132461818337e-05, - "loss": 0.1477, + "epoch": 0.6188131377227271, + "grad_norm": 0.8887588381767273, + "learning_rate": 1.5874579081848487e-05, + "loss": 0.0885, "step": 12190 }, { - "epoch": 0.30955705038710496, - "grad_norm": 0.4803338348865509, - "learning_rate": 1.7936286330752635e-05, - "loss": 0.1222, + "epoch": 0.6190669577135895, + "grad_norm": 0.6914089918136597, + "learning_rate": 1.587288694857607e-05, + "loss": 0.0858, "step": 12195 }, { - "epoch": 0.3096839700469603, - "grad_norm": 0.48353153467178345, - "learning_rate": 1.7935440199686934e-05, - "loss": 0.1149, + "epoch": 0.619320777704452, + "grad_norm": 0.489515095949173, + "learning_rate": 1.5871194815303654e-05, + "loss": 0.0879, "step": 12200 }, { - "epoch": 0.3098108897068156, - "grad_norm": 0.7033267021179199, - "learning_rate": 1.7934594068621232e-05, - "loss": 0.0992, + "epoch": 0.6195745976953145, + "grad_norm": 0.5289825797080994, + "learning_rate": 1.5869502682031237e-05, + "loss": 0.092, "step": 12205 }, { - "epoch": 0.3099378093666709, - "grad_norm": 0.590275228023529, - "learning_rate": 1.7933747937555527e-05, - "loss": 0.1489, + "epoch": 0.6198284176861769, + "grad_norm": 0.48612451553344727, + "learning_rate": 1.586781054875882e-05, + "loss": 0.0885, "step": 12210 }, { - "epoch": 0.3100647290265262, - "grad_norm": 1.0128660202026367, - "learning_rate": 1.7932901806489825e-05, - "loss": 0.1458, + "epoch": 0.6200822376770394, + "grad_norm": 0.4033755362033844, + "learning_rate": 1.5866118415486404e-05, + "loss": 0.0861, "step": 12215 }, { - "epoch": 0.3101916486863815, - "grad_norm": 0.6889309883117676, - "learning_rate": 1.7932055675424124e-05, - "loss": 0.1252, + "epoch": 0.6203360576679019, + "grad_norm": 0.5344930291175842, + "learning_rate": 1.5864426282213988e-05, + "loss": 0.1081, "step": 12220 }, { - "epoch": 0.31031856834623683, - "grad_norm": 0.5934571027755737, - "learning_rate": 1.7931209544358422e-05, - "loss": 0.1358, + "epoch": 0.6205898776587644, + "grad_norm": 0.3625963032245636, + "learning_rate": 1.586273414894157e-05, + "loss": 0.0916, "step": 12225 }, { - "epoch": 0.3104454880060921, - "grad_norm": 0.8545227646827698, - "learning_rate": 1.793036341329272e-05, - "loss": 0.1223, + "epoch": 0.6208436976496269, + "grad_norm": 0.44668540358543396, + "learning_rate": 1.5861042015669155e-05, + "loss": 0.0906, "step": 12230 }, { - "epoch": 0.31057240766594746, - "grad_norm": 0.7063995003700256, - "learning_rate": 1.792951728222702e-05, - "loss": 0.1162, + "epoch": 0.6210975176404894, + "grad_norm": 0.47905129194259644, + "learning_rate": 1.585934988239674e-05, + "loss": 0.0832, "step": 12235 }, { - "epoch": 0.31069932732580274, - "grad_norm": 1.3720461130142212, - "learning_rate": 1.7928671151161317e-05, - "loss": 0.1291, + "epoch": 0.6213513376313519, + "grad_norm": 0.5475975275039673, + "learning_rate": 1.5857657749124322e-05, + "loss": 0.0921, "step": 12240 }, { - "epoch": 0.3108262469856581, - "grad_norm": 0.5033501982688904, - "learning_rate": 1.7927825020095616e-05, - "loss": 0.1057, + "epoch": 0.6216051576222144, + "grad_norm": 0.4593254625797272, + "learning_rate": 1.5855965615851906e-05, + "loss": 0.0856, "step": 12245 }, { - "epoch": 0.31095316664551337, - "grad_norm": 0.601880669593811, - "learning_rate": 1.792697888902991e-05, - "loss": 0.1232, + "epoch": 0.6218589776130768, + "grad_norm": 0.7837191224098206, + "learning_rate": 1.585427348257949e-05, + "loss": 0.0886, "step": 12250 }, { - "epoch": 0.3110800863053687, - "grad_norm": 0.6526083946228027, - "learning_rate": 1.792613275796421e-05, - "loss": 0.1237, + "epoch": 0.6221127976039393, + "grad_norm": 0.3772871196269989, + "learning_rate": 1.5852581349307073e-05, + "loss": 0.0924, "step": 12255 }, { - "epoch": 0.311207005965224, - "grad_norm": 0.5695832371711731, - "learning_rate": 1.7925286626898508e-05, - "loss": 0.1069, + "epoch": 0.6223666175948017, + "grad_norm": 0.42365825176239014, + "learning_rate": 1.5850889216034656e-05, + "loss": 0.0944, "step": 12260 }, { - "epoch": 0.31133392562507933, - "grad_norm": 0.7525449395179749, - "learning_rate": 1.7924440495832806e-05, - "loss": 0.1304, + "epoch": 0.6226204375856642, + "grad_norm": 0.36323311924934387, + "learning_rate": 1.584919708276224e-05, + "loss": 0.0899, "step": 12265 }, { - "epoch": 0.3114608452849346, - "grad_norm": 0.5821312069892883, - "learning_rate": 1.7923594364767104e-05, - "loss": 0.1206, + "epoch": 0.6228742575765267, + "grad_norm": 0.39171355962753296, + "learning_rate": 1.5847504949489823e-05, + "loss": 0.0811, "step": 12270 }, { - "epoch": 0.31158776494478996, - "grad_norm": 0.8194396495819092, - "learning_rate": 1.7922748233701403e-05, - "loss": 0.1155, + "epoch": 0.6231280775673892, + "grad_norm": 0.914495587348938, + "learning_rate": 1.5845812816217407e-05, + "loss": 0.0969, "step": 12275 }, { - "epoch": 0.31171468460464524, - "grad_norm": 0.7415961623191833, - "learning_rate": 1.79219021026357e-05, - "loss": 0.1277, + "epoch": 0.6233818975582517, + "grad_norm": 0.46378669142723083, + "learning_rate": 1.584412068294499e-05, + "loss": 0.0989, "step": 12280 }, { - "epoch": 0.3118416042645006, - "grad_norm": 1.104042649269104, - "learning_rate": 1.792105597157e-05, - "loss": 0.1381, + "epoch": 0.6236357175491142, + "grad_norm": 0.33671021461486816, + "learning_rate": 1.5842428549672574e-05, + "loss": 0.0854, "step": 12285 }, { - "epoch": 0.31196852392435587, - "grad_norm": 1.3493258953094482, - "learning_rate": 1.7920209840504295e-05, - "loss": 0.1407, + "epoch": 0.6238895375399767, + "grad_norm": 0.6674852967262268, + "learning_rate": 1.5840736416400158e-05, + "loss": 0.0882, "step": 12290 }, { - "epoch": 0.3120954435842112, - "grad_norm": 0.6137605309486389, - "learning_rate": 1.7919363709438593e-05, - "loss": 0.1153, + "epoch": 0.6241433575308392, + "grad_norm": 0.3157777190208435, + "learning_rate": 1.583904428312774e-05, + "loss": 0.0932, "step": 12295 }, { - "epoch": 0.3122223632440665, - "grad_norm": 0.8228901028633118, - "learning_rate": 1.791851757837289e-05, - "loss": 0.1526, + "epoch": 0.6243971775217017, + "grad_norm": 0.6090107560157776, + "learning_rate": 1.583735214985532e-05, + "loss": 0.0912, "step": 12300 }, { - "epoch": 0.31234928290392183, - "grad_norm": 0.6474363803863525, - "learning_rate": 1.791767144730719e-05, - "loss": 0.1616, + "epoch": 0.6246509975125641, + "grad_norm": 0.4318656325340271, + "learning_rate": 1.5835660016582908e-05, + "loss": 0.0967, "step": 12305 }, { - "epoch": 0.3124762025637771, - "grad_norm": 0.8927374482154846, - "learning_rate": 1.7916825316241485e-05, - "loss": 0.1082, + "epoch": 0.6249048175034265, + "grad_norm": 0.42950794100761414, + "learning_rate": 1.5833967883310492e-05, + "loss": 0.0841, "step": 12310 }, { - "epoch": 0.31260312222363246, - "grad_norm": 0.7790806293487549, - "learning_rate": 1.7915979185175783e-05, - "loss": 0.1321, + "epoch": 0.625158637494289, + "grad_norm": 0.37206709384918213, + "learning_rate": 1.5832275750038075e-05, + "loss": 0.093, "step": 12315 }, { - "epoch": 0.31273004188348774, - "grad_norm": 0.48959606885910034, - "learning_rate": 1.7915133054110082e-05, - "loss": 0.1061, + "epoch": 0.6254124574851515, + "grad_norm": 0.5274568200111389, + "learning_rate": 1.583058361676566e-05, + "loss": 0.1056, "step": 12320 }, { - "epoch": 0.3128569615433431, - "grad_norm": 0.7265244722366333, - "learning_rate": 1.791428692304438e-05, - "loss": 0.1229, + "epoch": 0.625666277476014, + "grad_norm": 0.39392906427383423, + "learning_rate": 1.582889148349324e-05, + "loss": 0.0845, "step": 12325 }, { - "epoch": 0.31298388120319837, - "grad_norm": 0.7290911674499512, - "learning_rate": 1.791344079197868e-05, - "loss": 0.1404, + "epoch": 0.6259200974668765, + "grad_norm": 0.6189650297164917, + "learning_rate": 1.5827199350220826e-05, + "loss": 0.0961, "step": 12330 }, { - "epoch": 0.3131108008630537, - "grad_norm": 0.6224589347839355, - "learning_rate": 1.7912594660912977e-05, - "loss": 0.1182, + "epoch": 0.626173917457739, + "grad_norm": 0.5449414253234863, + "learning_rate": 1.582550721694841e-05, + "loss": 0.0868, "step": 12335 }, { - "epoch": 0.313237720522909, - "grad_norm": 0.7020136117935181, - "learning_rate": 1.7911748529847275e-05, - "loss": 0.1195, + "epoch": 0.6264277374486015, + "grad_norm": 0.3943621516227722, + "learning_rate": 1.582381508367599e-05, + "loss": 0.0779, "step": 12340 }, { - "epoch": 0.31336464018276433, - "grad_norm": 0.477474570274353, - "learning_rate": 1.7910902398781574e-05, - "loss": 0.1429, + "epoch": 0.626681557439464, + "grad_norm": 0.5067428946495056, + "learning_rate": 1.5822122950403577e-05, + "loss": 0.0941, "step": 12345 }, { - "epoch": 0.3134915598426196, - "grad_norm": 1.173130989074707, - "learning_rate": 1.791005626771587e-05, - "loss": 0.1475, + "epoch": 0.6269353774303265, + "grad_norm": 0.4015895426273346, + "learning_rate": 1.5820430817131157e-05, + "loss": 0.0895, "step": 12350 }, { - "epoch": 0.31361847950247496, - "grad_norm": 0.6749996542930603, - "learning_rate": 1.7909210136650167e-05, - "loss": 0.135, + "epoch": 0.6271891974211888, + "grad_norm": 0.3975540101528168, + "learning_rate": 1.5818738683858744e-05, + "loss": 0.0934, "step": 12355 }, { - "epoch": 0.31374539916233024, - "grad_norm": 1.9078925848007202, - "learning_rate": 1.7908364005584466e-05, - "loss": 0.1278, + "epoch": 0.6274430174120513, + "grad_norm": 0.430369108915329, + "learning_rate": 1.5817046550586327e-05, + "loss": 0.1002, "step": 12360 }, { - "epoch": 0.3138723188221856, - "grad_norm": 0.9514065980911255, - "learning_rate": 1.7907517874518764e-05, - "loss": 0.1564, + "epoch": 0.6276968374029138, + "grad_norm": 0.6089135408401489, + "learning_rate": 1.5815354417313907e-05, + "loss": 0.0862, "step": 12365 }, { - "epoch": 0.31399923848204087, - "grad_norm": 0.5917091965675354, - "learning_rate": 1.7906671743453062e-05, - "loss": 0.1268, + "epoch": 0.6279506573937763, + "grad_norm": 0.43870821595191956, + "learning_rate": 1.5813662284041494e-05, + "loss": 0.0919, "step": 12370 }, { - "epoch": 0.3141261581418962, - "grad_norm": 1.4226995706558228, - "learning_rate": 1.790582561238736e-05, - "loss": 0.1483, + "epoch": 0.6282044773846388, + "grad_norm": 0.49393364787101746, + "learning_rate": 1.5811970150769074e-05, + "loss": 0.0744, "step": 12375 }, { - "epoch": 0.3142530778017515, - "grad_norm": 0.6780105829238892, - "learning_rate": 1.790497948132166e-05, - "loss": 0.1109, + "epoch": 0.6284582973755013, + "grad_norm": 0.3724878132343292, + "learning_rate": 1.5810278017496658e-05, + "loss": 0.0882, "step": 12380 }, { - "epoch": 0.31437999746160683, - "grad_norm": 0.4586767852306366, - "learning_rate": 1.7904133350255958e-05, - "loss": 0.1298, + "epoch": 0.6287121173663638, + "grad_norm": 0.43538209795951843, + "learning_rate": 1.5808585884224245e-05, + "loss": 0.0975, "step": 12385 }, { - "epoch": 0.3145069171214621, - "grad_norm": 1.0541783571243286, - "learning_rate": 1.7903287219190256e-05, - "loss": 0.1298, + "epoch": 0.6289659373572263, + "grad_norm": 0.4566599130630493, + "learning_rate": 1.5806893750951825e-05, + "loss": 0.0894, "step": 12390 }, { - "epoch": 0.3146338367813174, - "grad_norm": 0.8873620629310608, - "learning_rate": 1.790244108812455e-05, - "loss": 0.1463, + "epoch": 0.6292197573480888, + "grad_norm": 0.5635015368461609, + "learning_rate": 1.580520161767941e-05, + "loss": 0.0965, "step": 12395 }, { - "epoch": 0.31476075644117274, - "grad_norm": 0.5752055048942566, - "learning_rate": 1.790159495705885e-05, - "loss": 0.1176, + "epoch": 0.6294735773389513, + "grad_norm": 0.3993977904319763, + "learning_rate": 1.5803509484406992e-05, + "loss": 0.0837, "step": 12400 }, { - "epoch": 0.314887676101028, - "grad_norm": 0.6977170705795288, - "learning_rate": 1.7900748825993148e-05, - "loss": 0.1245, + "epoch": 0.6297273973298136, + "grad_norm": 1.7102850675582886, + "learning_rate": 1.5801817351134576e-05, + "loss": 0.0964, "step": 12405 }, { - "epoch": 0.31501459576088336, - "grad_norm": 0.7732431292533875, - "learning_rate": 1.7899902694927446e-05, - "loss": 0.1113, + "epoch": 0.6299812173206761, + "grad_norm": 0.6190375089645386, + "learning_rate": 1.5800125217862163e-05, + "loss": 0.0833, "step": 12410 }, { - "epoch": 0.31514151542073865, - "grad_norm": 0.6753954887390137, - "learning_rate": 1.7899056563861745e-05, - "loss": 0.1389, + "epoch": 0.6302350373115386, + "grad_norm": 0.6037442684173584, + "learning_rate": 1.5798433084589743e-05, + "loss": 0.0892, "step": 12415 }, { - "epoch": 0.315268435080594, - "grad_norm": 0.7564544677734375, - "learning_rate": 1.7898210432796043e-05, - "loss": 0.1567, + "epoch": 0.6304888573024011, + "grad_norm": 0.36666348576545715, + "learning_rate": 1.5796740951317326e-05, + "loss": 0.0823, "step": 12420 }, { - "epoch": 0.3153953547404493, - "grad_norm": 0.6495887637138367, - "learning_rate": 1.789736430173034e-05, - "loss": 0.1373, + "epoch": 0.6307426772932636, + "grad_norm": 0.8224522471427917, + "learning_rate": 1.579504881804491e-05, + "loss": 0.0833, "step": 12425 }, { - "epoch": 0.3155222744003046, - "grad_norm": 0.8097955584526062, - "learning_rate": 1.789651817066464e-05, - "loss": 0.1095, + "epoch": 0.6309964972841261, + "grad_norm": 0.37060612440109253, + "learning_rate": 1.5793356684772493e-05, + "loss": 0.09, "step": 12430 }, { - "epoch": 0.3156491940601599, - "grad_norm": 0.5367494821548462, - "learning_rate": 1.7895672039598935e-05, - "loss": 0.1043, + "epoch": 0.6312503172749886, + "grad_norm": 0.362363338470459, + "learning_rate": 1.5791664551500077e-05, + "loss": 0.0871, "step": 12435 }, { - "epoch": 0.31577611372001524, - "grad_norm": 0.6050944924354553, - "learning_rate": 1.7894825908533233e-05, - "loss": 0.122, + "epoch": 0.6315041372658511, + "grad_norm": 0.4423457980155945, + "learning_rate": 1.578997241822766e-05, + "loss": 0.0924, "step": 12440 }, { - "epoch": 0.3159030333798705, - "grad_norm": 0.5090828537940979, - "learning_rate": 1.789397977746753e-05, - "loss": 0.1528, + "epoch": 0.6317579572567136, + "grad_norm": 0.3518649935722351, + "learning_rate": 1.5788280284955244e-05, + "loss": 0.0833, "step": 12445 }, { - "epoch": 0.31602995303972586, - "grad_norm": 0.6532463431358337, - "learning_rate": 1.789313364640183e-05, - "loss": 0.1168, + "epoch": 0.6320117772475761, + "grad_norm": 0.46395477652549744, + "learning_rate": 1.5786588151682828e-05, + "loss": 0.0922, "step": 12450 }, { - "epoch": 0.31615687269958115, - "grad_norm": 0.6214874982833862, - "learning_rate": 1.7892287515336125e-05, - "loss": 0.113, + "epoch": 0.6322655972384384, + "grad_norm": 0.34381961822509766, + "learning_rate": 1.578489601841041e-05, + "loss": 0.0995, "step": 12455 }, { - "epoch": 0.3162837923594365, - "grad_norm": 1.0586261749267578, - "learning_rate": 1.7891441384270423e-05, - "loss": 0.1021, + "epoch": 0.6325194172293009, + "grad_norm": 0.4519914388656616, + "learning_rate": 1.5783203885137995e-05, + "loss": 0.0853, "step": 12460 }, { - "epoch": 0.3164107120192918, - "grad_norm": 0.9258720874786377, - "learning_rate": 1.7890595253204722e-05, - "loss": 0.1387, + "epoch": 0.6327732372201634, + "grad_norm": 0.30855950713157654, + "learning_rate": 1.5781511751865578e-05, + "loss": 0.0788, "step": 12465 }, { - "epoch": 0.3165376316791471, - "grad_norm": 0.7597251534461975, - "learning_rate": 1.788974912213902e-05, - "loss": 0.1275, + "epoch": 0.6330270572110259, + "grad_norm": 0.4378630518913269, + "learning_rate": 1.5779819618593162e-05, + "loss": 0.0803, "step": 12470 }, { - "epoch": 0.3166645513390024, - "grad_norm": 0.4584490656852722, - "learning_rate": 1.788890299107332e-05, - "loss": 0.1095, + "epoch": 0.6332808772018884, + "grad_norm": 0.7794264554977417, + "learning_rate": 1.5778127485320745e-05, + "loss": 0.0947, "step": 12475 }, { - "epoch": 0.31679147099885774, - "grad_norm": 1.0131702423095703, - "learning_rate": 1.7888056860007617e-05, - "loss": 0.1694, + "epoch": 0.6335346971927509, + "grad_norm": 0.44606897234916687, + "learning_rate": 1.577643535204833e-05, + "loss": 0.0966, "step": 12480 }, { - "epoch": 0.316918390658713, - "grad_norm": 0.6071594953536987, - "learning_rate": 1.7887210728941915e-05, - "loss": 0.1275, + "epoch": 0.6337885171836134, + "grad_norm": 0.8295760750770569, + "learning_rate": 1.5774743218775912e-05, + "loss": 0.0973, "step": 12485 }, { - "epoch": 0.31704531031856836, - "grad_norm": 0.8175333142280579, - "learning_rate": 1.7886364597876214e-05, - "loss": 0.1384, + "epoch": 0.6340423371744759, + "grad_norm": 0.6100575923919678, + "learning_rate": 1.5773051085503496e-05, + "loss": 0.0763, "step": 12490 }, { - "epoch": 0.31717222997842365, - "grad_norm": 0.5272860527038574, - "learning_rate": 1.788551846681051e-05, - "loss": 0.1264, + "epoch": 0.6342961571653384, + "grad_norm": 0.43771716952323914, + "learning_rate": 1.577135895223108e-05, + "loss": 0.092, "step": 12495 }, { - "epoch": 0.317299149638279, - "grad_norm": 0.6092495322227478, - "learning_rate": 1.7884672335744807e-05, - "loss": 0.1295, + "epoch": 0.6345499771562009, + "grad_norm": 0.43595728278160095, + "learning_rate": 1.5769666818958663e-05, + "loss": 0.083, "step": 12500 }, { - "epoch": 0.3174260692981343, - "grad_norm": 0.9540702104568481, - "learning_rate": 1.7883826204679106e-05, - "loss": 0.1275, + "epoch": 0.6348037971470633, + "grad_norm": 0.5066717267036438, + "learning_rate": 1.5767974685686247e-05, + "loss": 0.0909, "step": 12505 }, { - "epoch": 0.3175529889579896, - "grad_norm": 0.44694605469703674, - "learning_rate": 1.7882980073613404e-05, - "loss": 0.1049, + "epoch": 0.6350576171379257, + "grad_norm": 0.42285871505737305, + "learning_rate": 1.576628255241383e-05, + "loss": 0.0894, "step": 12510 }, { - "epoch": 0.3176799086178449, - "grad_norm": 1.1299331188201904, - "learning_rate": 1.7882133942547702e-05, - "loss": 0.1366, + "epoch": 0.6353114371287882, + "grad_norm": 0.35143038630485535, + "learning_rate": 1.5764590419141414e-05, + "loss": 0.0883, "step": 12515 }, { - "epoch": 0.31780682827770024, - "grad_norm": 0.8791343569755554, - "learning_rate": 1.7881287811482e-05, - "loss": 0.1336, + "epoch": 0.6355652571196507, + "grad_norm": 0.36473914980888367, + "learning_rate": 1.5762898285868997e-05, + "loss": 0.091, "step": 12520 }, { - "epoch": 0.3179337479375555, - "grad_norm": 0.45984092354774475, - "learning_rate": 1.78804416804163e-05, - "loss": 0.1518, + "epoch": 0.6358190771105132, + "grad_norm": 0.4791051745414734, + "learning_rate": 1.576120615259658e-05, + "loss": 0.1023, "step": 12525 }, { - "epoch": 0.31806066759741086, - "grad_norm": 0.5226448774337769, - "learning_rate": 1.7879595549350598e-05, - "loss": 0.1296, + "epoch": 0.6360728971013757, + "grad_norm": 0.3315756916999817, + "learning_rate": 1.575951401932416e-05, + "loss": 0.0806, "step": 12530 }, { - "epoch": 0.31818758725726615, - "grad_norm": 0.5544032454490662, - "learning_rate": 1.7878749418284893e-05, - "loss": 0.1145, + "epoch": 0.6363267170922382, + "grad_norm": 0.3584679365158081, + "learning_rate": 1.5757821886051748e-05, + "loss": 0.0875, "step": 12535 }, { - "epoch": 0.3183145069171215, - "grad_norm": 0.6903852224349976, - "learning_rate": 1.787790328721919e-05, - "loss": 0.1101, + "epoch": 0.6365805370831007, + "grad_norm": 0.40165871381759644, + "learning_rate": 1.575612975277933e-05, + "loss": 0.0916, "step": 12540 }, { - "epoch": 0.31844142657697677, - "grad_norm": 0.5193667411804199, - "learning_rate": 1.787705715615349e-05, - "loss": 0.0955, + "epoch": 0.6368343570739632, + "grad_norm": 0.8896801471710205, + "learning_rate": 1.575443761950691e-05, + "loss": 0.0886, "step": 12545 }, { - "epoch": 0.3185683462368321, - "grad_norm": 0.664323091506958, - "learning_rate": 1.7876211025087788e-05, - "loss": 0.0921, + "epoch": 0.6370881770648257, + "grad_norm": 0.3733901083469391, + "learning_rate": 1.57527454862345e-05, + "loss": 0.0791, "step": 12550 }, { - "epoch": 0.3186952658966874, - "grad_norm": 1.2426121234893799, - "learning_rate": 1.7875364894022086e-05, - "loss": 0.1279, + "epoch": 0.6373419970556881, + "grad_norm": 0.43153393268585205, + "learning_rate": 1.575105335296208e-05, + "loss": 0.0894, "step": 12555 }, { - "epoch": 0.3188221855565427, - "grad_norm": 1.280501127243042, - "learning_rate": 1.7874518762956385e-05, - "loss": 0.1253, + "epoch": 0.6375958170465506, + "grad_norm": 0.6183637976646423, + "learning_rate": 1.5749361219689666e-05, + "loss": 0.0858, "step": 12560 }, { - "epoch": 0.318949105216398, - "grad_norm": 0.6945462822914124, - "learning_rate": 1.7873672631890683e-05, - "loss": 0.1267, + "epoch": 0.637849637037413, + "grad_norm": 0.4100203216075897, + "learning_rate": 1.574766908641725e-05, + "loss": 0.1033, "step": 12565 }, { - "epoch": 0.3190760248762533, - "grad_norm": 0.5747771859169006, - "learning_rate": 1.787282650082498e-05, - "loss": 0.1225, + "epoch": 0.6381034570282755, + "grad_norm": 0.5121060013771057, + "learning_rate": 1.574597695314483e-05, + "loss": 0.0868, "step": 12570 }, { - "epoch": 0.31920294453610865, - "grad_norm": 0.6856048703193665, - "learning_rate": 1.7871980369759277e-05, - "loss": 0.1325, + "epoch": 0.638357277019138, + "grad_norm": 1.0560269355773926, + "learning_rate": 1.5744284819872416e-05, + "loss": 0.09, "step": 12575 }, { - "epoch": 0.31932986419596393, - "grad_norm": 0.7884708642959595, - "learning_rate": 1.7871134238693575e-05, - "loss": 0.1021, + "epoch": 0.6386110970100005, + "grad_norm": 0.365310937166214, + "learning_rate": 1.5742592686599996e-05, + "loss": 0.0885, "step": 12580 }, { - "epoch": 0.31945678385581927, - "grad_norm": 0.38450977206230164, - "learning_rate": 1.7870288107627873e-05, - "loss": 0.1281, + "epoch": 0.638864917000863, + "grad_norm": 0.41774865984916687, + "learning_rate": 1.574090055332758e-05, + "loss": 0.0791, "step": 12585 }, { - "epoch": 0.31958370351567456, - "grad_norm": 0.48311489820480347, - "learning_rate": 1.7869441976562172e-05, - "loss": 0.1011, + "epoch": 0.6391187369917255, + "grad_norm": 0.4893551468849182, + "learning_rate": 1.5739208420055167e-05, + "loss": 0.0845, "step": 12590 }, { - "epoch": 0.3197106231755299, - "grad_norm": 0.5911639332771301, - "learning_rate": 1.7868595845496467e-05, - "loss": 0.1485, + "epoch": 0.639372556982588, + "grad_norm": 0.5731279850006104, + "learning_rate": 1.5737516286782747e-05, + "loss": 0.1017, "step": 12595 }, { - "epoch": 0.3198375428353852, - "grad_norm": 1.2138959169387817, - "learning_rate": 1.7867749714430765e-05, - "loss": 0.1223, + "epoch": 0.6396263769734505, + "grad_norm": 0.6109359860420227, + "learning_rate": 1.573582415351033e-05, + "loss": 0.0796, "step": 12600 }, { - "epoch": 0.3199644624952405, - "grad_norm": 0.896991491317749, - "learning_rate": 1.7866903583365064e-05, - "loss": 0.1149, + "epoch": 0.6398801969643129, + "grad_norm": 0.3426520526409149, + "learning_rate": 1.5734132020237914e-05, + "loss": 0.0999, "step": 12605 }, { - "epoch": 0.3200913821550958, - "grad_norm": 0.8059782981872559, - "learning_rate": 1.7866057452299362e-05, - "loss": 0.1487, + "epoch": 0.6401340169551754, + "grad_norm": 0.3924323618412018, + "learning_rate": 1.5732439886965498e-05, + "loss": 0.0845, "step": 12610 }, { - "epoch": 0.32021830181495115, - "grad_norm": 0.9824503660202026, - "learning_rate": 1.786521132123366e-05, - "loss": 0.1325, + "epoch": 0.6403878369460378, + "grad_norm": 1.8754016160964966, + "learning_rate": 1.5730747753693085e-05, + "loss": 0.1001, "step": 12615 }, { - "epoch": 0.32034522147480643, - "grad_norm": 0.636685848236084, - "learning_rate": 1.786436519016796e-05, - "loss": 0.0989, + "epoch": 0.6406416569369003, + "grad_norm": 0.3848492205142975, + "learning_rate": 1.5729055620420665e-05, + "loss": 0.0788, "step": 12620 }, { - "epoch": 0.32047214113466177, - "grad_norm": 0.4544721245765686, - "learning_rate": 1.7863519059102257e-05, - "loss": 0.1246, + "epoch": 0.6408954769277628, + "grad_norm": 0.671140730381012, + "learning_rate": 1.572736348714825e-05, + "loss": 0.0758, "step": 12625 }, { - "epoch": 0.32059906079451705, - "grad_norm": 0.47396227717399597, - "learning_rate": 1.7862672928036556e-05, - "loss": 0.1053, + "epoch": 0.6411492969186253, + "grad_norm": 0.38087940216064453, + "learning_rate": 1.5725671353875832e-05, + "loss": 0.0932, "step": 12630 }, { - "epoch": 0.3207259804543724, - "grad_norm": 0.6803264021873474, - "learning_rate": 1.786182679697085e-05, - "loss": 0.1104, + "epoch": 0.6414031169094878, + "grad_norm": 0.5257353782653809, + "learning_rate": 1.5723979220603415e-05, + "loss": 0.0882, "step": 12635 }, { - "epoch": 0.3208529001142277, - "grad_norm": 0.7021086812019348, - "learning_rate": 1.786098066590515e-05, - "loss": 0.1246, + "epoch": 0.6416569369003503, + "grad_norm": 0.35013777017593384, + "learning_rate": 1.5722287087331e-05, + "loss": 0.0856, "step": 12640 }, { - "epoch": 0.320979819774083, - "grad_norm": 0.7126387357711792, - "learning_rate": 1.7860134534839447e-05, - "loss": 0.1179, + "epoch": 0.6419107568912128, + "grad_norm": 0.4160863161087036, + "learning_rate": 1.5720594954058582e-05, + "loss": 0.0833, "step": 12645 }, { - "epoch": 0.3211067394339383, - "grad_norm": 1.0175765752792358, - "learning_rate": 1.7859288403773746e-05, - "loss": 0.153, + "epoch": 0.6421645768820752, + "grad_norm": 0.3413524031639099, + "learning_rate": 1.5718902820786166e-05, + "loss": 0.0802, "step": 12650 }, { - "epoch": 0.32123365909379364, - "grad_norm": 0.8803224563598633, - "learning_rate": 1.7858442272708044e-05, - "loss": 0.119, + "epoch": 0.6424183968729377, + "grad_norm": 0.5095176100730896, + "learning_rate": 1.571721068751375e-05, + "loss": 0.0931, "step": 12655 }, { - "epoch": 0.32136057875364893, - "grad_norm": 0.7037733197212219, - "learning_rate": 1.7857596141642343e-05, - "loss": 0.128, + "epoch": 0.6426722168638002, + "grad_norm": 0.45131492614746094, + "learning_rate": 1.5715518554241333e-05, + "loss": 0.0814, "step": 12660 }, { - "epoch": 0.32148749841350427, - "grad_norm": 1.149380087852478, - "learning_rate": 1.785675001057664e-05, - "loss": 0.1268, + "epoch": 0.6429260368546627, + "grad_norm": 0.4562227427959442, + "learning_rate": 1.5713826420968917e-05, + "loss": 0.0849, "step": 12665 }, { - "epoch": 0.32161441807335955, - "grad_norm": 0.6205734610557556, - "learning_rate": 1.785590387951094e-05, - "loss": 0.1084, + "epoch": 0.6431798568455251, + "grad_norm": 0.38386526703834534, + "learning_rate": 1.57121342876965e-05, + "loss": 0.0848, "step": 12670 }, { - "epoch": 0.3217413377332149, - "grad_norm": 1.5584102869033813, - "learning_rate": 1.7855057748445234e-05, - "loss": 0.1166, + "epoch": 0.6434336768363876, + "grad_norm": 0.5002121329307556, + "learning_rate": 1.5710442154424084e-05, + "loss": 0.1066, "step": 12675 }, { - "epoch": 0.3218682573930702, - "grad_norm": 0.644212007522583, - "learning_rate": 1.7854211617379533e-05, - "loss": 0.1027, + "epoch": 0.6436874968272501, + "grad_norm": 0.42015817761421204, + "learning_rate": 1.5708750021151667e-05, + "loss": 0.0995, "step": 12680 }, { - "epoch": 0.3219951770529255, - "grad_norm": 0.9415879249572754, - "learning_rate": 1.785336548631383e-05, - "loss": 0.1303, + "epoch": 0.6439413168181126, + "grad_norm": 0.3642427921295166, + "learning_rate": 1.570705788787925e-05, + "loss": 0.0811, "step": 12685 }, { - "epoch": 0.3221220967127808, - "grad_norm": 0.7308040261268616, - "learning_rate": 1.785251935524813e-05, - "loss": 0.118, + "epoch": 0.6441951368089751, + "grad_norm": 0.40864598751068115, + "learning_rate": 1.5705365754606834e-05, + "loss": 0.0896, "step": 12690 }, { - "epoch": 0.32224901637263614, - "grad_norm": 0.6291442513465881, - "learning_rate": 1.7851673224182428e-05, - "loss": 0.1109, + "epoch": 0.6444489567998376, + "grad_norm": 0.3729974925518036, + "learning_rate": 1.5703673621334418e-05, + "loss": 0.0944, "step": 12695 }, { - "epoch": 0.32237593603249143, - "grad_norm": 0.7900291085243225, - "learning_rate": 1.7850827093116726e-05, - "loss": 0.1164, + "epoch": 0.6447027767907, + "grad_norm": 0.8188928365707397, + "learning_rate": 1.5701981488062e-05, + "loss": 0.0831, "step": 12700 }, { - "epoch": 0.32250285569234677, - "grad_norm": 0.7580839395523071, - "learning_rate": 1.7849980962051025e-05, - "loss": 0.1179, + "epoch": 0.6449565967815625, + "grad_norm": 0.49652352929115295, + "learning_rate": 1.5700289354789585e-05, + "loss": 0.0879, "step": 12705 }, { - "epoch": 0.32262977535220205, - "grad_norm": 0.7359871864318848, - "learning_rate": 1.7849134830985323e-05, - "loss": 0.1207, + "epoch": 0.645210416772425, + "grad_norm": 0.5149166584014893, + "learning_rate": 1.569859722151717e-05, + "loss": 0.0998, "step": 12710 }, { - "epoch": 0.3227566950120574, - "grad_norm": 0.7279356122016907, - "learning_rate": 1.7848288699919618e-05, - "loss": 0.1303, + "epoch": 0.6454642367632875, + "grad_norm": 0.3833802044391632, + "learning_rate": 1.5696905088244752e-05, + "loss": 0.0943, "step": 12715 }, { - "epoch": 0.3228836146719127, - "grad_norm": 0.7038505673408508, - "learning_rate": 1.7847442568853917e-05, - "loss": 0.1034, + "epoch": 0.64571805675415, + "grad_norm": 0.5231676697731018, + "learning_rate": 1.5695212954972336e-05, + "loss": 0.0819, "step": 12720 }, { - "epoch": 0.323010534331768, - "grad_norm": 0.5795091986656189, - "learning_rate": 1.7846596437788215e-05, - "loss": 0.0934, + "epoch": 0.6459718767450124, + "grad_norm": 0.977109432220459, + "learning_rate": 1.569352082169992e-05, + "loss": 0.0956, "step": 12725 }, { - "epoch": 0.3231374539916233, - "grad_norm": 0.9701076745986938, - "learning_rate": 1.7845750306722513e-05, - "loss": 0.1217, + "epoch": 0.6462256967358749, + "grad_norm": 0.4414095878601074, + "learning_rate": 1.5691828688427503e-05, + "loss": 0.0873, "step": 12730 }, { - "epoch": 0.3232643736514786, - "grad_norm": 0.6891577839851379, - "learning_rate": 1.784490417565681e-05, - "loss": 0.1075, + "epoch": 0.6464795167267374, + "grad_norm": 0.46533313393592834, + "learning_rate": 1.5690136555155083e-05, + "loss": 0.0878, "step": 12735 }, { - "epoch": 0.3233912933113339, - "grad_norm": 0.8595738410949707, - "learning_rate": 1.7844058044591107e-05, - "loss": 0.1145, + "epoch": 0.6467333367175999, + "grad_norm": 0.4510734975337982, + "learning_rate": 1.568844442188267e-05, + "loss": 0.0954, "step": 12740 }, { - "epoch": 0.3235182129711892, - "grad_norm": 0.55492103099823, - "learning_rate": 1.7843211913525405e-05, - "loss": 0.1095, + "epoch": 0.6469871567084624, + "grad_norm": 0.43129807710647583, + "learning_rate": 1.5686752288610253e-05, + "loss": 0.0892, "step": 12745 }, { - "epoch": 0.32364513263104455, - "grad_norm": 0.5590693950653076, - "learning_rate": 1.7842365782459704e-05, - "loss": 0.1414, + "epoch": 0.6472409766993248, + "grad_norm": 0.5520406365394592, + "learning_rate": 1.5685060155337837e-05, + "loss": 0.0969, "step": 12750 }, { - "epoch": 0.32377205229089984, - "grad_norm": 0.5927556753158569, - "learning_rate": 1.7841519651394002e-05, - "loss": 0.1268, + "epoch": 0.6474947966901873, + "grad_norm": 0.40193498134613037, + "learning_rate": 1.568336802206542e-05, + "loss": 0.0808, "step": 12755 }, { - "epoch": 0.3238989719507552, - "grad_norm": 0.5598572492599487, - "learning_rate": 1.78406735203283e-05, - "loss": 0.1139, + "epoch": 0.6477486166810498, + "grad_norm": 0.3910532295703888, + "learning_rate": 1.5681675888793e-05, + "loss": 0.0952, "step": 12760 }, { - "epoch": 0.32402589161061046, - "grad_norm": 0.6423149108886719, - "learning_rate": 1.78398273892626e-05, - "loss": 0.1213, + "epoch": 0.6480024366719123, + "grad_norm": 0.3040367066860199, + "learning_rate": 1.5679983755520588e-05, + "loss": 0.0744, "step": 12765 }, { - "epoch": 0.3241528112704658, - "grad_norm": 0.7493904829025269, - "learning_rate": 1.7838981258196897e-05, - "loss": 0.1413, + "epoch": 0.6482562566627748, + "grad_norm": 0.34013864398002625, + "learning_rate": 1.567829162224817e-05, + "loss": 0.0849, "step": 12770 }, { - "epoch": 0.3242797309303211, - "grad_norm": 1.0229781866073608, - "learning_rate": 1.7838135127131192e-05, - "loss": 0.1342, + "epoch": 0.6485100766536372, + "grad_norm": 0.426899790763855, + "learning_rate": 1.567659948897575e-05, + "loss": 0.0898, "step": 12775 }, { - "epoch": 0.3244066505901764, - "grad_norm": 0.5058711767196655, - "learning_rate": 1.783728899606549e-05, - "loss": 0.1502, + "epoch": 0.6487638966444997, + "grad_norm": 0.32523438334465027, + "learning_rate": 1.5674907355703338e-05, + "loss": 0.0828, "step": 12780 }, { - "epoch": 0.3245335702500317, - "grad_norm": 0.5811285972595215, - "learning_rate": 1.783644286499979e-05, - "loss": 0.1115, + "epoch": 0.6490177166353622, + "grad_norm": 0.35754647850990295, + "learning_rate": 1.567321522243092e-05, + "loss": 0.0731, "step": 12785 }, { - "epoch": 0.32466048990988705, - "grad_norm": 0.6096453666687012, - "learning_rate": 1.7835596733934087e-05, - "loss": 0.1072, + "epoch": 0.6492715366262247, + "grad_norm": 0.45699095726013184, + "learning_rate": 1.5671523089158502e-05, + "loss": 0.0838, "step": 12790 }, { - "epoch": 0.32478740956974234, - "grad_norm": 0.4231071472167969, - "learning_rate": 1.7834750602868386e-05, - "loss": 0.1139, + "epoch": 0.6495253566170872, + "grad_norm": 0.41177016496658325, + "learning_rate": 1.566983095588609e-05, + "loss": 0.0801, "step": 12795 }, { - "epoch": 0.3249143292295977, - "grad_norm": 0.5225934386253357, - "learning_rate": 1.7833904471802684e-05, - "loss": 0.1465, + "epoch": 0.6497791766079496, + "grad_norm": 0.3860916793346405, + "learning_rate": 1.566813882261367e-05, + "loss": 0.0871, "step": 12800 }, { - "epoch": 0.32504124888945296, - "grad_norm": 0.787052571773529, - "learning_rate": 1.7833058340736983e-05, - "loss": 0.1573, + "epoch": 0.6500329965988121, + "grad_norm": 0.6945805549621582, + "learning_rate": 1.5666446689341256e-05, + "loss": 0.0896, "step": 12805 }, { - "epoch": 0.3251681685493083, - "grad_norm": 0.5038688778877258, - "learning_rate": 1.783221220967128e-05, - "loss": 0.1228, + "epoch": 0.6502868165896746, + "grad_norm": 0.3880050778388977, + "learning_rate": 1.5664754556068836e-05, + "loss": 0.0779, "step": 12810 }, { - "epoch": 0.3252950882091636, - "grad_norm": 0.8457798957824707, - "learning_rate": 1.7831366078605576e-05, - "loss": 0.1327, + "epoch": 0.6505406365805371, + "grad_norm": 0.689802348613739, + "learning_rate": 1.566306242279642e-05, + "loss": 0.0824, "step": 12815 }, { - "epoch": 0.3254220078690189, - "grad_norm": 0.6739228367805481, - "learning_rate": 1.7830519947539874e-05, - "loss": 0.1346, + "epoch": 0.6507944565713996, + "grad_norm": 0.5213664174079895, + "learning_rate": 1.5661370289524007e-05, + "loss": 0.0873, "step": 12820 }, { - "epoch": 0.3255489275288742, - "grad_norm": 0.6895453929901123, - "learning_rate": 1.7829673816474173e-05, - "loss": 0.1173, + "epoch": 0.651048276562262, + "grad_norm": 0.4796338975429535, + "learning_rate": 1.5659678156251587e-05, + "loss": 0.0889, "step": 12825 }, { - "epoch": 0.32567584718872955, - "grad_norm": 0.4593029022216797, - "learning_rate": 1.782882768540847e-05, - "loss": 0.1033, + "epoch": 0.6513020965531245, + "grad_norm": 0.4608859419822693, + "learning_rate": 1.565798602297917e-05, + "loss": 0.0846, "step": 12830 }, { - "epoch": 0.32580276684858483, - "grad_norm": 0.5381485223770142, - "learning_rate": 1.782798155434277e-05, - "loss": 0.117, + "epoch": 0.651555916543987, + "grad_norm": 0.3637705147266388, + "learning_rate": 1.5656293889706754e-05, + "loss": 0.0808, "step": 12835 }, { - "epoch": 0.3259296865084402, - "grad_norm": 0.8668895363807678, - "learning_rate": 1.7827135423277068e-05, - "loss": 0.1173, + "epoch": 0.6518097365348495, + "grad_norm": 0.45812270045280457, + "learning_rate": 1.5654601756434337e-05, + "loss": 0.0876, "step": 12840 }, { - "epoch": 0.32605660616829546, - "grad_norm": 0.5500327944755554, - "learning_rate": 1.7826289292211367e-05, - "loss": 0.1158, + "epoch": 0.652063556525712, + "grad_norm": 0.35677555203437805, + "learning_rate": 1.565290962316192e-05, + "loss": 0.0921, "step": 12845 }, { - "epoch": 0.3261835258281508, - "grad_norm": 0.6237538456916809, - "learning_rate": 1.7825443161145665e-05, - "loss": 0.1298, + "epoch": 0.6523173765165744, + "grad_norm": 0.6063011884689331, + "learning_rate": 1.5651217489889504e-05, + "loss": 0.0882, "step": 12850 }, { - "epoch": 0.3263104454880061, - "grad_norm": 1.034631371498108, - "learning_rate": 1.782459703007996e-05, - "loss": 0.152, + "epoch": 0.6525711965074369, + "grad_norm": 0.3067625164985657, + "learning_rate": 1.5649525356617088e-05, + "loss": 0.0763, "step": 12855 }, { - "epoch": 0.3264373651478614, - "grad_norm": 0.6142244338989258, - "learning_rate": 1.782375089901426e-05, - "loss": 0.123, + "epoch": 0.6528250164982994, + "grad_norm": 0.3955833315849304, + "learning_rate": 1.564783322334467e-05, + "loss": 0.0813, "step": 12860 }, { - "epoch": 0.3265642848077167, - "grad_norm": 0.6680431365966797, - "learning_rate": 1.7822904767948557e-05, - "loss": 0.1379, + "epoch": 0.6530788364891619, + "grad_norm": 0.4854811728000641, + "learning_rate": 1.5646141090072255e-05, + "loss": 0.0762, "step": 12865 }, { - "epoch": 0.32669120446757205, - "grad_norm": 0.7854471206665039, - "learning_rate": 1.7822058636882855e-05, - "loss": 0.1279, + "epoch": 0.6533326564800244, + "grad_norm": 0.3633703589439392, + "learning_rate": 1.564444895679984e-05, + "loss": 0.0891, "step": 12870 }, { - "epoch": 0.32681812412742733, - "grad_norm": 1.0621627569198608, - "learning_rate": 1.782121250581715e-05, - "loss": 0.1254, + "epoch": 0.6535864764708869, + "grad_norm": 0.41939306259155273, + "learning_rate": 1.5642756823527422e-05, + "loss": 0.0981, "step": 12875 }, { - "epoch": 0.3269450437872827, - "grad_norm": 0.5635915398597717, - "learning_rate": 1.782036637475145e-05, - "loss": 0.1282, + "epoch": 0.6538402964617493, + "grad_norm": 0.8128605484962463, + "learning_rate": 1.5641064690255006e-05, + "loss": 0.0942, "step": 12880 }, { - "epoch": 0.32707196344713796, - "grad_norm": 0.5175842046737671, - "learning_rate": 1.7819520243685747e-05, - "loss": 0.1192, + "epoch": 0.6540941164526118, + "grad_norm": 1.6264680624008179, + "learning_rate": 1.563937255698259e-05, + "loss": 0.0831, "step": 12885 }, { - "epoch": 0.3271988831069933, - "grad_norm": 1.6413469314575195, - "learning_rate": 1.7818674112620045e-05, - "loss": 0.1454, + "epoch": 0.6543479364434743, + "grad_norm": 0.35295000672340393, + "learning_rate": 1.5637680423710173e-05, + "loss": 0.0805, "step": 12890 }, { - "epoch": 0.3273258027668486, - "grad_norm": 0.6084355711936951, - "learning_rate": 1.7817827981554344e-05, - "loss": 0.1109, + "epoch": 0.6546017564343368, + "grad_norm": 0.327200323343277, + "learning_rate": 1.5635988290437756e-05, + "loss": 0.085, "step": 12895 }, { - "epoch": 0.3274527224267039, - "grad_norm": 0.7451075911521912, - "learning_rate": 1.7816981850488642e-05, - "loss": 0.1064, + "epoch": 0.6548555764251992, + "grad_norm": 0.37761542201042175, + "learning_rate": 1.563429615716534e-05, + "loss": 0.0808, "step": 12900 }, { - "epoch": 0.3275796420865592, - "grad_norm": 0.5941367745399475, - "learning_rate": 1.781613571942294e-05, - "loss": 0.1338, + "epoch": 0.6551093964160617, + "grad_norm": 0.39181774854660034, + "learning_rate": 1.5632604023892923e-05, + "loss": 0.0852, "step": 12905 }, { - "epoch": 0.3277065617464145, - "grad_norm": 1.2077457904815674, - "learning_rate": 1.781528958835724e-05, - "loss": 0.1406, + "epoch": 0.6553632164069242, + "grad_norm": 0.4610046148300171, + "learning_rate": 1.5630911890620507e-05, + "loss": 0.0861, "step": 12910 }, { - "epoch": 0.32783348140626983, - "grad_norm": 1.3040300607681274, - "learning_rate": 1.7814443457291537e-05, - "loss": 0.1213, + "epoch": 0.6556170363977867, + "grad_norm": 0.42793646454811096, + "learning_rate": 1.562921975734809e-05, + "loss": 0.0876, "step": 12915 }, { - "epoch": 0.3279604010661251, - "grad_norm": 0.4710959792137146, - "learning_rate": 1.7813597326225832e-05, - "loss": 0.1414, + "epoch": 0.6558708563886492, + "grad_norm": 0.6725565195083618, + "learning_rate": 1.5627527624075674e-05, + "loss": 0.1022, "step": 12920 }, { - "epoch": 0.32808732072598046, - "grad_norm": 0.46817776560783386, - "learning_rate": 1.781275119516013e-05, - "loss": 0.1316, + "epoch": 0.6561246763795117, + "grad_norm": 0.6574685573577881, + "learning_rate": 1.5625835490803258e-05, + "loss": 0.1023, "step": 12925 }, { - "epoch": 0.32821424038583574, - "grad_norm": 0.6481413245201111, - "learning_rate": 1.781190506409443e-05, - "loss": 0.1194, + "epoch": 0.6563784963703742, + "grad_norm": 0.45522236824035645, + "learning_rate": 1.562414335753084e-05, + "loss": 0.0794, "step": 12930 }, { - "epoch": 0.3283411600456911, - "grad_norm": 0.6798949241638184, - "learning_rate": 1.7811058933028728e-05, - "loss": 0.1162, + "epoch": 0.6566323163612366, + "grad_norm": 0.4538464844226837, + "learning_rate": 1.5622451224258425e-05, + "loss": 0.087, "step": 12935 }, { - "epoch": 0.32846807970554637, - "grad_norm": 0.6121481657028198, - "learning_rate": 1.7810212801963026e-05, - "loss": 0.1216, + "epoch": 0.6568861363520991, + "grad_norm": 0.6361369490623474, + "learning_rate": 1.5620759090986005e-05, + "loss": 0.0863, "step": 12940 }, { - "epoch": 0.3285949993654017, - "grad_norm": 0.4887334704399109, - "learning_rate": 1.7809366670897324e-05, - "loss": 0.0979, + "epoch": 0.6571399563429615, + "grad_norm": 0.47415581345558167, + "learning_rate": 1.5619066957713592e-05, + "loss": 0.0845, "step": 12945 }, { - "epoch": 0.328721919025257, - "grad_norm": 0.7898147106170654, - "learning_rate": 1.7808520539831623e-05, - "loss": 0.1218, + "epoch": 0.657393776333824, + "grad_norm": 0.35891467332839966, + "learning_rate": 1.5617374824441175e-05, + "loss": 0.0912, "step": 12950 }, { - "epoch": 0.32884883868511233, - "grad_norm": 0.8564327359199524, - "learning_rate": 1.780767440876592e-05, - "loss": 0.1015, + "epoch": 0.6576475963246865, + "grad_norm": 0.5842028260231018, + "learning_rate": 1.561568269116876e-05, + "loss": 0.0764, "step": 12955 }, { - "epoch": 0.3289757583449676, - "grad_norm": 0.6723572015762329, - "learning_rate": 1.7806828277700216e-05, - "loss": 0.1319, + "epoch": 0.657901416315549, + "grad_norm": 0.7354117035865784, + "learning_rate": 1.5613990557896342e-05, + "loss": 0.0835, "step": 12960 }, { - "epoch": 0.32910267800482296, - "grad_norm": 0.6533916592597961, - "learning_rate": 1.7805982146634515e-05, - "loss": 0.1405, + "epoch": 0.6581552363064115, + "grad_norm": 0.43807005882263184, + "learning_rate": 1.5612298424623923e-05, + "loss": 0.1034, "step": 12965 }, { - "epoch": 0.32922959766467824, - "grad_norm": 0.6955756545066833, - "learning_rate": 1.7805136015568813e-05, - "loss": 0.1152, + "epoch": 0.658409056297274, + "grad_norm": 0.5703946948051453, + "learning_rate": 1.561060629135151e-05, + "loss": 0.0799, "step": 12970 }, { - "epoch": 0.3293565173245336, - "grad_norm": 1.4541422128677368, - "learning_rate": 1.780428988450311e-05, - "loss": 0.1238, + "epoch": 0.6586628762881365, + "grad_norm": 0.29095369577407837, + "learning_rate": 1.5608914158079093e-05, + "loss": 0.0762, "step": 12975 }, { - "epoch": 0.32948343698438887, - "grad_norm": 0.6428455710411072, - "learning_rate": 1.780344375343741e-05, - "loss": 0.1221, + "epoch": 0.658916696278999, + "grad_norm": 0.4557504653930664, + "learning_rate": 1.5607222024806673e-05, + "loss": 0.0991, "step": 12980 }, { - "epoch": 0.3296103566442442, - "grad_norm": 0.5422447323799133, - "learning_rate": 1.7802597622371708e-05, - "loss": 0.1081, + "epoch": 0.6591705162698615, + "grad_norm": 0.5406267642974854, + "learning_rate": 1.560552989153426e-05, + "loss": 0.0837, "step": 12985 }, { - "epoch": 0.3297372763040995, - "grad_norm": 0.4620777368545532, - "learning_rate": 1.7801751491306007e-05, - "loss": 0.1086, + "epoch": 0.6594243362607239, + "grad_norm": 0.5688093304634094, + "learning_rate": 1.560383775826184e-05, + "loss": 0.0819, "step": 12990 }, { - "epoch": 0.32986419596395483, - "grad_norm": 0.8989139199256897, - "learning_rate": 1.7800905360240305e-05, - "loss": 0.1008, + "epoch": 0.6596781562515863, + "grad_norm": 0.4395672082901001, + "learning_rate": 1.5602145624989427e-05, + "loss": 0.0845, "step": 12995 }, { - "epoch": 0.3299911156238101, - "grad_norm": 0.5560199022293091, - "learning_rate": 1.78000592291746e-05, - "loss": 0.1303, + "epoch": 0.6599319762424488, + "grad_norm": 0.3437829315662384, + "learning_rate": 1.560045349171701e-05, + "loss": 0.0785, "step": 13000 }, { - "epoch": 0.33011803528366546, - "grad_norm": 0.48749473690986633, - "learning_rate": 1.77992130981089e-05, - "loss": 0.1047, + "epoch": 0.6601857962333113, + "grad_norm": 0.42482784390449524, + "learning_rate": 1.559876135844459e-05, + "loss": 0.0914, "step": 13005 }, { - "epoch": 0.33024495494352074, - "grad_norm": 1.1257989406585693, - "learning_rate": 1.7798366967043197e-05, - "loss": 0.1523, + "epoch": 0.6604396162241738, + "grad_norm": 0.4697912931442261, + "learning_rate": 1.5597069225172178e-05, + "loss": 0.0852, "step": 13010 }, { - "epoch": 0.3303718746033761, - "grad_norm": 1.0362420082092285, - "learning_rate": 1.7797520835977495e-05, - "loss": 0.1282, + "epoch": 0.6606934362150363, + "grad_norm": 0.36666378378868103, + "learning_rate": 1.5595377091899758e-05, + "loss": 0.0798, "step": 13015 }, { - "epoch": 0.33049879426323137, - "grad_norm": 0.7488190531730652, - "learning_rate": 1.779667470491179e-05, - "loss": 0.1127, + "epoch": 0.6609472562058988, + "grad_norm": 0.37574443221092224, + "learning_rate": 1.559368495862734e-05, + "loss": 0.0932, "step": 13020 }, { - "epoch": 0.3306257139230867, - "grad_norm": 1.1124886274337769, - "learning_rate": 1.779582857384609e-05, - "loss": 0.1311, + "epoch": 0.6612010761967613, + "grad_norm": 0.5501673221588135, + "learning_rate": 1.559199282535493e-05, + "loss": 0.0967, "step": 13025 }, { - "epoch": 0.330752633582942, - "grad_norm": 0.4866411089897156, - "learning_rate": 1.7794982442780387e-05, - "loss": 0.13, + "epoch": 0.6614548961876238, + "grad_norm": 0.48891526460647583, + "learning_rate": 1.559030069208251e-05, + "loss": 0.0814, "step": 13030 }, { - "epoch": 0.33087955324279733, - "grad_norm": 0.6996091604232788, - "learning_rate": 1.7794136311714685e-05, - "loss": 0.124, + "epoch": 0.6617087161784863, + "grad_norm": 0.40868237614631653, + "learning_rate": 1.5588608558810092e-05, + "loss": 0.0857, "step": 13035 }, { - "epoch": 0.3310064729026526, - "grad_norm": 0.4892304539680481, - "learning_rate": 1.7793290180648984e-05, - "loss": 0.1156, + "epoch": 0.6619625361693487, + "grad_norm": 0.5038067102432251, + "learning_rate": 1.5586916425537676e-05, + "loss": 0.0791, "step": 13040 }, { - "epoch": 0.33113339256250796, - "grad_norm": 0.43472906947135925, - "learning_rate": 1.7792444049583282e-05, - "loss": 0.123, + "epoch": 0.6622163561602111, + "grad_norm": 0.3892373740673065, + "learning_rate": 1.558522429226526e-05, + "loss": 0.0853, "step": 13045 }, { - "epoch": 0.33126031222236324, - "grad_norm": 0.9068819880485535, - "learning_rate": 1.779159791851758e-05, - "loss": 0.1287, + "epoch": 0.6624701761510736, + "grad_norm": 0.6677381992340088, + "learning_rate": 1.5583532158992846e-05, + "loss": 0.083, "step": 13050 }, { - "epoch": 0.3313872318822186, - "grad_norm": 0.9670745730400085, - "learning_rate": 1.779075178745188e-05, - "loss": 0.1381, + "epoch": 0.6627239961419361, + "grad_norm": 0.34850823879241943, + "learning_rate": 1.5581840025720426e-05, + "loss": 0.0779, "step": 13055 }, { - "epoch": 0.33151415154207386, - "grad_norm": 0.4483663737773895, - "learning_rate": 1.7789905656386174e-05, - "loss": 0.1226, + "epoch": 0.6629778161327986, + "grad_norm": 0.5405170321464539, + "learning_rate": 1.558014789244801e-05, + "loss": 0.076, "step": 13060 }, { - "epoch": 0.3316410712019292, - "grad_norm": 0.734273374080658, - "learning_rate": 1.7789059525320472e-05, - "loss": 0.1109, + "epoch": 0.6632316361236611, + "grad_norm": 0.4197944402694702, + "learning_rate": 1.5578455759175593e-05, + "loss": 0.0795, "step": 13065 }, { - "epoch": 0.3317679908617845, - "grad_norm": 0.6812616586685181, - "learning_rate": 1.778821339425477e-05, - "loss": 0.1142, + "epoch": 0.6634854561145236, + "grad_norm": 0.48564544320106506, + "learning_rate": 1.5576763625903177e-05, + "loss": 0.0815, "step": 13070 }, { - "epoch": 0.3318949105216398, - "grad_norm": 0.48626768589019775, - "learning_rate": 1.778736726318907e-05, - "loss": 0.1026, + "epoch": 0.6637392761053861, + "grad_norm": 0.37770646810531616, + "learning_rate": 1.557507149263076e-05, + "loss": 0.0891, "step": 13075 }, { - "epoch": 0.3320218301814951, - "grad_norm": 0.5359590649604797, - "learning_rate": 1.7786521132123368e-05, - "loss": 0.1167, + "epoch": 0.6639930960962486, + "grad_norm": 0.39739593863487244, + "learning_rate": 1.5573379359358344e-05, + "loss": 0.0833, "step": 13080 }, { - "epoch": 0.3321487498413504, - "grad_norm": 0.777076005935669, - "learning_rate": 1.7785675001057666e-05, - "loss": 0.1332, + "epoch": 0.6642469160871111, + "grad_norm": 0.3831949234008789, + "learning_rate": 1.5571687226085928e-05, + "loss": 0.0854, "step": 13085 }, { - "epoch": 0.33227566950120574, - "grad_norm": 0.9867997169494629, - "learning_rate": 1.7784828869991964e-05, - "loss": 0.1234, + "epoch": 0.6645007360779736, + "grad_norm": 0.3688269257545471, + "learning_rate": 1.556999509281351e-05, + "loss": 0.0814, "step": 13090 }, { - "epoch": 0.332402589161061, - "grad_norm": 0.8253533244132996, - "learning_rate": 1.7783982738926263e-05, - "loss": 0.1528, + "epoch": 0.6647545560688359, + "grad_norm": 0.3308967649936676, + "learning_rate": 1.5568302959541095e-05, + "loss": 0.0668, "step": 13095 }, { - "epoch": 0.33252950882091636, - "grad_norm": 0.7511464357376099, - "learning_rate": 1.7783136607860558e-05, - "loss": 0.1145, + "epoch": 0.6650083760596984, + "grad_norm": 0.665611743927002, + "learning_rate": 1.5566610826268678e-05, + "loss": 0.0919, "step": 13100 }, { - "epoch": 0.33265642848077165, - "grad_norm": 0.5364869236946106, - "learning_rate": 1.7782290476794856e-05, - "loss": 0.1106, + "epoch": 0.6652621960505609, + "grad_norm": 0.3787246346473694, + "learning_rate": 1.5564918692996262e-05, + "loss": 0.0869, "step": 13105 }, { - "epoch": 0.332783348140627, - "grad_norm": 0.6977491974830627, - "learning_rate": 1.7781444345729155e-05, - "loss": 0.1135, + "epoch": 0.6655160160414234, + "grad_norm": 0.5023996233940125, + "learning_rate": 1.5563226559723845e-05, + "loss": 0.0873, "step": 13110 }, { - "epoch": 0.3329102678004823, - "grad_norm": 0.41914114356040955, - "learning_rate": 1.7780598214663453e-05, - "loss": 0.1254, + "epoch": 0.6657698360322859, + "grad_norm": 0.36124274134635925, + "learning_rate": 1.556153442645143e-05, + "loss": 0.0768, "step": 13115 }, { - "epoch": 0.3330371874603376, - "grad_norm": 0.6170309782028198, - "learning_rate": 1.777975208359775e-05, - "loss": 0.1224, + "epoch": 0.6660236560231484, + "grad_norm": 0.7764711380004883, + "learning_rate": 1.5559842293179012e-05, + "loss": 0.083, "step": 13120 }, { - "epoch": 0.3331641071201929, - "grad_norm": 0.44137534499168396, - "learning_rate": 1.777890595253205e-05, - "loss": 0.117, + "epoch": 0.6662774760140109, + "grad_norm": 0.36708641052246094, + "learning_rate": 1.5558150159906596e-05, + "loss": 0.0899, "step": 13125 }, { - "epoch": 0.33329102678004824, - "grad_norm": 0.5025061368942261, - "learning_rate": 1.777805982146635e-05, - "loss": 0.1153, + "epoch": 0.6665312960048734, + "grad_norm": 0.45462819933891296, + "learning_rate": 1.555645802663418e-05, + "loss": 0.0968, "step": 13130 }, { - "epoch": 0.3334179464399035, - "grad_norm": 1.0764132738113403, - "learning_rate": 1.7777213690400647e-05, - "loss": 0.0959, + "epoch": 0.6667851159957359, + "grad_norm": 0.4756264388561249, + "learning_rate": 1.5554765893361763e-05, + "loss": 0.0873, "step": 13135 }, { - "epoch": 0.33354486609975886, - "grad_norm": 0.8904016017913818, - "learning_rate": 1.7776367559334942e-05, - "loss": 0.1391, + "epoch": 0.6670389359865984, + "grad_norm": 0.3898143470287323, + "learning_rate": 1.5553073760089347e-05, + "loss": 0.0862, "step": 13140 }, { - "epoch": 0.33367178575961415, - "grad_norm": 0.7472026348114014, - "learning_rate": 1.777552142826924e-05, - "loss": 0.1521, + "epoch": 0.6672927559774607, + "grad_norm": 0.32764703035354614, + "learning_rate": 1.555138162681693e-05, + "loss": 0.0877, "step": 13145 }, { - "epoch": 0.3337987054194695, - "grad_norm": 0.6194224953651428, - "learning_rate": 1.777467529720354e-05, - "loss": 0.1056, + "epoch": 0.6675465759683232, + "grad_norm": 0.5033909678459167, + "learning_rate": 1.5549689493544514e-05, + "loss": 0.0812, "step": 13150 }, { - "epoch": 0.3339256250793248, - "grad_norm": 0.39206573367118835, - "learning_rate": 1.7773829166137837e-05, - "loss": 0.0944, + "epoch": 0.6678003959591857, + "grad_norm": 0.42792779207229614, + "learning_rate": 1.5547997360272097e-05, + "loss": 0.0917, "step": 13155 }, { - "epoch": 0.3340525447391801, - "grad_norm": 0.6348617076873779, - "learning_rate": 1.7772983035072132e-05, - "loss": 0.1299, + "epoch": 0.6680542159500482, + "grad_norm": 0.37381792068481445, + "learning_rate": 1.554630522699968e-05, + "loss": 0.0796, "step": 13160 }, { - "epoch": 0.3341794643990354, - "grad_norm": 0.6465216875076294, - "learning_rate": 1.777213690400643e-05, - "loss": 0.1343, + "epoch": 0.6683080359409107, + "grad_norm": 0.4677964448928833, + "learning_rate": 1.5544613093727264e-05, + "loss": 0.0892, "step": 13165 }, { - "epoch": 0.33430638405889074, - "grad_norm": 0.49467864632606506, - "learning_rate": 1.777129077294073e-05, - "loss": 0.112, + "epoch": 0.6685618559317732, + "grad_norm": 0.36071425676345825, + "learning_rate": 1.5542920960454845e-05, + "loss": 0.0961, "step": 13170 }, { - "epoch": 0.334433303718746, - "grad_norm": 0.4545252323150635, - "learning_rate": 1.7770444641875027e-05, - "loss": 0.1069, + "epoch": 0.6688156759226357, + "grad_norm": 0.41257521510124207, + "learning_rate": 1.554122882718243e-05, + "loss": 0.0826, "step": 13175 }, { - "epoch": 0.33456022337860136, - "grad_norm": 1.1407583951950073, - "learning_rate": 1.7769598510809326e-05, - "loss": 0.1036, + "epoch": 0.6690694959134982, + "grad_norm": 0.48096606135368347, + "learning_rate": 1.5539536693910015e-05, + "loss": 0.0819, "step": 13180 }, { - "epoch": 0.33468714303845665, - "grad_norm": 0.9785552024841309, - "learning_rate": 1.7768752379743624e-05, - "loss": 0.1114, + "epoch": 0.6693233159043607, + "grad_norm": 0.3370281755924225, + "learning_rate": 1.5537844560637595e-05, + "loss": 0.0878, "step": 13185 }, { - "epoch": 0.334814062698312, - "grad_norm": 0.8269959688186646, - "learning_rate": 1.7767906248677922e-05, - "loss": 0.1146, + "epoch": 0.669577135895223, + "grad_norm": 0.4116274416446686, + "learning_rate": 1.5536152427365182e-05, + "loss": 0.0878, "step": 13190 }, { - "epoch": 0.33494098235816727, - "grad_norm": 0.6583883166313171, - "learning_rate": 1.776706011761222e-05, - "loss": 0.1387, + "epoch": 0.6698309558860855, + "grad_norm": 0.5579774975776672, + "learning_rate": 1.5534460294092762e-05, + "loss": 0.0886, "step": 13195 }, { - "epoch": 0.3350679020180226, - "grad_norm": 1.5872074365615845, - "learning_rate": 1.7766213986546516e-05, - "loss": 0.1143, + "epoch": 0.670084775876948, + "grad_norm": 0.5344901084899902, + "learning_rate": 1.553276816082035e-05, + "loss": 0.0756, "step": 13200 }, { - "epoch": 0.3351948216778779, - "grad_norm": 0.83211749792099, - "learning_rate": 1.7765367855480814e-05, - "loss": 0.1168, + "epoch": 0.6703385958678105, + "grad_norm": 0.5055976510047913, + "learning_rate": 1.5531076027547933e-05, + "loss": 0.097, "step": 13205 }, { - "epoch": 0.33532174133773324, - "grad_norm": 0.7151371836662292, - "learning_rate": 1.7764521724415113e-05, - "loss": 0.1019, + "epoch": 0.670592415858673, + "grad_norm": 0.3117898106575012, + "learning_rate": 1.5529383894275513e-05, + "loss": 0.0839, "step": 13210 }, { - "epoch": 0.3354486609975885, - "grad_norm": 0.694428563117981, - "learning_rate": 1.776367559334941e-05, - "loss": 0.1101, + "epoch": 0.6708462358495355, + "grad_norm": 0.4203091561794281, + "learning_rate": 1.55276917610031e-05, + "loss": 0.0738, "step": 13215 }, { - "epoch": 0.33557558065744386, - "grad_norm": 0.45481351017951965, - "learning_rate": 1.776282946228371e-05, - "loss": 0.099, + "epoch": 0.671100055840398, + "grad_norm": 0.975365936756134, + "learning_rate": 1.552599962773068e-05, + "loss": 0.0877, "step": 13220 }, { - "epoch": 0.33570250031729915, - "grad_norm": 0.6167437434196472, - "learning_rate": 1.7761983331218008e-05, - "loss": 0.1221, + "epoch": 0.6713538758312605, + "grad_norm": 0.400333970785141, + "learning_rate": 1.5524307494458263e-05, + "loss": 0.0679, "step": 13225 }, { - "epoch": 0.3358294199771545, - "grad_norm": 0.8407954573631287, - "learning_rate": 1.7761137200152306e-05, - "loss": 0.1168, + "epoch": 0.671607695822123, + "grad_norm": 0.42872706055641174, + "learning_rate": 1.552261536118585e-05, + "loss": 0.0807, "step": 13230 }, { - "epoch": 0.33595633963700977, - "grad_norm": 0.6753121614456177, - "learning_rate": 1.7760291069086605e-05, - "loss": 0.1258, + "epoch": 0.6718615158129855, + "grad_norm": 0.6446133852005005, + "learning_rate": 1.552092322791343e-05, + "loss": 0.0725, "step": 13235 }, { - "epoch": 0.3360832592968651, - "grad_norm": 0.7265034317970276, - "learning_rate": 1.77594449380209e-05, - "loss": 0.1167, + "epoch": 0.6721153358038479, + "grad_norm": 0.5523983836174011, + "learning_rate": 1.5519231094641018e-05, + "loss": 0.0851, "step": 13240 }, { - "epoch": 0.3362101789567204, - "grad_norm": 2.569481611251831, - "learning_rate": 1.7758598806955198e-05, - "loss": 0.1508, + "epoch": 0.6723691557947103, + "grad_norm": 0.329487681388855, + "learning_rate": 1.5517538961368598e-05, + "loss": 0.0744, "step": 13245 }, { - "epoch": 0.3363370986165757, - "grad_norm": 0.8065722584724426, - "learning_rate": 1.7757752675889496e-05, - "loss": 0.1302, + "epoch": 0.6726229757855728, + "grad_norm": 0.38870227336883545, + "learning_rate": 1.551584682809618e-05, + "loss": 0.0739, "step": 13250 }, { - "epoch": 0.336464018276431, - "grad_norm": 0.5631741881370544, - "learning_rate": 1.7756906544823795e-05, - "loss": 0.127, + "epoch": 0.6728767957764353, + "grad_norm": 0.47514641284942627, + "learning_rate": 1.5514154694823768e-05, + "loss": 0.0765, "step": 13255 }, { - "epoch": 0.3365909379362863, - "grad_norm": 0.542319655418396, - "learning_rate": 1.7756060413758093e-05, - "loss": 0.1332, + "epoch": 0.6731306157672978, + "grad_norm": 0.40743643045425415, + "learning_rate": 1.551246256155135e-05, + "loss": 0.0772, "step": 13260 }, { - "epoch": 0.33671785759614165, - "grad_norm": 0.49833816289901733, - "learning_rate": 1.775521428269239e-05, - "loss": 0.1197, + "epoch": 0.6733844357581603, + "grad_norm": 0.4392092525959015, + "learning_rate": 1.5510770428278932e-05, + "loss": 0.094, "step": 13265 }, { - "epoch": 0.33684477725599693, - "grad_norm": 0.8954143524169922, - "learning_rate": 1.775436815162669e-05, - "loss": 0.1195, + "epoch": 0.6736382557490228, + "grad_norm": 0.46465203166007996, + "learning_rate": 1.5509078295006515e-05, + "loss": 0.0869, "step": 13270 }, { - "epoch": 0.33697169691585227, - "grad_norm": 0.9539107084274292, - "learning_rate": 1.775352202056099e-05, - "loss": 0.1388, + "epoch": 0.6738920757398853, + "grad_norm": 0.5030555725097656, + "learning_rate": 1.55073861617341e-05, + "loss": 0.0846, "step": 13275 }, { - "epoch": 0.33709861657570755, - "grad_norm": 1.2798664569854736, - "learning_rate": 1.7752675889495283e-05, - "loss": 0.1264, + "epoch": 0.6741458957307478, + "grad_norm": 0.36062154173851013, + "learning_rate": 1.5505694028461682e-05, + "loss": 0.0826, "step": 13280 }, { - "epoch": 0.3372255362355629, - "grad_norm": 0.5872482061386108, - "learning_rate": 1.7751829758429582e-05, - "loss": 0.1294, + "epoch": 0.6743997157216103, + "grad_norm": 0.29261308908462524, + "learning_rate": 1.5504001895189266e-05, + "loss": 0.0802, "step": 13285 }, { - "epoch": 0.3373524558954182, - "grad_norm": 0.5831118226051331, - "learning_rate": 1.775098362736388e-05, - "loss": 0.1001, + "epoch": 0.6746535357124727, + "grad_norm": 0.40803733468055725, + "learning_rate": 1.550230976191685e-05, + "loss": 0.0844, "step": 13290 }, { - "epoch": 0.3374793755552735, - "grad_norm": 0.6712405681610107, - "learning_rate": 1.775013749629818e-05, - "loss": 0.1043, + "epoch": 0.6749073557033352, + "grad_norm": 0.4190680980682373, + "learning_rate": 1.5500617628644433e-05, + "loss": 0.0813, "step": 13295 }, { - "epoch": 0.3376062952151288, - "grad_norm": 2.349065065383911, - "learning_rate": 1.7749291365232474e-05, - "loss": 0.1242, + "epoch": 0.6751611756941976, + "grad_norm": 0.4443584084510803, + "learning_rate": 1.5498925495372017e-05, + "loss": 0.0856, "step": 13300 }, { - "epoch": 0.33773321487498414, - "grad_norm": 0.7075605988502502, - "learning_rate": 1.7748445234166772e-05, - "loss": 0.1283, + "epoch": 0.6754149956850601, + "grad_norm": 0.3262367248535156, + "learning_rate": 1.54972333620996e-05, + "loss": 0.087, "step": 13305 }, { - "epoch": 0.33786013453483943, - "grad_norm": 0.5350863933563232, - "learning_rate": 1.774759910310107e-05, - "loss": 0.0991, + "epoch": 0.6756688156759226, + "grad_norm": 1.0407583713531494, + "learning_rate": 1.5495541228827184e-05, + "loss": 0.0806, "step": 13310 }, { - "epoch": 0.33798705419469477, - "grad_norm": 0.6154424548149109, - "learning_rate": 1.774675297203537e-05, - "loss": 0.1213, + "epoch": 0.6759226356667851, + "grad_norm": 0.5855976939201355, + "learning_rate": 1.5493849095554767e-05, + "loss": 0.0895, "step": 13315 }, { - "epoch": 0.33811397385455005, - "grad_norm": 0.5377549529075623, - "learning_rate": 1.7745906840969667e-05, - "loss": 0.0845, + "epoch": 0.6761764556576476, + "grad_norm": 0.6860768795013428, + "learning_rate": 1.549215696228235e-05, + "loss": 0.0732, "step": 13320 }, { - "epoch": 0.3382408935144054, - "grad_norm": 0.4775916635990143, - "learning_rate": 1.7745060709903966e-05, - "loss": 0.1326, + "epoch": 0.6764302756485101, + "grad_norm": 0.5066936612129211, + "learning_rate": 1.5490464829009934e-05, + "loss": 0.086, "step": 13325 }, { - "epoch": 0.3383678131742607, - "grad_norm": 0.979779839515686, - "learning_rate": 1.7744214578838264e-05, - "loss": 0.1311, + "epoch": 0.6766840956393726, + "grad_norm": 0.49966979026794434, + "learning_rate": 1.5488772695737518e-05, + "loss": 0.0848, "step": 13330 }, { - "epoch": 0.338494732834116, - "grad_norm": 0.5579276084899902, - "learning_rate": 1.7743368447772562e-05, - "loss": 0.1269, + "epoch": 0.6769379156302351, + "grad_norm": 0.41109582781791687, + "learning_rate": 1.54870805624651e-05, + "loss": 0.0834, "step": 13335 }, { - "epoch": 0.3386216524939713, - "grad_norm": 1.0007121562957764, - "learning_rate": 1.7742522316706857e-05, - "loss": 0.1121, + "epoch": 0.6771917356210975, + "grad_norm": 0.6102433204650879, + "learning_rate": 1.5485388429192685e-05, + "loss": 0.1299, "step": 13340 }, { - "epoch": 0.33874857215382664, - "grad_norm": 0.6112388372421265, - "learning_rate": 1.7741676185641156e-05, - "loss": 0.0947, + "epoch": 0.67744555561196, + "grad_norm": 0.39649656414985657, + "learning_rate": 1.548369629592027e-05, + "loss": 0.0946, "step": 13345 }, { - "epoch": 0.33887549181368193, - "grad_norm": 1.1045678853988647, - "learning_rate": 1.7740830054575454e-05, - "loss": 0.1098, + "epoch": 0.6776993756028225, + "grad_norm": 0.375001460313797, + "learning_rate": 1.5482004162647852e-05, + "loss": 0.0756, "step": 13350 }, { - "epoch": 0.33900241147353727, - "grad_norm": 1.0759145021438599, - "learning_rate": 1.7739983923509753e-05, - "loss": 0.1144, + "epoch": 0.6779531955936849, + "grad_norm": 0.4396669268608093, + "learning_rate": 1.5480312029375436e-05, + "loss": 0.0791, "step": 13355 }, { - "epoch": 0.33912933113339255, - "grad_norm": 0.7216346859931946, - "learning_rate": 1.773913779244405e-05, - "loss": 0.1331, + "epoch": 0.6782070155845474, + "grad_norm": 0.6256466507911682, + "learning_rate": 1.547861989610302e-05, + "loss": 0.0827, "step": 13360 }, { - "epoch": 0.3392562507932479, - "grad_norm": 2.0786614418029785, - "learning_rate": 1.773829166137835e-05, - "loss": 0.1466, + "epoch": 0.6784608355754099, + "grad_norm": 0.39854422211647034, + "learning_rate": 1.5476927762830603e-05, + "loss": 0.0791, "step": 13365 }, { - "epoch": 0.3393831704531032, - "grad_norm": 0.720319390296936, - "learning_rate": 1.7737445530312648e-05, - "loss": 0.1136, + "epoch": 0.6787146555662724, + "grad_norm": 0.6447271108627319, + "learning_rate": 1.5475235629558186e-05, + "loss": 0.0898, "step": 13370 }, { - "epoch": 0.3395100901129585, - "grad_norm": 0.37978672981262207, - "learning_rate": 1.7736599399246946e-05, - "loss": 0.1017, + "epoch": 0.6789684755571349, + "grad_norm": 0.3512633740901947, + "learning_rate": 1.5473543496285766e-05, + "loss": 0.0914, "step": 13375 }, { - "epoch": 0.3396370097728138, - "grad_norm": 0.6362811326980591, - "learning_rate": 1.773575326818124e-05, - "loss": 0.1259, + "epoch": 0.6792222955479974, + "grad_norm": 0.37331703305244446, + "learning_rate": 1.5471851363013353e-05, + "loss": 0.0822, "step": 13380 }, { - "epoch": 0.33976392943266914, - "grad_norm": 0.8065671920776367, - "learning_rate": 1.773490713711554e-05, - "loss": 0.1085, + "epoch": 0.6794761155388599, + "grad_norm": 0.5161044001579285, + "learning_rate": 1.5470159229740937e-05, + "loss": 0.0886, "step": 13385 }, { - "epoch": 0.3398908490925244, - "grad_norm": 0.7643458247184753, - "learning_rate": 1.7734061006049838e-05, - "loss": 0.112, + "epoch": 0.6797299355297223, + "grad_norm": 0.3599933981895447, + "learning_rate": 1.546846709646852e-05, + "loss": 0.081, "step": 13390 }, { - "epoch": 0.34001776875237977, - "grad_norm": 0.5762065052986145, - "learning_rate": 1.7733214874984137e-05, - "loss": 0.1129, + "epoch": 0.6799837555205848, + "grad_norm": 0.4737204313278198, + "learning_rate": 1.5466774963196104e-05, + "loss": 0.0925, "step": 13395 }, { - "epoch": 0.34014468841223505, - "grad_norm": 0.4323127865791321, - "learning_rate": 1.7732368743918435e-05, - "loss": 0.1306, + "epoch": 0.6802375755114473, + "grad_norm": 0.46349260210990906, + "learning_rate": 1.5465082829923684e-05, + "loss": 0.0966, "step": 13400 }, { - "epoch": 0.3402716080720904, - "grad_norm": 1.2943257093429565, - "learning_rate": 1.7731522612852733e-05, - "loss": 0.1103, + "epoch": 0.6804913955023097, + "grad_norm": 0.4162529706954956, + "learning_rate": 1.546339069665127e-05, + "loss": 0.0854, "step": 13405 }, { - "epoch": 0.3403985277319457, - "grad_norm": 0.9502193331718445, - "learning_rate": 1.7730676481787032e-05, - "loss": 0.1344, + "epoch": 0.6807452154931722, + "grad_norm": 0.3948574662208557, + "learning_rate": 1.5461698563378855e-05, + "loss": 0.0883, "step": 13410 }, { - "epoch": 0.340525447391801, - "grad_norm": 1.1606521606445312, - "learning_rate": 1.772983035072133e-05, - "loss": 0.1023, + "epoch": 0.6809990354840347, + "grad_norm": 0.4505586624145508, + "learning_rate": 1.5460006430106435e-05, + "loss": 0.0968, "step": 13415 }, { - "epoch": 0.3406523670516563, - "grad_norm": 0.6914339661598206, - "learning_rate": 1.772898421965563e-05, - "loss": 0.1, + "epoch": 0.6812528554748972, + "grad_norm": 0.42507103085517883, + "learning_rate": 1.5458314296834022e-05, + "loss": 0.0841, "step": 13420 }, { - "epoch": 0.3407792867115116, - "grad_norm": 0.8827507495880127, - "learning_rate": 1.7728138088589924e-05, - "loss": 0.1221, + "epoch": 0.6815066754657597, + "grad_norm": 0.5045070648193359, + "learning_rate": 1.5456622163561602e-05, + "loss": 0.0894, "step": 13425 }, { - "epoch": 0.3409062063713669, - "grad_norm": 0.7411719560623169, - "learning_rate": 1.7727291957524222e-05, - "loss": 0.1512, + "epoch": 0.6817604954566222, + "grad_norm": 0.4296087622642517, + "learning_rate": 1.5454930030289185e-05, + "loss": 0.0866, "step": 13430 }, { - "epoch": 0.3410331260312222, - "grad_norm": 0.5889956951141357, - "learning_rate": 1.772644582645852e-05, - "loss": 0.106, + "epoch": 0.6820143154474847, + "grad_norm": 0.40591058135032654, + "learning_rate": 1.5453237897016772e-05, + "loss": 0.0887, "step": 13435 }, { - "epoch": 0.34116004569107755, - "grad_norm": 0.892516016960144, - "learning_rate": 1.772559969539282e-05, - "loss": 0.1134, + "epoch": 0.6822681354383471, + "grad_norm": 0.2938343286514282, + "learning_rate": 1.5451545763744353e-05, + "loss": 0.0783, "step": 13440 }, { - "epoch": 0.34128696535093284, - "grad_norm": 0.5107155442237854, - "learning_rate": 1.7724753564327114e-05, - "loss": 0.1302, + "epoch": 0.6825219554292096, + "grad_norm": 0.41390931606292725, + "learning_rate": 1.544985363047194e-05, + "loss": 0.0865, "step": 13445 }, { - "epoch": 0.3414138850107882, - "grad_norm": 0.5132307410240173, - "learning_rate": 1.7723907433261412e-05, - "loss": 0.1163, + "epoch": 0.6827757754200721, + "grad_norm": 1.899959683418274, + "learning_rate": 1.544816149719952e-05, + "loss": 0.0849, "step": 13450 }, { - "epoch": 0.34154080467064346, - "grad_norm": 0.9214804768562317, - "learning_rate": 1.772306130219571e-05, - "loss": 0.1054, + "epoch": 0.6830295954109346, + "grad_norm": 0.6087545156478882, + "learning_rate": 1.5446469363927103e-05, + "loss": 0.0885, "step": 13455 }, { - "epoch": 0.3416677243304988, - "grad_norm": 0.4891313910484314, - "learning_rate": 1.772221517113001e-05, - "loss": 0.0969, + "epoch": 0.683283415401797, + "grad_norm": 0.42178311944007874, + "learning_rate": 1.5444777230654687e-05, + "loss": 0.0784, "step": 13460 }, { - "epoch": 0.3417946439903541, - "grad_norm": 0.657814085483551, - "learning_rate": 1.7721369040064307e-05, - "loss": 0.1245, + "epoch": 0.6835372353926595, + "grad_norm": 0.3378629684448242, + "learning_rate": 1.544308509738227e-05, + "loss": 0.0891, "step": 13465 }, { - "epoch": 0.3419215636502094, - "grad_norm": 0.8914409875869751, - "learning_rate": 1.7720522908998606e-05, - "loss": 0.1308, + "epoch": 0.683791055383522, + "grad_norm": 0.45582592487335205, + "learning_rate": 1.5441392964109854e-05, + "loss": 0.0785, "step": 13470 }, { - "epoch": 0.3420484833100647, - "grad_norm": 0.6880514025688171, - "learning_rate": 1.7719676777932904e-05, - "loss": 0.1094, + "epoch": 0.6840448753743845, + "grad_norm": 0.4021526277065277, + "learning_rate": 1.5439700830837437e-05, + "loss": 0.0861, "step": 13475 }, { - "epoch": 0.34217540296992005, - "grad_norm": 0.5405988097190857, - "learning_rate": 1.7718830646867203e-05, - "loss": 0.1001, + "epoch": 0.684298695365247, + "grad_norm": 0.42688125371932983, + "learning_rate": 1.543800869756502e-05, + "loss": 0.0804, "step": 13480 }, { - "epoch": 0.34230232262977534, - "grad_norm": 0.720170795917511, - "learning_rate": 1.7717984515801498e-05, - "loss": 0.1029, + "epoch": 0.6845525153561094, + "grad_norm": 0.37077417969703674, + "learning_rate": 1.5436316564292604e-05, + "loss": 0.0765, "step": 13485 }, { - "epoch": 0.3424292422896307, - "grad_norm": 0.5017971396446228, - "learning_rate": 1.7717138384735796e-05, - "loss": 0.1168, + "epoch": 0.6848063353469719, + "grad_norm": 0.3427707850933075, + "learning_rate": 1.5434624431020188e-05, + "loss": 0.0857, "step": 13490 }, { - "epoch": 0.34255616194948596, - "grad_norm": 0.7826741933822632, - "learning_rate": 1.7716292253670094e-05, - "loss": 0.0961, + "epoch": 0.6850601553378344, + "grad_norm": 0.4085235297679901, + "learning_rate": 1.543293229774777e-05, + "loss": 0.088, "step": 13495 }, { - "epoch": 0.3426830816093413, - "grad_norm": 0.9121469259262085, - "learning_rate": 1.7715446122604393e-05, - "loss": 0.1211, + "epoch": 0.6853139753286969, + "grad_norm": 0.38972538709640503, + "learning_rate": 1.5431240164475355e-05, + "loss": 0.0877, "step": 13500 }, { - "epoch": 0.3428100012691966, - "grad_norm": 1.3908172845840454, - "learning_rate": 1.771459999153869e-05, - "loss": 0.117, + "epoch": 0.6855677953195594, + "grad_norm": 0.35393092036247253, + "learning_rate": 1.542954803120294e-05, + "loss": 0.0827, "step": 13505 }, { - "epoch": 0.3429369209290519, - "grad_norm": 0.6833902597427368, - "learning_rate": 1.771375386047299e-05, - "loss": 0.1148, + "epoch": 0.6858216153104219, + "grad_norm": 0.4118354916572571, + "learning_rate": 1.5427855897930522e-05, + "loss": 0.0786, "step": 13510 }, { - "epoch": 0.3430638405889072, - "grad_norm": 0.5564882159233093, - "learning_rate": 1.7712907729407288e-05, - "loss": 0.1226, + "epoch": 0.6860754353012843, + "grad_norm": 0.5753445625305176, + "learning_rate": 1.5426163764658106e-05, + "loss": 0.0815, "step": 13515 }, { - "epoch": 0.34319076024876255, - "grad_norm": 0.6977376937866211, - "learning_rate": 1.7712061598341586e-05, - "loss": 0.0964, + "epoch": 0.6863292552921468, + "grad_norm": 0.48167121410369873, + "learning_rate": 1.542447163138569e-05, + "loss": 0.0914, "step": 13520 }, { - "epoch": 0.34331767990861783, - "grad_norm": 0.6176235675811768, - "learning_rate": 1.771121546727588e-05, - "loss": 0.1311, + "epoch": 0.6865830752830093, + "grad_norm": 0.48669248819351196, + "learning_rate": 1.5422779498113273e-05, + "loss": 0.0841, "step": 13525 }, { - "epoch": 0.3434445995684732, - "grad_norm": 0.5135802030563354, - "learning_rate": 1.771036933621018e-05, - "loss": 0.1097, + "epoch": 0.6868368952738718, + "grad_norm": 0.40434154868125916, + "learning_rate": 1.5421087364840856e-05, + "loss": 0.0851, "step": 13530 }, { - "epoch": 0.34357151922832846, - "grad_norm": 0.7990770936012268, - "learning_rate": 1.7709523205144478e-05, - "loss": 0.119, + "epoch": 0.6870907152647342, + "grad_norm": 0.5444993376731873, + "learning_rate": 1.541939523156844e-05, + "loss": 0.0977, "step": 13535 }, { - "epoch": 0.3436984388881838, - "grad_norm": 0.5865839719772339, - "learning_rate": 1.7708677074078777e-05, - "loss": 0.1164, + "epoch": 0.6873445352555967, + "grad_norm": 0.5064772367477417, + "learning_rate": 1.5417703098296023e-05, + "loss": 0.0787, "step": 13540 }, { - "epoch": 0.3438253585480391, - "grad_norm": 0.625033438205719, - "learning_rate": 1.7707830943013075e-05, - "loss": 0.1105, + "epoch": 0.6875983552464592, + "grad_norm": 0.28774887323379517, + "learning_rate": 1.5416010965023607e-05, + "loss": 0.0955, "step": 13545 }, { - "epoch": 0.3439522782078944, - "grad_norm": 0.5063086748123169, - "learning_rate": 1.7706984811947373e-05, - "loss": 0.0915, + "epoch": 0.6878521752373217, + "grad_norm": 0.41530025005340576, + "learning_rate": 1.541431883175119e-05, + "loss": 0.069, "step": 13550 }, { - "epoch": 0.3440791978677497, - "grad_norm": 0.5800322890281677, - "learning_rate": 1.7706138680881672e-05, - "loss": 0.1061, + "epoch": 0.6881059952281842, + "grad_norm": 0.769633948802948, + "learning_rate": 1.5412626698478774e-05, + "loss": 0.0872, "step": 13555 }, { - "epoch": 0.34420611752760505, - "grad_norm": 0.7131021022796631, - "learning_rate": 1.770529254981597e-05, - "loss": 0.1107, + "epoch": 0.6883598152190467, + "grad_norm": 0.47131699323654175, + "learning_rate": 1.5410934565206358e-05, + "loss": 0.0871, "step": 13560 }, { - "epoch": 0.34433303718746033, - "grad_norm": 0.6432245969772339, - "learning_rate": 1.7704446418750265e-05, - "loss": 0.1218, + "epoch": 0.6886136352099091, + "grad_norm": 0.3732922077178955, + "learning_rate": 1.540924243193394e-05, + "loss": 0.0847, "step": 13565 }, { - "epoch": 0.3444599568473157, - "grad_norm": 0.9717443585395813, - "learning_rate": 1.7703600287684564e-05, - "loss": 0.0973, + "epoch": 0.6888674552007716, + "grad_norm": 0.5022915005683899, + "learning_rate": 1.5407550298661525e-05, + "loss": 0.0887, "step": 13570 }, { - "epoch": 0.34458687650717096, - "grad_norm": 1.0621604919433594, - "learning_rate": 1.7702754156618862e-05, - "loss": 0.1089, + "epoch": 0.6891212751916341, + "grad_norm": 0.3835129141807556, + "learning_rate": 1.5405858165389108e-05, + "loss": 0.0825, "step": 13575 }, { - "epoch": 0.3447137961670263, - "grad_norm": 1.45867121219635, - "learning_rate": 1.770190802555316e-05, - "loss": 0.12, + "epoch": 0.6893750951824966, + "grad_norm": 0.5404291749000549, + "learning_rate": 1.540416603211669e-05, + "loss": 0.082, "step": 13580 }, { - "epoch": 0.3448407158268816, - "grad_norm": 0.6606584787368774, - "learning_rate": 1.7701061894487455e-05, - "loss": 0.0944, + "epoch": 0.689628915173359, + "grad_norm": 0.3274235129356384, + "learning_rate": 1.5402473898844275e-05, + "loss": 0.0833, "step": 13585 }, { - "epoch": 0.3449676354867369, - "grad_norm": 0.8564186096191406, - "learning_rate": 1.7700215763421754e-05, - "loss": 0.1194, + "epoch": 0.6898827351642215, + "grad_norm": 0.3606513440608978, + "learning_rate": 1.540078176557186e-05, + "loss": 0.079, "step": 13590 }, { - "epoch": 0.3450945551465922, - "grad_norm": 0.6258057355880737, - "learning_rate": 1.7699369632356052e-05, - "loss": 0.1373, + "epoch": 0.690136555155084, + "grad_norm": 0.6473351716995239, + "learning_rate": 1.5399089632299442e-05, + "loss": 0.0856, "step": 13595 }, { - "epoch": 0.3452214748064475, - "grad_norm": 0.5857575535774231, - "learning_rate": 1.769852350129035e-05, - "loss": 0.1158, + "epoch": 0.6903903751459465, + "grad_norm": 1.008514165878296, + "learning_rate": 1.5397397499027026e-05, + "loss": 0.0856, "step": 13600 }, { - "epoch": 0.34534839446630283, - "grad_norm": 0.5225048661231995, - "learning_rate": 1.769767737022465e-05, - "loss": 0.111, + "epoch": 0.690644195136809, + "grad_norm": 0.5139790177345276, + "learning_rate": 1.5395705365754606e-05, + "loss": 0.087, "step": 13605 }, { - "epoch": 0.3454753141261581, - "grad_norm": 1.3358922004699707, - "learning_rate": 1.7696831239158947e-05, - "loss": 0.1275, + "epoch": 0.6908980151276715, + "grad_norm": 0.3781962990760803, + "learning_rate": 1.5394013232482193e-05, + "loss": 0.0808, "step": 13610 }, { - "epoch": 0.34560223378601346, - "grad_norm": 0.9150373339653015, - "learning_rate": 1.7695985108093246e-05, - "loss": 0.1002, + "epoch": 0.691151835118534, + "grad_norm": 0.3120231628417969, + "learning_rate": 1.5392321099209777e-05, + "loss": 0.072, "step": 13615 }, { - "epoch": 0.34572915344586874, - "grad_norm": 0.4550745487213135, - "learning_rate": 1.7695138977027544e-05, - "loss": 0.0982, + "epoch": 0.6914056551093964, + "grad_norm": 0.3772776424884796, + "learning_rate": 1.5390628965937357e-05, + "loss": 0.0701, "step": 13620 }, { - "epoch": 0.3458560731057241, - "grad_norm": 0.7867966294288635, - "learning_rate": 1.769429284596184e-05, - "loss": 0.1107, + "epoch": 0.6916594751002589, + "grad_norm": 0.4236140549182892, + "learning_rate": 1.5388936832664944e-05, + "loss": 0.0822, "step": 13625 }, { - "epoch": 0.34598299276557937, - "grad_norm": 1.1039930582046509, - "learning_rate": 1.7693446714896138e-05, - "loss": 0.1151, + "epoch": 0.6919132950911214, + "grad_norm": 0.4006426930427551, + "learning_rate": 1.5387244699392524e-05, + "loss": 0.0839, "step": 13630 }, { - "epoch": 0.3461099124254347, - "grad_norm": 0.6451926231384277, - "learning_rate": 1.7692600583830436e-05, - "loss": 0.1193, + "epoch": 0.6921671150819838, + "grad_norm": 0.45694807171821594, + "learning_rate": 1.538555256612011e-05, + "loss": 0.0738, "step": 13635 }, { - "epoch": 0.34623683208529, - "grad_norm": 0.38221728801727295, - "learning_rate": 1.7691754452764734e-05, - "loss": 0.0981, + "epoch": 0.6924209350728463, + "grad_norm": 0.6357932090759277, + "learning_rate": 1.538386043284769e-05, + "loss": 0.0842, "step": 13640 }, { - "epoch": 0.34636375174514533, - "grad_norm": 0.539564847946167, - "learning_rate": 1.7690908321699033e-05, - "loss": 0.0996, + "epoch": 0.6926747550637088, + "grad_norm": 0.5149153470993042, + "learning_rate": 1.5382168299575274e-05, + "loss": 0.0776, "step": 13645 }, { - "epoch": 0.3464906714050006, - "grad_norm": 0.614389181137085, - "learning_rate": 1.769006219063333e-05, - "loss": 0.1335, + "epoch": 0.6929285750545713, + "grad_norm": 0.8405686616897583, + "learning_rate": 1.538047616630286e-05, + "loss": 0.0821, "step": 13650 }, { - "epoch": 0.34661759106485596, - "grad_norm": 0.4627392888069153, - "learning_rate": 1.768921605956763e-05, - "loss": 0.1165, + "epoch": 0.6931823950454338, + "grad_norm": 0.35849130153656006, + "learning_rate": 1.537878403303044e-05, + "loss": 0.0803, "step": 13655 }, { - "epoch": 0.34674451072471124, - "grad_norm": 0.4829283654689789, - "learning_rate": 1.7688369928501928e-05, - "loss": 0.1211, + "epoch": 0.6934362150362963, + "grad_norm": 0.41094881296157837, + "learning_rate": 1.5377091899758025e-05, + "loss": 0.0863, "step": 13660 }, { - "epoch": 0.3468714303845666, - "grad_norm": 1.0489591360092163, - "learning_rate": 1.7687523797436223e-05, - "loss": 0.0975, + "epoch": 0.6936900350271588, + "grad_norm": 0.4219074249267578, + "learning_rate": 1.537539976648561e-05, + "loss": 0.0951, "step": 13665 }, { - "epoch": 0.34699835004442187, - "grad_norm": 0.4297552704811096, - "learning_rate": 1.768667766637052e-05, - "loss": 0.1337, + "epoch": 0.6939438550180212, + "grad_norm": 0.4253228008747101, + "learning_rate": 1.5373707633213192e-05, + "loss": 0.0918, "step": 13670 }, { - "epoch": 0.3471252697042772, - "grad_norm": 0.6025472283363342, - "learning_rate": 1.768583153530482e-05, - "loss": 0.1284, + "epoch": 0.6941976750088837, + "grad_norm": 0.5601229071617126, + "learning_rate": 1.5372015499940776e-05, + "loss": 0.0704, "step": 13675 }, { - "epoch": 0.3472521893641325, - "grad_norm": 0.618807852268219, - "learning_rate": 1.768498540423912e-05, - "loss": 0.1133, + "epoch": 0.6944514949997462, + "grad_norm": 0.40687859058380127, + "learning_rate": 1.537032336666836e-05, + "loss": 0.0689, "step": 13680 }, { - "epoch": 0.34737910902398783, - "grad_norm": 0.5849055647850037, - "learning_rate": 1.7684139273173417e-05, - "loss": 0.1299, + "epoch": 0.6947053149906086, + "grad_norm": 0.4325862526893616, + "learning_rate": 1.5368631233395943e-05, + "loss": 0.0917, "step": 13685 }, { - "epoch": 0.3475060286838431, - "grad_norm": 0.5315744280815125, - "learning_rate": 1.7683293142107715e-05, - "loss": 0.1443, + "epoch": 0.6949591349814711, + "grad_norm": 0.44314250349998474, + "learning_rate": 1.5366939100123526e-05, + "loss": 0.0812, "step": 13690 }, { - "epoch": 0.34763294834369846, - "grad_norm": 0.8789403438568115, - "learning_rate": 1.7682447011042014e-05, - "loss": 0.1283, + "epoch": 0.6952129549723336, + "grad_norm": 0.35597068071365356, + "learning_rate": 1.536524696685111e-05, + "loss": 0.0767, "step": 13695 }, { - "epoch": 0.34775986800355374, - "grad_norm": 0.6296190619468689, - "learning_rate": 1.7681600879976312e-05, - "loss": 0.1085, + "epoch": 0.6954667749631961, + "grad_norm": 0.7931150794029236, + "learning_rate": 1.5363554833578693e-05, + "loss": 0.0794, "step": 13700 }, { - "epoch": 0.3478867876634091, - "grad_norm": 0.8489715456962585, - "learning_rate": 1.7680754748910607e-05, - "loss": 0.1433, + "epoch": 0.6957205949540586, + "grad_norm": 0.511711835861206, + "learning_rate": 1.5361862700306277e-05, + "loss": 0.074, "step": 13705 }, { - "epoch": 0.34801370732326437, - "grad_norm": 0.6356040835380554, - "learning_rate": 1.7679908617844905e-05, - "loss": 0.1282, + "epoch": 0.6959744149449211, + "grad_norm": 0.8357803225517273, + "learning_rate": 1.536017056703386e-05, + "loss": 0.0688, "step": 13710 }, { - "epoch": 0.3481406269831197, - "grad_norm": 0.7844175696372986, - "learning_rate": 1.7679062486779204e-05, - "loss": 0.0924, + "epoch": 0.6962282349357836, + "grad_norm": 0.3736538887023926, + "learning_rate": 1.5358478433761444e-05, + "loss": 0.0676, "step": 13715 }, { - "epoch": 0.348267546642975, - "grad_norm": 0.9483705163002014, - "learning_rate": 1.7678216355713502e-05, - "loss": 0.1228, + "epoch": 0.696482054926646, + "grad_norm": 0.38975706696510315, + "learning_rate": 1.5356786300489028e-05, + "loss": 0.0779, "step": 13720 }, { - "epoch": 0.34839446630283033, - "grad_norm": 0.8341890573501587, - "learning_rate": 1.7677370224647797e-05, - "loss": 0.1085, + "epoch": 0.6967358749175085, + "grad_norm": 0.524750828742981, + "learning_rate": 1.535509416721661e-05, + "loss": 0.0827, "step": 13725 }, { - "epoch": 0.3485213859626856, - "grad_norm": 0.5195904970169067, - "learning_rate": 1.7676524093582096e-05, - "loss": 0.0861, + "epoch": 0.696989694908371, + "grad_norm": 0.44146502017974854, + "learning_rate": 1.5353402033944195e-05, + "loss": 0.08, "step": 13730 }, { - "epoch": 0.34864830562254095, - "grad_norm": 0.887784481048584, - "learning_rate": 1.7675677962516394e-05, - "loss": 0.1166, + "epoch": 0.6972435148992334, + "grad_norm": 0.49009469151496887, + "learning_rate": 1.5351709900671778e-05, + "loss": 0.0832, "step": 13735 }, { - "epoch": 0.34877522528239624, - "grad_norm": 0.5980648994445801, - "learning_rate": 1.7674831831450692e-05, - "loss": 0.1149, + "epoch": 0.6974973348900959, + "grad_norm": 0.5824193358421326, + "learning_rate": 1.5350017767399362e-05, + "loss": 0.0844, "step": 13740 }, { - "epoch": 0.3489021449422516, - "grad_norm": 0.47139960527420044, - "learning_rate": 1.767398570038499e-05, - "loss": 0.1146, + "epoch": 0.6977511548809584, + "grad_norm": 0.3237861394882202, + "learning_rate": 1.5348325634126945e-05, + "loss": 0.0803, "step": 13745 }, { - "epoch": 0.34902906460210686, - "grad_norm": 0.5608014464378357, - "learning_rate": 1.767313956931929e-05, - "loss": 0.1233, + "epoch": 0.6980049748718209, + "grad_norm": 0.42894473671913147, + "learning_rate": 1.534663350085453e-05, + "loss": 0.0822, "step": 13750 }, { - "epoch": 0.3491559842619622, - "grad_norm": 0.6983084082603455, - "learning_rate": 1.7672293438253588e-05, - "loss": 0.1358, + "epoch": 0.6982587948626834, + "grad_norm": 0.43603384494781494, + "learning_rate": 1.5344941367582112e-05, + "loss": 0.0749, "step": 13755 }, { - "epoch": 0.3492829039218175, - "grad_norm": 0.5329160094261169, - "learning_rate": 1.7671447307187886e-05, - "loss": 0.1236, + "epoch": 0.6985126148535459, + "grad_norm": 0.3695114850997925, + "learning_rate": 1.5343249234309696e-05, + "loss": 0.0844, "step": 13760 }, { - "epoch": 0.3494098235816728, - "grad_norm": 0.9051629304885864, - "learning_rate": 1.767060117612218e-05, - "loss": 0.1204, + "epoch": 0.6987664348444084, + "grad_norm": 0.43473580479621887, + "learning_rate": 1.534155710103728e-05, + "loss": 0.0835, "step": 13765 }, { - "epoch": 0.3495367432415281, - "grad_norm": 0.5193426609039307, - "learning_rate": 1.766975504505648e-05, - "loss": 0.1178, + "epoch": 0.6990202548352709, + "grad_norm": 1.5757832527160645, + "learning_rate": 1.5339864967764863e-05, + "loss": 0.0821, "step": 13770 }, { - "epoch": 0.3496636629013834, - "grad_norm": 0.842942476272583, - "learning_rate": 1.7668908913990778e-05, - "loss": 0.14, + "epoch": 0.6992740748261334, + "grad_norm": 0.6827208399772644, + "learning_rate": 1.5338172834492447e-05, + "loss": 0.0784, "step": 13775 }, { - "epoch": 0.34979058256123874, - "grad_norm": 0.5223641991615295, - "learning_rate": 1.7668062782925076e-05, - "loss": 0.1159, + "epoch": 0.6995278948169957, + "grad_norm": 0.30773502588272095, + "learning_rate": 1.533648070122003e-05, + "loss": 0.1152, "step": 13780 }, { - "epoch": 0.349917502221094, - "grad_norm": 0.5255125761032104, - "learning_rate": 1.7667216651859375e-05, - "loss": 0.0961, + "epoch": 0.6997817148078582, + "grad_norm": 0.4392739534378052, + "learning_rate": 1.5334788567947614e-05, + "loss": 0.0784, "step": 13785 }, { - "epoch": 0.35004442188094936, - "grad_norm": 0.9451989531517029, - "learning_rate": 1.7666370520793673e-05, - "loss": 0.1185, + "epoch": 0.7000355347987207, + "grad_norm": 0.7191630601882935, + "learning_rate": 1.5333096434675197e-05, + "loss": 0.0789, "step": 13790 }, { - "epoch": 0.35017134154080465, - "grad_norm": 0.466840535402298, - "learning_rate": 1.766552438972797e-05, - "loss": 0.1115, + "epoch": 0.7002893547895832, + "grad_norm": 0.2967609763145447, + "learning_rate": 1.533140430140278e-05, + "loss": 0.0778, "step": 13795 }, { - "epoch": 0.35029826120066, - "grad_norm": 1.0222551822662354, - "learning_rate": 1.766467825866227e-05, - "loss": 0.1173, + "epoch": 0.7005431747804457, + "grad_norm": 0.9909408688545227, + "learning_rate": 1.5329712168130364e-05, + "loss": 0.0884, "step": 13800 }, { - "epoch": 0.3504251808605153, - "grad_norm": 0.5500748157501221, - "learning_rate": 1.7663832127596565e-05, - "loss": 0.144, + "epoch": 0.7007969947713082, + "grad_norm": 0.35766205191612244, + "learning_rate": 1.5328020034857948e-05, + "loss": 0.0733, "step": 13805 }, { - "epoch": 0.3505521005203706, - "grad_norm": 0.5949193835258484, - "learning_rate": 1.7662985996530863e-05, - "loss": 0.1169, + "epoch": 0.7010508147621707, + "grad_norm": 0.44659724831581116, + "learning_rate": 1.5326327901585528e-05, + "loss": 0.0882, "step": 13810 }, { - "epoch": 0.3506790201802259, - "grad_norm": 0.7357984781265259, - "learning_rate": 1.766213986546516e-05, - "loss": 0.1023, + "epoch": 0.7013046347530332, + "grad_norm": 0.34097808599472046, + "learning_rate": 1.5324635768313115e-05, + "loss": 0.0785, "step": 13815 }, { - "epoch": 0.35080593984008124, - "grad_norm": 0.6027583479881287, - "learning_rate": 1.766129373439946e-05, - "loss": 0.1391, + "epoch": 0.7015584547438957, + "grad_norm": 0.34435734152793884, + "learning_rate": 1.5322943635040695e-05, + "loss": 0.0818, "step": 13820 }, { - "epoch": 0.3509328594999365, - "grad_norm": 1.0255922079086304, - "learning_rate": 1.766044760333376e-05, - "loss": 0.0967, + "epoch": 0.7018122747347582, + "grad_norm": 0.4698044955730438, + "learning_rate": 1.532125150176828e-05, + "loss": 0.0848, "step": 13825 }, { - "epoch": 0.35105977915979186, - "grad_norm": 2.7473678588867188, - "learning_rate": 1.7659601472268057e-05, - "loss": 0.1333, + "epoch": 0.7020660947256205, + "grad_norm": 0.4470899999141693, + "learning_rate": 1.5319559368495866e-05, + "loss": 0.0815, "step": 13830 }, { - "epoch": 0.35118669881964715, - "grad_norm": 1.3359792232513428, - "learning_rate": 1.7658755341202355e-05, - "loss": 0.1225, + "epoch": 0.702319914716483, + "grad_norm": 0.3169173300266266, + "learning_rate": 1.5317867235223446e-05, + "loss": 0.0738, "step": 13835 }, { - "epoch": 0.3513136184795025, - "grad_norm": 1.6293309926986694, - "learning_rate": 1.7657909210136654e-05, - "loss": 0.1066, + "epoch": 0.7025737347073455, + "grad_norm": 0.8150441646575928, + "learning_rate": 1.5316175101951033e-05, + "loss": 0.0763, "step": 13840 }, { - "epoch": 0.35144053813935777, - "grad_norm": 0.610477864742279, - "learning_rate": 1.765706307907095e-05, - "loss": 0.1147, + "epoch": 0.702827554698208, + "grad_norm": 0.35342103242874146, + "learning_rate": 1.5314482968678613e-05, + "loss": 0.0833, "step": 13845 }, { - "epoch": 0.3515674577992131, - "grad_norm": 1.54259192943573, - "learning_rate": 1.7656216948005247e-05, - "loss": 0.1078, + "epoch": 0.7030813746890705, + "grad_norm": 0.771515429019928, + "learning_rate": 1.5312790835406196e-05, + "loss": 0.0825, "step": 13850 }, { - "epoch": 0.3516943774590684, - "grad_norm": 1.0218652486801147, - "learning_rate": 1.7655370816939545e-05, - "loss": 0.1326, + "epoch": 0.703335194679933, + "grad_norm": 0.3441005051136017, + "learning_rate": 1.5311098702133783e-05, + "loss": 0.0842, "step": 13855 }, { - "epoch": 0.35182129711892374, - "grad_norm": 0.6735953688621521, - "learning_rate": 1.7654524685873844e-05, - "loss": 0.112, + "epoch": 0.7035890146707955, + "grad_norm": 0.45794785022735596, + "learning_rate": 1.5309406568861363e-05, + "loss": 0.0833, "step": 13860 }, { - "epoch": 0.351948216778779, - "grad_norm": 0.7342565059661865, - "learning_rate": 1.765367855480814e-05, - "loss": 0.1261, + "epoch": 0.703842834661658, + "grad_norm": 0.41484132409095764, + "learning_rate": 1.5307714435588947e-05, + "loss": 0.0749, "step": 13865 }, { - "epoch": 0.35207513643863436, - "grad_norm": 0.8111591339111328, - "learning_rate": 1.7652832423742437e-05, - "loss": 0.1361, + "epoch": 0.7040966546525205, + "grad_norm": 0.38982802629470825, + "learning_rate": 1.530602230231653e-05, + "loss": 0.0762, "step": 13870 }, { - "epoch": 0.35220205609848965, - "grad_norm": 0.8838436603546143, - "learning_rate": 1.7651986292676736e-05, - "loss": 0.1252, + "epoch": 0.704350474643383, + "grad_norm": 0.3871098458766937, + "learning_rate": 1.5304330169044114e-05, + "loss": 0.0876, "step": 13875 }, { - "epoch": 0.352328975758345, - "grad_norm": 0.5825421810150146, - "learning_rate": 1.7651140161611034e-05, - "loss": 0.1065, + "epoch": 0.7046042946342453, + "grad_norm": 0.3723105192184448, + "learning_rate": 1.53026380357717e-05, + "loss": 0.0687, "step": 13880 }, { - "epoch": 0.35245589541820027, - "grad_norm": 1.338118314743042, - "learning_rate": 1.7650294030545332e-05, - "loss": 0.1094, + "epoch": 0.7048581146251078, + "grad_norm": 0.4953676462173462, + "learning_rate": 1.530094590249928e-05, + "loss": 0.0743, "step": 13885 }, { - "epoch": 0.3525828150780556, - "grad_norm": 0.4932008683681488, - "learning_rate": 1.764944789947963e-05, - "loss": 0.1298, + "epoch": 0.7051119346159703, + "grad_norm": 0.43342849612236023, + "learning_rate": 1.5299253769226865e-05, + "loss": 0.0738, "step": 13890 }, { - "epoch": 0.3527097347379109, - "grad_norm": 0.4849335551261902, - "learning_rate": 1.764860176841393e-05, - "loss": 0.1239, + "epoch": 0.7053657546068328, + "grad_norm": 0.6391082406044006, + "learning_rate": 1.5297561635954448e-05, + "loss": 0.0903, "step": 13895 }, { - "epoch": 0.35283665439776624, - "grad_norm": 0.5591191053390503, - "learning_rate": 1.7647755637348228e-05, - "loss": 0.1408, + "epoch": 0.7056195745976953, + "grad_norm": 0.46714121103286743, + "learning_rate": 1.5295869502682032e-05, + "loss": 0.0842, "step": 13900 }, { - "epoch": 0.3529635740576215, - "grad_norm": 0.5034964084625244, - "learning_rate": 1.7646909506282523e-05, - "loss": 0.1049, + "epoch": 0.7058733945885578, + "grad_norm": 0.5677165985107422, + "learning_rate": 1.5294177369409615e-05, + "loss": 0.0879, "step": 13905 }, { - "epoch": 0.35309049371747686, - "grad_norm": 0.8947626352310181, - "learning_rate": 1.764606337521682e-05, - "loss": 0.1112, + "epoch": 0.7061272145794203, + "grad_norm": 0.4069088101387024, + "learning_rate": 1.52924852361372e-05, + "loss": 0.0744, "step": 13910 }, { - "epoch": 0.35321741337733215, - "grad_norm": 0.49912628531455994, - "learning_rate": 1.764521724415112e-05, - "loss": 0.1143, + "epoch": 0.7063810345702828, + "grad_norm": 0.44778573513031006, + "learning_rate": 1.5290793102864782e-05, + "loss": 0.073, "step": 13915 }, { - "epoch": 0.3533443330371875, - "grad_norm": 0.7132894992828369, - "learning_rate": 1.7644371113085418e-05, - "loss": 0.1201, + "epoch": 0.7066348545611453, + "grad_norm": 0.40604692697525024, + "learning_rate": 1.5289100969592366e-05, + "loss": 0.089, "step": 13920 }, { - "epoch": 0.35347125269704277, - "grad_norm": 0.7309268712997437, - "learning_rate": 1.7643524982019716e-05, - "loss": 0.1259, + "epoch": 0.7068886745520078, + "grad_norm": 0.3721408545970917, + "learning_rate": 1.528740883631995e-05, + "loss": 0.0789, "step": 13925 }, { - "epoch": 0.3535981723568981, - "grad_norm": 0.9173821806907654, - "learning_rate": 1.7642678850954015e-05, - "loss": 0.1074, + "epoch": 0.7071424945428701, + "grad_norm": 0.31769973039627075, + "learning_rate": 1.5285716703047533e-05, + "loss": 0.0816, "step": 13930 }, { - "epoch": 0.3537250920167534, - "grad_norm": 0.5079876184463501, - "learning_rate": 1.7641832719888313e-05, - "loss": 0.1029, + "epoch": 0.7073963145337326, + "grad_norm": 0.4818114638328552, + "learning_rate": 1.5284024569775117e-05, + "loss": 0.0868, "step": 13935 }, { - "epoch": 0.3538520116766087, - "grad_norm": 0.6132115125656128, - "learning_rate": 1.764098658882261e-05, - "loss": 0.1332, + "epoch": 0.7076501345245951, + "grad_norm": 0.3946104943752289, + "learning_rate": 1.52823324365027e-05, + "loss": 0.0707, "step": 13940 }, { - "epoch": 0.353978931336464, - "grad_norm": 0.5243884921073914, - "learning_rate": 1.764014045775691e-05, - "loss": 0.093, + "epoch": 0.7079039545154576, + "grad_norm": 0.6479266285896301, + "learning_rate": 1.5280640303230284e-05, + "loss": 0.0821, "step": 13945 }, { - "epoch": 0.3541058509963193, - "grad_norm": 0.8419904112815857, - "learning_rate": 1.7639294326691205e-05, - "loss": 0.1239, + "epoch": 0.7081577745063201, + "grad_norm": 0.32928308844566345, + "learning_rate": 1.5278948169957867e-05, + "loss": 0.0737, "step": 13950 }, { - "epoch": 0.35423277065617464, - "grad_norm": 0.4966047704219818, - "learning_rate": 1.7638448195625503e-05, - "loss": 0.1156, + "epoch": 0.7084115944971826, + "grad_norm": 0.39813652634620667, + "learning_rate": 1.527725603668545e-05, + "loss": 0.0796, "step": 13955 }, { - "epoch": 0.35435969031602993, - "grad_norm": 1.0535870790481567, - "learning_rate": 1.7637602064559802e-05, - "loss": 0.1376, + "epoch": 0.7086654144880451, + "grad_norm": 0.4560108780860901, + "learning_rate": 1.5275563903413034e-05, + "loss": 0.0915, "step": 13960 }, { - "epoch": 0.35448660997588527, - "grad_norm": 0.6314557790756226, - "learning_rate": 1.76367559334941e-05, - "loss": 0.1264, + "epoch": 0.7089192344789076, + "grad_norm": 1.1247360706329346, + "learning_rate": 1.5273871770140618e-05, + "loss": 0.0962, "step": 13965 }, { - "epoch": 0.35461352963574055, - "grad_norm": 0.8653870820999146, - "learning_rate": 1.76359098024284e-05, - "loss": 0.1143, + "epoch": 0.7091730544697701, + "grad_norm": 0.43578168749809265, + "learning_rate": 1.52721796368682e-05, + "loss": 0.0834, "step": 13970 }, { - "epoch": 0.3547404492955959, - "grad_norm": 0.7403030395507812, - "learning_rate": 1.7635063671362697e-05, - "loss": 0.1569, + "epoch": 0.7094268744606326, + "grad_norm": 0.45874667167663574, + "learning_rate": 1.5270487503595785e-05, + "loss": 0.072, "step": 13975 }, { - "epoch": 0.3548673689554512, - "grad_norm": 0.8321589231491089, - "learning_rate": 1.7634217540296995e-05, - "loss": 0.1228, + "epoch": 0.709680694451495, + "grad_norm": 0.5207342505455017, + "learning_rate": 1.526879537032337e-05, + "loss": 0.0886, "step": 13980 }, { - "epoch": 0.3549942886153065, - "grad_norm": 0.5747361779212952, - "learning_rate": 1.7633371409231294e-05, - "loss": 0.1231, + "epoch": 0.7099345144423574, + "grad_norm": 0.39302659034729004, + "learning_rate": 1.5267103237050952e-05, + "loss": 0.0836, "step": 13985 }, { - "epoch": 0.3551212082751618, - "grad_norm": 0.43671250343322754, - "learning_rate": 1.763252527816559e-05, - "loss": 0.1047, + "epoch": 0.7101883344332199, + "grad_norm": 0.44471731781959534, + "learning_rate": 1.5265411103778536e-05, + "loss": 0.1, "step": 13990 }, { - "epoch": 0.35524812793501714, - "grad_norm": 0.911439061164856, - "learning_rate": 1.7631679147099887e-05, - "loss": 0.1289, + "epoch": 0.7104421544240824, + "grad_norm": 0.386536180973053, + "learning_rate": 1.526371897050612e-05, + "loss": 0.0816, "step": 13995 }, { - "epoch": 0.35537504759487243, - "grad_norm": 0.5065643191337585, - "learning_rate": 1.7630833016034186e-05, - "loss": 0.1059, + "epoch": 0.7106959744149449, + "grad_norm": 0.4404861330986023, + "learning_rate": 1.52620268372337e-05, + "loss": 0.0936, "step": 14000 }, { - "epoch": 0.35550196725472777, - "grad_norm": 0.5817369222640991, - "learning_rate": 1.7629986884968484e-05, - "loss": 0.1162, + "epoch": 0.7109497944058074, + "grad_norm": 0.7851703763008118, + "learning_rate": 1.5260334703961286e-05, + "loss": 0.0798, "step": 14005 }, { - "epoch": 0.35562888691458305, - "grad_norm": 0.6469318866729736, - "learning_rate": 1.762914075390278e-05, - "loss": 0.1054, + "epoch": 0.7112036143966699, + "grad_norm": 0.6960633993148804, + "learning_rate": 1.5258642570688868e-05, + "loss": 0.0838, "step": 14010 }, { - "epoch": 0.3557558065744384, - "grad_norm": 0.6914425492286682, - "learning_rate": 1.7628294622837077e-05, - "loss": 0.1127, + "epoch": 0.7114574343875324, + "grad_norm": 0.47619956731796265, + "learning_rate": 1.5256950437416452e-05, + "loss": 0.0904, "step": 14015 }, { - "epoch": 0.3558827262342937, - "grad_norm": 0.6897328495979309, - "learning_rate": 1.7627448491771376e-05, - "loss": 0.138, + "epoch": 0.7117112543783949, + "grad_norm": 0.6438000202178955, + "learning_rate": 1.5255258304144037e-05, + "loss": 0.0736, "step": 14020 }, { - "epoch": 0.356009645894149, - "grad_norm": 0.4418046772480011, - "learning_rate": 1.7626602360705674e-05, - "loss": 0.1204, + "epoch": 0.7119650743692574, + "grad_norm": 0.4882700741291046, + "learning_rate": 1.5253566170871619e-05, + "loss": 0.0878, "step": 14025 }, { - "epoch": 0.3561365655540043, - "grad_norm": 0.3913286030292511, - "learning_rate": 1.7625756229639973e-05, - "loss": 0.1059, + "epoch": 0.7122188943601198, + "grad_norm": 0.36032137274742126, + "learning_rate": 1.5251874037599204e-05, + "loss": 0.0772, "step": 14030 }, { - "epoch": 0.35626348521385964, - "grad_norm": 0.426289826631546, - "learning_rate": 1.762491009857427e-05, - "loss": 0.1083, + "epoch": 0.7124727143509823, + "grad_norm": 0.5064921379089355, + "learning_rate": 1.5250181904326786e-05, + "loss": 0.0799, "step": 14035 }, { - "epoch": 0.3563904048737149, - "grad_norm": 0.8364172577857971, - "learning_rate": 1.762406396750857e-05, - "loss": 0.1219, + "epoch": 0.7127265343418447, + "grad_norm": 0.586258590221405, + "learning_rate": 1.524848977105437e-05, + "loss": 0.0856, "step": 14040 }, { - "epoch": 0.35651732453357027, - "grad_norm": 0.6322439312934875, - "learning_rate": 1.7623217836442868e-05, - "loss": 0.1356, + "epoch": 0.7129803543327072, + "grad_norm": 0.4364579916000366, + "learning_rate": 1.5246797637781953e-05, + "loss": 0.0797, "step": 14045 }, { - "epoch": 0.35664424419342555, - "grad_norm": 2.055595636367798, - "learning_rate": 1.7622371705377163e-05, - "loss": 0.1163, + "epoch": 0.7132341743235697, + "grad_norm": 0.30518513917922974, + "learning_rate": 1.5245105504509536e-05, + "loss": 0.0821, "step": 14050 }, { - "epoch": 0.3567711638532809, - "grad_norm": 0.5596067309379578, - "learning_rate": 1.762152557431146e-05, - "loss": 0.1143, + "epoch": 0.7134879943144322, + "grad_norm": 0.3665226399898529, + "learning_rate": 1.5243413371237118e-05, + "loss": 0.0899, "step": 14055 }, { - "epoch": 0.3568980835131362, - "grad_norm": 0.5196403861045837, - "learning_rate": 1.762067944324576e-05, - "loss": 0.1159, + "epoch": 0.7137418143052947, + "grad_norm": 0.5251713395118713, + "learning_rate": 1.5241721237964704e-05, + "loss": 0.0837, "step": 14060 }, { - "epoch": 0.3570250031729915, - "grad_norm": 0.5532774925231934, - "learning_rate": 1.7619833312180058e-05, - "loss": 0.113, + "epoch": 0.7139956342961572, + "grad_norm": 0.37838253378868103, + "learning_rate": 1.5240029104692287e-05, + "loss": 0.0744, "step": 14065 }, { - "epoch": 0.3571519228328468, - "grad_norm": 0.48096856474876404, - "learning_rate": 1.7618987181114356e-05, - "loss": 0.125, + "epoch": 0.7142494542870197, + "grad_norm": 0.4053058922290802, + "learning_rate": 1.5238336971419869e-05, + "loss": 0.0696, "step": 14070 }, { - "epoch": 0.35727884249270214, - "grad_norm": 0.5342534184455872, - "learning_rate": 1.7618141050048655e-05, - "loss": 0.1272, + "epoch": 0.7145032742778821, + "grad_norm": 0.4535035192966461, + "learning_rate": 1.5236644838147454e-05, + "loss": 0.0718, "step": 14075 }, { - "epoch": 0.3574057621525574, - "grad_norm": 1.462142825126648, - "learning_rate": 1.7617294918982953e-05, - "loss": 0.0983, + "epoch": 0.7147570942687446, + "grad_norm": 0.2996232807636261, + "learning_rate": 1.5234952704875036e-05, + "loss": 0.0823, "step": 14080 }, { - "epoch": 0.35753268181241277, - "grad_norm": 0.4559990465641022, - "learning_rate": 1.761644878791725e-05, - "loss": 0.106, + "epoch": 0.715010914259607, + "grad_norm": 0.40874767303466797, + "learning_rate": 1.5233260571602621e-05, + "loss": 0.0793, "step": 14085 }, { - "epoch": 0.35765960147226805, - "grad_norm": 0.5534087419509888, - "learning_rate": 1.7615602656851547e-05, - "loss": 0.1045, + "epoch": 0.7152647342504695, + "grad_norm": 0.34170955419540405, + "learning_rate": 1.5231568438330205e-05, + "loss": 0.0722, "step": 14090 }, { - "epoch": 0.3577865211321234, - "grad_norm": 0.5402079224586487, - "learning_rate": 1.7614756525785845e-05, - "loss": 0.1127, + "epoch": 0.715518554241332, + "grad_norm": 0.42445680499076843, + "learning_rate": 1.5229876305057787e-05, + "loss": 0.0707, "step": 14095 }, { - "epoch": 0.3579134407919787, - "grad_norm": 0.6993995904922485, - "learning_rate": 1.7613910394720143e-05, - "loss": 0.0903, + "epoch": 0.7157723742321945, + "grad_norm": 0.5553193688392639, + "learning_rate": 1.5228184171785372e-05, + "loss": 0.0758, "step": 14100 }, { - "epoch": 0.358040360451834, - "grad_norm": 0.8682286739349365, - "learning_rate": 1.7613064263654442e-05, - "loss": 0.1228, + "epoch": 0.716026194223057, + "grad_norm": 0.3143168091773987, + "learning_rate": 1.5226492038512954e-05, + "loss": 0.0619, "step": 14105 }, { - "epoch": 0.3581672801116893, - "grad_norm": 0.5332581400871277, - "learning_rate": 1.761221813258874e-05, - "loss": 0.1004, + "epoch": 0.7162800142139195, + "grad_norm": 0.633912980556488, + "learning_rate": 1.5224799905240537e-05, + "loss": 0.0746, "step": 14110 }, { - "epoch": 0.3582941997715446, - "grad_norm": 0.4584619104862213, - "learning_rate": 1.761137200152304e-05, - "loss": 0.1254, + "epoch": 0.716533834204782, + "grad_norm": 0.3377459943294525, + "learning_rate": 1.5223107771968123e-05, + "loss": 0.0761, "step": 14115 }, { - "epoch": 0.3584211194313999, - "grad_norm": 0.6587316989898682, - "learning_rate": 1.7610525870457337e-05, - "loss": 0.1321, + "epoch": 0.7167876541956445, + "grad_norm": 0.3944687247276306, + "learning_rate": 1.5221415638695704e-05, + "loss": 0.0766, "step": 14120 }, { - "epoch": 0.3585480390912552, - "grad_norm": 0.42019230127334595, - "learning_rate": 1.7609679739391635e-05, - "loss": 0.1013, + "epoch": 0.7170414741865069, + "grad_norm": 0.4088902771472931, + "learning_rate": 1.521972350542329e-05, + "loss": 0.077, "step": 14125 }, { - "epoch": 0.35867495875111055, - "grad_norm": 0.5526472330093384, - "learning_rate": 1.760883360832593e-05, - "loss": 0.1081, + "epoch": 0.7172952941773694, + "grad_norm": 0.4192088842391968, + "learning_rate": 1.5218031372150872e-05, + "loss": 0.0778, "step": 14130 }, { - "epoch": 0.35880187841096584, - "grad_norm": 0.4924180209636688, - "learning_rate": 1.760798747726023e-05, - "loss": 0.0989, + "epoch": 0.7175491141682319, + "grad_norm": 0.36348608136177063, + "learning_rate": 1.5216339238878455e-05, + "loss": 0.0854, "step": 14135 }, { - "epoch": 0.3589287980708212, - "grad_norm": 0.5741347670555115, - "learning_rate": 1.7607141346194527e-05, - "loss": 0.126, + "epoch": 0.7178029341590944, + "grad_norm": 0.3958783447742462, + "learning_rate": 1.521464710560604e-05, + "loss": 0.0742, "step": 14140 }, { - "epoch": 0.35905571773067646, - "grad_norm": 1.356222152709961, - "learning_rate": 1.7606295215128826e-05, - "loss": 0.1134, + "epoch": 0.7180567541499568, + "grad_norm": 0.44732987880706787, + "learning_rate": 1.5212954972333622e-05, + "loss": 0.076, "step": 14145 }, { - "epoch": 0.3591826373905318, - "grad_norm": 0.6130639910697937, - "learning_rate": 1.760544908406312e-05, - "loss": 0.1039, + "epoch": 0.7183105741408193, + "grad_norm": 1.2303112745285034, + "learning_rate": 1.5211262839061206e-05, + "loss": 0.0784, "step": 14150 }, { - "epoch": 0.3593095570503871, - "grad_norm": 0.8875076174736023, - "learning_rate": 1.760460295299742e-05, - "loss": 0.1006, + "epoch": 0.7185643941316818, + "grad_norm": 0.4170047342777252, + "learning_rate": 1.520957070578879e-05, + "loss": 0.0863, "step": 14155 }, { - "epoch": 0.3594364767102424, - "grad_norm": 0.6270765662193298, - "learning_rate": 1.7603756821931717e-05, - "loss": 0.1246, + "epoch": 0.7188182141225443, + "grad_norm": 0.3879324793815613, + "learning_rate": 1.5207878572516373e-05, + "loss": 0.0756, "step": 14160 }, { - "epoch": 0.3595633963700977, - "grad_norm": 0.631832480430603, - "learning_rate": 1.7602910690866016e-05, - "loss": 0.1168, + "epoch": 0.7190720341134068, + "grad_norm": 0.516608476638794, + "learning_rate": 1.5206186439243955e-05, + "loss": 0.0821, "step": 14165 }, { - "epoch": 0.35969031602995305, - "grad_norm": 0.5413528084754944, - "learning_rate": 1.7602064559800314e-05, - "loss": 0.1232, + "epoch": 0.7193258541042693, + "grad_norm": 0.4164332151412964, + "learning_rate": 1.520449430597154e-05, + "loss": 0.0678, "step": 14170 }, { - "epoch": 0.35981723568980833, - "grad_norm": 0.5163208246231079, - "learning_rate": 1.7601218428734613e-05, - "loss": 0.1014, + "epoch": 0.7195796740951317, + "grad_norm": 0.4437199532985687, + "learning_rate": 1.5202802172699123e-05, + "loss": 0.0825, "step": 14175 }, { - "epoch": 0.3599441553496637, - "grad_norm": 0.7023722529411316, - "learning_rate": 1.760037229766891e-05, - "loss": 0.1191, + "epoch": 0.7198334940859942, + "grad_norm": 0.49337777495384216, + "learning_rate": 1.5201110039426707e-05, + "loss": 0.106, "step": 14180 }, { - "epoch": 0.36007107500951896, - "grad_norm": 0.6611552834510803, - "learning_rate": 1.759952616660321e-05, - "loss": 0.1278, + "epoch": 0.7200873140768567, + "grad_norm": 0.43861648440361023, + "learning_rate": 1.519941790615429e-05, + "loss": 0.0871, "step": 14185 }, { - "epoch": 0.3601979946693743, - "grad_norm": 0.8834186792373657, - "learning_rate": 1.7598680035537505e-05, - "loss": 0.1295, + "epoch": 0.7203411340677192, + "grad_norm": 194.89373779296875, + "learning_rate": 1.5197725772881872e-05, + "loss": 0.0967, "step": 14190 }, { - "epoch": 0.3603249143292296, - "grad_norm": 0.8298308253288269, - "learning_rate": 1.7597833904471803e-05, - "loss": 0.1234, + "epoch": 0.7205949540585816, + "grad_norm": 0.3648269474506378, + "learning_rate": 1.5196033639609458e-05, + "loss": 0.0855, "step": 14195 }, { - "epoch": 0.3604518339890849, - "grad_norm": 0.7425659894943237, - "learning_rate": 1.75969877734061e-05, - "loss": 0.1272, + "epoch": 0.7208487740494441, + "grad_norm": 0.4006633758544922, + "learning_rate": 1.5194341506337041e-05, + "loss": 0.0798, "step": 14200 }, { - "epoch": 0.3605787536489402, - "grad_norm": 0.632369339466095, - "learning_rate": 1.75961416423404e-05, - "loss": 0.1178, + "epoch": 0.7211025940403066, + "grad_norm": 0.4306533932685852, + "learning_rate": 1.5192649373064623e-05, + "loss": 0.0915, "step": 14205 }, { - "epoch": 0.36070567330879555, - "grad_norm": 0.8766928315162659, - "learning_rate": 1.7595295511274698e-05, - "loss": 0.1408, + "epoch": 0.7213564140311691, + "grad_norm": 0.8204346895217896, + "learning_rate": 1.5190957239792208e-05, + "loss": 0.0843, "step": 14210 }, { - "epoch": 0.36083259296865083, - "grad_norm": 1.1381285190582275, - "learning_rate": 1.7594449380208997e-05, - "loss": 0.1105, + "epoch": 0.7216102340220316, + "grad_norm": 0.40292853116989136, + "learning_rate": 1.518926510651979e-05, + "loss": 0.0846, "step": 14215 }, { - "epoch": 0.3609595126285062, - "grad_norm": 0.872157096862793, - "learning_rate": 1.7593603249143295e-05, - "loss": 0.1118, + "epoch": 0.7218640540128941, + "grad_norm": 0.4678119122982025, + "learning_rate": 1.5187572973247374e-05, + "loss": 0.0818, "step": 14220 }, { - "epoch": 0.36108643228836146, - "grad_norm": 0.933265209197998, - "learning_rate": 1.7592757118077593e-05, - "loss": 0.1078, + "epoch": 0.7221178740037565, + "grad_norm": 0.5010687708854675, + "learning_rate": 1.5185880839974957e-05, + "loss": 0.081, "step": 14225 }, { - "epoch": 0.3612133519482168, - "grad_norm": 0.6264180541038513, - "learning_rate": 1.759191098701189e-05, - "loss": 0.1348, + "epoch": 0.722371693994619, + "grad_norm": 1.1079010963439941, + "learning_rate": 1.518418870670254e-05, + "loss": 0.0954, "step": 14230 }, { - "epoch": 0.3613402716080721, - "grad_norm": 1.2387373447418213, - "learning_rate": 1.7591064855946187e-05, - "loss": 0.1508, + "epoch": 0.7226255139854815, + "grad_norm": 0.32439419627189636, + "learning_rate": 1.5182496573430126e-05, + "loss": 0.0801, "step": 14235 }, { - "epoch": 0.3614671912679274, - "grad_norm": 3.154508113861084, - "learning_rate": 1.7590218724880485e-05, - "loss": 0.1327, + "epoch": 0.722879333976344, + "grad_norm": 0.4611896276473999, + "learning_rate": 1.5180804440157708e-05, + "loss": 0.07, "step": 14240 }, { - "epoch": 0.3615941109277827, - "grad_norm": 0.6328004002571106, - "learning_rate": 1.7589372593814784e-05, - "loss": 0.1146, + "epoch": 0.7231331539672065, + "grad_norm": 0.45286303758621216, + "learning_rate": 1.5179112306885291e-05, + "loss": 0.0791, "step": 14245 }, { - "epoch": 0.36172103058763805, - "grad_norm": 0.4777616560459137, - "learning_rate": 1.7588526462749082e-05, - "loss": 0.1191, + "epoch": 0.723386973958069, + "grad_norm": 0.6157929301261902, + "learning_rate": 1.5177420173612875e-05, + "loss": 0.0787, "step": 14250 }, { - "epoch": 0.36184795024749333, - "grad_norm": 0.5456236004829407, - "learning_rate": 1.758768033168338e-05, - "loss": 0.1078, + "epoch": 0.7236407939489314, + "grad_norm": 0.4677484333515167, + "learning_rate": 1.5175728040340458e-05, + "loss": 0.078, "step": 14255 }, { - "epoch": 0.3619748699073487, - "grad_norm": 0.8535609841346741, - "learning_rate": 1.758683420061768e-05, - "loss": 0.1222, + "epoch": 0.7238946139397939, + "grad_norm": 0.4395294487476349, + "learning_rate": 1.517403590706804e-05, + "loss": 0.0879, "step": 14260 }, { - "epoch": 0.36210178956720396, - "grad_norm": 0.6806531548500061, - "learning_rate": 1.7585988069551977e-05, - "loss": 0.1323, + "epoch": 0.7241484339306564, + "grad_norm": 0.48061785101890564, + "learning_rate": 1.5172343773795626e-05, + "loss": 0.0812, "step": 14265 }, { - "epoch": 0.3622287092270593, - "grad_norm": 0.7229576706886292, - "learning_rate": 1.7585141938486272e-05, - "loss": 0.0994, + "epoch": 0.7244022539215189, + "grad_norm": 0.378859281539917, + "learning_rate": 1.5170651640523209e-05, + "loss": 0.0837, "step": 14270 }, { - "epoch": 0.3623556288869146, - "grad_norm": 0.6759905815124512, - "learning_rate": 1.758429580742057e-05, - "loss": 0.1299, + "epoch": 0.7246560739123813, + "grad_norm": 0.40391427278518677, + "learning_rate": 1.5168959507250793e-05, + "loss": 0.0909, "step": 14275 }, { - "epoch": 0.36248254854676987, - "grad_norm": 0.7075567841529846, - "learning_rate": 1.758344967635487e-05, - "loss": 0.1322, + "epoch": 0.7249098939032438, + "grad_norm": 0.3788898289203644, + "learning_rate": 1.5167267373978376e-05, + "loss": 0.0832, "step": 14280 }, { - "epoch": 0.3626094682066252, - "grad_norm": 0.659299373626709, - "learning_rate": 1.7582603545289167e-05, - "loss": 0.0965, + "epoch": 0.7251637138941063, + "grad_norm": 0.2744353413581848, + "learning_rate": 1.5165575240705958e-05, + "loss": 0.0712, "step": 14285 }, { - "epoch": 0.3627363878664805, - "grad_norm": 0.7570000290870667, - "learning_rate": 1.7581757414223462e-05, - "loss": 0.1013, + "epoch": 0.7254175338849688, + "grad_norm": 0.49795615673065186, + "learning_rate": 1.5163883107433543e-05, + "loss": 0.0686, "step": 14290 }, { - "epoch": 0.36286330752633583, - "grad_norm": 0.5835824012756348, - "learning_rate": 1.758091128315776e-05, - "loss": 0.132, + "epoch": 0.7256713538758313, + "grad_norm": 1.3748142719268799, + "learning_rate": 1.5162190974161127e-05, + "loss": 0.0906, "step": 14295 }, { - "epoch": 0.3629902271861911, - "grad_norm": 0.7480833530426025, - "learning_rate": 1.758006515209206e-05, - "loss": 0.1055, + "epoch": 0.7259251738666938, + "grad_norm": 0.409864604473114, + "learning_rate": 1.5160498840888709e-05, + "loss": 0.082, "step": 14300 }, { - "epoch": 0.36311714684604646, - "grad_norm": 0.8081189393997192, - "learning_rate": 1.7579219021026358e-05, - "loss": 0.1199, + "epoch": 0.7261789938575562, + "grad_norm": 0.4577494263648987, + "learning_rate": 1.5158806707616294e-05, + "loss": 0.0847, "step": 14305 }, { - "epoch": 0.36324406650590174, - "grad_norm": 0.7020203471183777, - "learning_rate": 1.7578372889960656e-05, - "loss": 0.114, + "epoch": 0.7264328138484187, + "grad_norm": 0.3561350703239441, + "learning_rate": 1.5157114574343876e-05, + "loss": 0.0878, "step": 14310 }, { - "epoch": 0.3633709861657571, - "grad_norm": 0.5521241426467896, - "learning_rate": 1.7577526758894954e-05, - "loss": 0.0972, + "epoch": 0.7266866338392812, + "grad_norm": 0.3978186547756195, + "learning_rate": 1.515542244107146e-05, + "loss": 0.0881, "step": 14315 }, { - "epoch": 0.36349790582561237, - "grad_norm": 0.8054701685905457, - "learning_rate": 1.7576680627829253e-05, - "loss": 0.1138, + "epoch": 0.7269404538301436, + "grad_norm": 0.2696934938430786, + "learning_rate": 1.5153730307799045e-05, + "loss": 0.0994, "step": 14320 }, { - "epoch": 0.3636248254854677, - "grad_norm": 1.2457228899002075, - "learning_rate": 1.757583449676355e-05, - "loss": 0.0975, + "epoch": 0.7271942738210061, + "grad_norm": 0.47918376326560974, + "learning_rate": 1.5152038174526626e-05, + "loss": 0.0808, "step": 14325 }, { - "epoch": 0.363751745145323, - "grad_norm": 0.4248066246509552, - "learning_rate": 1.7574988365697846e-05, - "loss": 0.1065, + "epoch": 0.7274480938118686, + "grad_norm": 0.39144304394721985, + "learning_rate": 1.5150346041254212e-05, + "loss": 0.0863, "step": 14330 }, { - "epoch": 0.36387866480517833, - "grad_norm": 0.60561203956604, - "learning_rate": 1.7574142234632145e-05, - "loss": 0.1329, + "epoch": 0.7277019138027311, + "grad_norm": 0.39072200655937195, + "learning_rate": 1.5148653907981793e-05, + "loss": 0.0732, "step": 14335 }, { - "epoch": 0.3640055844650336, - "grad_norm": 3.5951054096221924, - "learning_rate": 1.7573296103566443e-05, - "loss": 0.1164, + "epoch": 0.7279557337935936, + "grad_norm": 0.3815653324127197, + "learning_rate": 1.5146961774709377e-05, + "loss": 0.0839, "step": 14340 }, { - "epoch": 0.36413250412488896, - "grad_norm": 0.5549519658088684, - "learning_rate": 1.757244997250074e-05, - "loss": 0.122, + "epoch": 0.7282095537844561, + "grad_norm": 0.4559372067451477, + "learning_rate": 1.5145269641436962e-05, + "loss": 0.0841, "step": 14345 }, { - "epoch": 0.36425942378474424, - "grad_norm": 0.5737352967262268, - "learning_rate": 1.757160384143504e-05, - "loss": 0.1026, + "epoch": 0.7284633737753186, + "grad_norm": 0.42653796076774597, + "learning_rate": 1.5143577508164544e-05, + "loss": 0.0842, "step": 14350 }, { - "epoch": 0.3643863434445996, - "grad_norm": 1.81071138381958, - "learning_rate": 1.7570757710369338e-05, - "loss": 0.1091, + "epoch": 0.728717193766181, + "grad_norm": 0.4985381066799164, + "learning_rate": 1.5141885374892128e-05, + "loss": 0.0883, "step": 14355 }, { - "epoch": 0.36451326310445487, - "grad_norm": 0.6840774416923523, - "learning_rate": 1.7569911579303637e-05, - "loss": 0.1077, + "epoch": 0.7289710137570435, + "grad_norm": 0.5497139692306519, + "learning_rate": 1.5140193241619711e-05, + "loss": 0.0819, "step": 14360 }, { - "epoch": 0.3646401827643102, - "grad_norm": 0.5765891075134277, - "learning_rate": 1.7569065448237935e-05, - "loss": 0.1127, + "epoch": 0.729224833747906, + "grad_norm": 0.595309853553772, + "learning_rate": 1.5138501108347295e-05, + "loss": 0.0848, "step": 14365 }, { - "epoch": 0.3647671024241655, - "grad_norm": 0.4617384672164917, - "learning_rate": 1.756821931717223e-05, - "loss": 0.1123, + "epoch": 0.7294786537387684, + "grad_norm": 0.47207218408584595, + "learning_rate": 1.513680897507488e-05, + "loss": 0.0805, "step": 14370 }, { - "epoch": 0.36489402208402083, - "grad_norm": 0.7442850470542908, - "learning_rate": 1.756737318610653e-05, - "loss": 0.1427, + "epoch": 0.7297324737296309, + "grad_norm": 0.38911616802215576, + "learning_rate": 1.5135116841802462e-05, + "loss": 0.0855, "step": 14375 }, { - "epoch": 0.3650209417438761, - "grad_norm": 0.5659260749816895, - "learning_rate": 1.7566527055040827e-05, - "loss": 0.1098, + "epoch": 0.7299862937204934, + "grad_norm": 0.6578527092933655, + "learning_rate": 1.5133424708530045e-05, + "loss": 0.0821, "step": 14380 }, { - "epoch": 0.36514786140373146, - "grad_norm": 0.6455766558647156, - "learning_rate": 1.7565680923975125e-05, - "loss": 0.1354, + "epoch": 0.7302401137113559, + "grad_norm": 0.3873518705368042, + "learning_rate": 1.5131732575257629e-05, + "loss": 0.0787, "step": 14385 }, { - "epoch": 0.36527478106358674, - "grad_norm": 0.8708602786064148, - "learning_rate": 1.7564834792909424e-05, - "loss": 0.1102, + "epoch": 0.7304939337022184, + "grad_norm": 0.3727337718009949, + "learning_rate": 1.5130040441985212e-05, + "loss": 0.0711, "step": 14390 }, { - "epoch": 0.3654017007234421, - "grad_norm": 0.5405154228210449, - "learning_rate": 1.7563988661843722e-05, - "loss": 0.1103, + "epoch": 0.7307477536930809, + "grad_norm": 0.3558390140533447, + "learning_rate": 1.5128348308712794e-05, + "loss": 0.0633, "step": 14395 }, { - "epoch": 0.36552862038329736, - "grad_norm": 0.7596319913864136, - "learning_rate": 1.756314253077802e-05, - "loss": 0.1104, + "epoch": 0.7310015736839434, + "grad_norm": 0.44894295930862427, + "learning_rate": 1.512665617544038e-05, + "loss": 0.0775, "step": 14400 }, { - "epoch": 0.3656555400431527, - "grad_norm": 0.6760889291763306, - "learning_rate": 1.756229639971232e-05, - "loss": 0.0906, + "epoch": 0.7312553936748059, + "grad_norm": 0.44867026805877686, + "learning_rate": 1.5124964042167961e-05, + "loss": 0.0862, "step": 14405 }, { - "epoch": 0.365782459703008, - "grad_norm": 0.5328531861305237, - "learning_rate": 1.7561450268646614e-05, - "loss": 0.1286, + "epoch": 0.7315092136656683, + "grad_norm": 0.3655999004840851, + "learning_rate": 1.5123271908895545e-05, + "loss": 0.0851, "step": 14410 }, { - "epoch": 0.36590937936286333, - "grad_norm": 0.7702007293701172, - "learning_rate": 1.7560604137580912e-05, - "loss": 0.1021, + "epoch": 0.7317630336565308, + "grad_norm": 0.4013903737068176, + "learning_rate": 1.512157977562313e-05, + "loss": 0.0861, "step": 14415 }, { - "epoch": 0.3660362990227186, - "grad_norm": 0.6242198944091797, - "learning_rate": 1.755975800651521e-05, - "loss": 0.0953, + "epoch": 0.7320168536473932, + "grad_norm": 0.3827629089355469, + "learning_rate": 1.5119887642350712e-05, + "loss": 0.0884, "step": 14420 }, { - "epoch": 0.36616321868257395, - "grad_norm": 0.8385759592056274, - "learning_rate": 1.755891187544951e-05, - "loss": 0.1584, + "epoch": 0.7322706736382557, + "grad_norm": 0.48554348945617676, + "learning_rate": 1.5118195509078297e-05, + "loss": 0.0761, "step": 14425 }, { - "epoch": 0.36629013834242924, - "grad_norm": 0.9922245740890503, - "learning_rate": 1.7558065744383804e-05, - "loss": 0.1523, + "epoch": 0.7325244936291182, + "grad_norm": 0.3680451214313507, + "learning_rate": 1.5116503375805879e-05, + "loss": 0.0731, "step": 14430 }, { - "epoch": 0.3664170580022846, - "grad_norm": 0.8228939175605774, - "learning_rate": 1.7557219613318102e-05, - "loss": 0.1388, + "epoch": 0.7327783136199807, + "grad_norm": 0.47662273049354553, + "learning_rate": 1.5114811242533463e-05, + "loss": 0.0814, "step": 14435 }, { - "epoch": 0.36654397766213986, - "grad_norm": 0.5923588871955872, - "learning_rate": 1.75563734822524e-05, - "loss": 0.1217, + "epoch": 0.7330321336108432, + "grad_norm": 0.4592245817184448, + "learning_rate": 1.5113119109261048e-05, + "loss": 0.0804, "step": 14440 }, { - "epoch": 0.3666708973219952, - "grad_norm": 0.8679959177970886, - "learning_rate": 1.75555273511867e-05, - "loss": 0.1195, + "epoch": 0.7332859536017057, + "grad_norm": 0.4146338701248169, + "learning_rate": 1.511142697598863e-05, + "loss": 0.0902, "step": 14445 }, { - "epoch": 0.3667978169818505, - "grad_norm": 0.5328546166419983, - "learning_rate": 1.7554681220120998e-05, - "loss": 0.1025, + "epoch": 0.7335397735925682, + "grad_norm": 0.4722953736782074, + "learning_rate": 1.5109734842716213e-05, + "loss": 0.0863, "step": 14450 }, { - "epoch": 0.3669247366417058, - "grad_norm": 0.9213349223136902, - "learning_rate": 1.7553835089055296e-05, - "loss": 0.1102, + "epoch": 0.7337935935834307, + "grad_norm": 0.7869719862937927, + "learning_rate": 1.5108042709443797e-05, + "loss": 0.0794, "step": 14455 }, { - "epoch": 0.3670516563015611, - "grad_norm": 0.5100712776184082, - "learning_rate": 1.7552988957989594e-05, - "loss": 0.1132, + "epoch": 0.7340474135742932, + "grad_norm": 0.4236357808113098, + "learning_rate": 1.510635057617138e-05, + "loss": 0.0723, "step": 14460 }, { - "epoch": 0.3671785759614164, - "grad_norm": 0.8430976271629333, - "learning_rate": 1.7552142826923893e-05, - "loss": 0.1022, + "epoch": 0.7343012335651556, + "grad_norm": 0.38017693161964417, + "learning_rate": 1.5104658442898962e-05, + "loss": 0.0717, "step": 14465 }, { - "epoch": 0.36730549562127174, - "grad_norm": 0.6529316306114197, - "learning_rate": 1.755129669585819e-05, - "loss": 0.1222, + "epoch": 0.734555053556018, + "grad_norm": 0.3802855908870697, + "learning_rate": 1.5102966309626547e-05, + "loss": 0.0795, "step": 14470 }, { - "epoch": 0.367432415281127, - "grad_norm": 0.7540631890296936, - "learning_rate": 1.7550450564792486e-05, - "loss": 0.1129, + "epoch": 0.7348088735468805, + "grad_norm": 0.506759762763977, + "learning_rate": 1.5101274176354131e-05, + "loss": 0.0783, "step": 14475 }, { - "epoch": 0.36755933494098236, - "grad_norm": 0.40097033977508545, - "learning_rate": 1.7549604433726785e-05, - "loss": 0.0992, + "epoch": 0.735062693537743, + "grad_norm": 0.6665279269218445, + "learning_rate": 1.5099582043081715e-05, + "loss": 0.0778, "step": 14480 }, { - "epoch": 0.36768625460083765, - "grad_norm": 0.4612237513065338, - "learning_rate": 1.7548758302661083e-05, - "loss": 0.1274, + "epoch": 0.7353165135286055, + "grad_norm": 0.5124176144599915, + "learning_rate": 1.5097889909809298e-05, + "loss": 0.0895, "step": 14485 }, { - "epoch": 0.367813174260693, - "grad_norm": 0.593195378780365, - "learning_rate": 1.754791217159538e-05, - "loss": 0.1157, + "epoch": 0.735570333519468, + "grad_norm": 0.3736747205257416, + "learning_rate": 1.509619777653688e-05, + "loss": 0.0754, "step": 14490 }, { - "epoch": 0.3679400939205483, - "grad_norm": 0.6711913347244263, - "learning_rate": 1.754706604052968e-05, - "loss": 0.1491, + "epoch": 0.7358241535103305, + "grad_norm": 0.5663830637931824, + "learning_rate": 1.5094505643264465e-05, + "loss": 0.0744, "step": 14495 }, { - "epoch": 0.3680670135804036, - "grad_norm": 0.435430645942688, - "learning_rate": 1.754621990946398e-05, - "loss": 0.1377, + "epoch": 0.736077973501193, + "grad_norm": 0.45994728803634644, + "learning_rate": 1.5092813509992049e-05, + "loss": 0.0805, "step": 14500 }, { - "epoch": 0.3681939332402589, - "grad_norm": 0.7123911380767822, - "learning_rate": 1.7545373778398277e-05, - "loss": 0.1048, + "epoch": 0.7363317934920555, + "grad_norm": 0.4209723472595215, + "learning_rate": 1.509112137671963e-05, + "loss": 0.07, "step": 14505 }, { - "epoch": 0.36832085290011424, - "grad_norm": 0.6449508666992188, - "learning_rate": 1.7544527647332575e-05, - "loss": 0.1248, + "epoch": 0.736585613482918, + "grad_norm": 0.3272269666194916, + "learning_rate": 1.5089429243447216e-05, + "loss": 0.0657, "step": 14510 }, { - "epoch": 0.3684477725599695, - "grad_norm": 1.1124721765518188, - "learning_rate": 1.754368151626687e-05, - "loss": 0.1361, + "epoch": 0.7368394334737804, + "grad_norm": 0.4161807894706726, + "learning_rate": 1.5087737110174798e-05, + "loss": 0.0726, "step": 14515 }, { - "epoch": 0.36857469221982486, - "grad_norm": 0.8066892623901367, - "learning_rate": 1.754283538520117e-05, - "loss": 0.1353, + "epoch": 0.7370932534646428, + "grad_norm": 0.43482705950737, + "learning_rate": 1.5086044976902383e-05, + "loss": 0.0788, "step": 14520 }, { - "epoch": 0.36870161187968015, - "grad_norm": 0.6676443815231323, - "learning_rate": 1.7541989254135467e-05, - "loss": 0.1047, + "epoch": 0.7373470734555053, + "grad_norm": 0.5974509716033936, + "learning_rate": 1.5084352843629966e-05, + "loss": 0.0749, "step": 14525 }, { - "epoch": 0.3688285315395355, - "grad_norm": 0.5298932194709778, - "learning_rate": 1.7541143123069765e-05, - "loss": 0.1149, + "epoch": 0.7376008934463678, + "grad_norm": 0.5275264978408813, + "learning_rate": 1.5082660710357548e-05, + "loss": 0.0762, "step": 14530 }, { - "epoch": 0.36895545119939077, - "grad_norm": 0.7102766036987305, - "learning_rate": 1.7540296992004064e-05, - "loss": 0.1434, + "epoch": 0.7378547134372303, + "grad_norm": 0.4393943250179291, + "learning_rate": 1.5080968577085134e-05, + "loss": 0.0831, "step": 14535 }, { - "epoch": 0.3690823708592461, - "grad_norm": 0.63113933801651, - "learning_rate": 1.7539450860938362e-05, - "loss": 0.1179, + "epoch": 0.7381085334280928, + "grad_norm": 0.3507572114467621, + "learning_rate": 1.5079276443812715e-05, + "loss": 0.0859, "step": 14540 }, { - "epoch": 0.3692092905191014, - "grad_norm": 1.7055164575576782, - "learning_rate": 1.753860472987266e-05, - "loss": 0.1205, + "epoch": 0.7383623534189553, + "grad_norm": 0.3876013457775116, + "learning_rate": 1.5077584310540299e-05, + "loss": 0.0779, "step": 14545 }, { - "epoch": 0.36933621017895674, - "grad_norm": 0.39538678526878357, - "learning_rate": 1.753775859880696e-05, - "loss": 0.1002, + "epoch": 0.7386161734098178, + "grad_norm": 0.37205737829208374, + "learning_rate": 1.5075892177267884e-05, + "loss": 0.0737, "step": 14550 }, { - "epoch": 0.369463129838812, - "grad_norm": 0.6789305806159973, - "learning_rate": 1.7536912467741254e-05, - "loss": 0.1121, + "epoch": 0.7388699934006803, + "grad_norm": 0.4882403314113617, + "learning_rate": 1.5074200043995466e-05, + "loss": 0.0878, "step": 14555 }, { - "epoch": 0.36959004949866736, - "grad_norm": 0.8020620346069336, - "learning_rate": 1.7536066336675552e-05, - "loss": 0.1116, + "epoch": 0.7391238133915428, + "grad_norm": 0.5520374774932861, + "learning_rate": 1.507250791072305e-05, + "loss": 0.0966, "step": 14560 }, { - "epoch": 0.36971696915852265, - "grad_norm": 0.6516659259796143, - "learning_rate": 1.753522020560985e-05, - "loss": 0.1082, + "epoch": 0.7393776333824053, + "grad_norm": 0.367116779088974, + "learning_rate": 1.5070815777450633e-05, + "loss": 0.0819, "step": 14565 }, { - "epoch": 0.369843888818378, - "grad_norm": 0.808914303779602, - "learning_rate": 1.753437407454415e-05, - "loss": 0.0978, + "epoch": 0.7396314533732676, + "grad_norm": 0.4586057960987091, + "learning_rate": 1.5069123644178217e-05, + "loss": 0.0776, "step": 14570 }, { - "epoch": 0.36997080847823327, - "grad_norm": 0.8734528422355652, - "learning_rate": 1.7533527943478444e-05, - "loss": 0.0852, + "epoch": 0.7398852733641301, + "grad_norm": 0.4211220145225525, + "learning_rate": 1.5067431510905802e-05, + "loss": 0.0726, "step": 14575 }, { - "epoch": 0.3700977281380886, - "grad_norm": 0.4880116283893585, - "learning_rate": 1.7532681812412743e-05, - "loss": 0.1119, + "epoch": 0.7401390933549926, + "grad_norm": 0.4111827611923218, + "learning_rate": 1.5065739377633384e-05, + "loss": 0.0803, "step": 14580 }, { - "epoch": 0.3702246477979439, - "grad_norm": 0.7761018872261047, - "learning_rate": 1.753183568134704e-05, - "loss": 0.1082, + "epoch": 0.7403929133458551, + "grad_norm": 0.49343472719192505, + "learning_rate": 1.5064047244360966e-05, + "loss": 0.0835, "step": 14585 }, { - "epoch": 0.37035156745779924, - "grad_norm": 0.9680399894714355, - "learning_rate": 1.753098955028134e-05, - "loss": 0.1131, + "epoch": 0.7406467333367176, + "grad_norm": 0.3709668815135956, + "learning_rate": 1.506235511108855e-05, + "loss": 0.0845, "step": 14590 }, { - "epoch": 0.3704784871176545, - "grad_norm": 0.4843808710575104, - "learning_rate": 1.7530143419215638e-05, - "loss": 0.1016, + "epoch": 0.7409005533275801, + "grad_norm": 0.45862144231796265, + "learning_rate": 1.5060662977816134e-05, + "loss": 0.0886, "step": 14595 }, { - "epoch": 0.37060540677750986, - "grad_norm": 0.6354820728302002, - "learning_rate": 1.7529297288149936e-05, - "loss": 0.1023, + "epoch": 0.7411543733184426, + "grad_norm": 0.4751611351966858, + "learning_rate": 1.5058970844543716e-05, + "loss": 0.0785, "step": 14600 }, { - "epoch": 0.37073232643736515, - "grad_norm": 0.9325487613677979, - "learning_rate": 1.7528451157084235e-05, - "loss": 0.1223, + "epoch": 0.7414081933093051, + "grad_norm": 0.302118718624115, + "learning_rate": 1.5057278711271301e-05, + "loss": 0.0766, "step": 14605 }, { - "epoch": 0.3708592460972205, - "grad_norm": 0.8607824444770813, - "learning_rate": 1.7527605026018533e-05, - "loss": 0.1228, + "epoch": 0.7416620133001676, + "grad_norm": 0.5119585990905762, + "learning_rate": 1.5055586577998883e-05, + "loss": 0.0839, "step": 14610 }, { - "epoch": 0.37098616575707577, - "grad_norm": 0.41307884454727173, - "learning_rate": 1.7526758894952828e-05, - "loss": 0.1052, + "epoch": 0.74191583329103, + "grad_norm": 0.3752945363521576, + "learning_rate": 1.5053894444726469e-05, + "loss": 0.076, "step": 14615 }, { - "epoch": 0.3711130854169311, - "grad_norm": 0.8660783171653748, - "learning_rate": 1.7525912763887126e-05, - "loss": 0.1019, + "epoch": 0.7421696532818924, + "grad_norm": 0.5176377296447754, + "learning_rate": 1.5052202311454052e-05, + "loss": 0.0817, "step": 14620 }, { - "epoch": 0.3712400050767864, - "grad_norm": 0.4363076686859131, - "learning_rate": 1.7525066632821425e-05, - "loss": 0.1297, + "epoch": 0.7424234732727549, + "grad_norm": 0.44988998770713806, + "learning_rate": 1.5050510178181634e-05, + "loss": 0.0824, "step": 14625 }, { - "epoch": 0.3713669247366417, - "grad_norm": 0.49201226234436035, - "learning_rate": 1.7524220501755723e-05, - "loss": 0.1319, + "epoch": 0.7426772932636174, + "grad_norm": 0.3214319348335266, + "learning_rate": 1.504881804490922e-05, + "loss": 0.0744, "step": 14630 }, { - "epoch": 0.371493844396497, - "grad_norm": 0.4526978135108948, - "learning_rate": 1.752337437069002e-05, - "loss": 0.1145, + "epoch": 0.7429311132544799, + "grad_norm": 0.5503107905387878, + "learning_rate": 1.5047125911636801e-05, + "loss": 0.0731, "step": 14635 }, { - "epoch": 0.3716207640563523, - "grad_norm": 0.5607075691223145, - "learning_rate": 1.752252823962432e-05, - "loss": 0.1161, + "epoch": 0.7431849332453424, + "grad_norm": 1.3438857793807983, + "learning_rate": 1.5045433778364385e-05, + "loss": 0.0832, "step": 14640 }, { - "epoch": 0.37174768371620764, - "grad_norm": 0.8384544253349304, - "learning_rate": 1.752168210855862e-05, - "loss": 0.1261, + "epoch": 0.7434387532362049, + "grad_norm": 0.447172075510025, + "learning_rate": 1.504374164509197e-05, + "loss": 0.072, "step": 14645 }, { - "epoch": 0.37187460337606293, - "grad_norm": 0.7503822445869446, - "learning_rate": 1.7520835977492917e-05, - "loss": 0.1661, + "epoch": 0.7436925732270674, + "grad_norm": 0.39440739154815674, + "learning_rate": 1.5042049511819552e-05, + "loss": 0.0921, "step": 14650 }, { - "epoch": 0.37200152303591827, - "grad_norm": 0.5378701090812683, - "learning_rate": 1.7519989846427212e-05, - "loss": 0.1319, + "epoch": 0.7439463932179299, + "grad_norm": 0.39479300379753113, + "learning_rate": 1.5040357378547135e-05, + "loss": 0.0791, "step": 14655 }, { - "epoch": 0.37212844269577355, - "grad_norm": 0.6000733971595764, - "learning_rate": 1.751914371536151e-05, - "loss": 0.1267, + "epoch": 0.7442002132087924, + "grad_norm": 0.5535449385643005, + "learning_rate": 1.5038665245274719e-05, + "loss": 0.0776, "step": 14660 }, { - "epoch": 0.3722553623556289, - "grad_norm": 0.701710045337677, - "learning_rate": 1.751829758429581e-05, - "loss": 0.1211, + "epoch": 0.7444540331996548, + "grad_norm": 0.3662354648113251, + "learning_rate": 1.5036973112002302e-05, + "loss": 0.0746, "step": 14665 }, { - "epoch": 0.3723822820154842, - "grad_norm": 0.4501943588256836, - "learning_rate": 1.7517451453230107e-05, - "loss": 0.1128, + "epoch": 0.7447078531905172, + "grad_norm": 0.34598585963249207, + "learning_rate": 1.5035280978729888e-05, + "loss": 0.0781, "step": 14670 }, { - "epoch": 0.3725092016753395, - "grad_norm": 0.42577624320983887, - "learning_rate": 1.7516605322164405e-05, - "loss": 0.1196, + "epoch": 0.7449616731813797, + "grad_norm": 0.43272921442985535, + "learning_rate": 1.503358884545747e-05, + "loss": 0.0801, "step": 14675 }, { - "epoch": 0.3726361213351948, - "grad_norm": 0.6056503057479858, - "learning_rate": 1.7515759191098704e-05, - "loss": 0.1189, + "epoch": 0.7452154931722422, + "grad_norm": 0.3881741464138031, + "learning_rate": 1.5031896712185053e-05, + "loss": 0.0823, "step": 14680 }, { - "epoch": 0.37276304099505014, - "grad_norm": 0.711727499961853, - "learning_rate": 1.7514913060033002e-05, - "loss": 0.1189, + "epoch": 0.7454693131631047, + "grad_norm": 0.40420830249786377, + "learning_rate": 1.5030204578912636e-05, + "loss": 0.0881, "step": 14685 }, { - "epoch": 0.37288996065490543, - "grad_norm": 0.8229162096977234, - "learning_rate": 1.75140669289673e-05, - "loss": 0.0993, + "epoch": 0.7457231331539672, + "grad_norm": 0.6293859481811523, + "learning_rate": 1.502851244564022e-05, + "loss": 0.0792, "step": 14690 }, { - "epoch": 0.37301688031476077, - "grad_norm": 0.5471954941749573, - "learning_rate": 1.7513220797901596e-05, - "loss": 0.1022, + "epoch": 0.7459769531448297, + "grad_norm": 0.6415058970451355, + "learning_rate": 1.5026820312367802e-05, + "loss": 0.0787, "step": 14695 }, { - "epoch": 0.37314379997461605, - "grad_norm": 0.6130200028419495, - "learning_rate": 1.7512374666835894e-05, - "loss": 0.1102, + "epoch": 0.7462307731356922, + "grad_norm": 0.9900127053260803, + "learning_rate": 1.5025128179095387e-05, + "loss": 0.08, "step": 14700 }, { - "epoch": 0.3732707196344714, - "grad_norm": 0.4465831518173218, - "learning_rate": 1.7511528535770192e-05, - "loss": 0.1062, + "epoch": 0.7464845931265547, + "grad_norm": 0.39871150255203247, + "learning_rate": 1.502343604582297e-05, + "loss": 0.0727, "step": 14705 }, { - "epoch": 0.3733976392943267, - "grad_norm": 0.9054153561592102, - "learning_rate": 1.751068240470449e-05, - "loss": 0.119, + "epoch": 0.7467384131174172, + "grad_norm": 0.46820712089538574, + "learning_rate": 1.5021743912550553e-05, + "loss": 0.0884, "step": 14710 }, { - "epoch": 0.373524558954182, - "grad_norm": 0.6649287343025208, - "learning_rate": 1.7509836273638786e-05, - "loss": 0.1194, + "epoch": 0.7469922331082796, + "grad_norm": 0.8658115267753601, + "learning_rate": 1.5020051779278138e-05, + "loss": 0.0752, "step": 14715 }, { - "epoch": 0.3736514786140373, - "grad_norm": 0.5919955372810364, - "learning_rate": 1.7508990142573084e-05, - "loss": 0.1026, + "epoch": 0.747246053099142, + "grad_norm": 0.5432624220848083, + "learning_rate": 1.501835964600572e-05, + "loss": 0.072, "step": 14720 }, { - "epoch": 0.37377839827389264, - "grad_norm": 0.7862746715545654, - "learning_rate": 1.7508144011507383e-05, - "loss": 0.1093, + "epoch": 0.7474998730900045, + "grad_norm": 0.3773350119590759, + "learning_rate": 1.5016667512733305e-05, + "loss": 0.07, "step": 14725 }, { - "epoch": 0.3739053179337479, - "grad_norm": 0.7232267260551453, - "learning_rate": 1.750729788044168e-05, - "loss": 0.1371, + "epoch": 0.747753693080867, + "grad_norm": 0.4341411292552948, + "learning_rate": 1.5014975379460888e-05, + "loss": 0.0742, "step": 14730 }, { - "epoch": 0.37403223759360327, - "grad_norm": 1.4160761833190918, - "learning_rate": 1.750645174937598e-05, - "loss": 0.1036, + "epoch": 0.7480075130717295, + "grad_norm": 0.5234106183052063, + "learning_rate": 1.501328324618847e-05, + "loss": 0.0716, "step": 14735 }, { - "epoch": 0.37415915725345855, - "grad_norm": 0.6545923352241516, - "learning_rate": 1.7505605618310278e-05, - "loss": 0.0982, + "epoch": 0.748261333062592, + "grad_norm": 0.7145912051200867, + "learning_rate": 1.5011591112916055e-05, + "loss": 0.0841, "step": 14740 }, { - "epoch": 0.3742860769133139, - "grad_norm": 0.3786046802997589, - "learning_rate": 1.7504759487244576e-05, - "loss": 0.102, + "epoch": 0.7485151530534545, + "grad_norm": 0.42830368876457214, + "learning_rate": 1.5009898979643637e-05, + "loss": 0.075, "step": 14745 }, { - "epoch": 0.3744129965731692, - "grad_norm": 0.5414413213729858, - "learning_rate": 1.7503913356178875e-05, - "loss": 0.1126, + "epoch": 0.748768973044317, + "grad_norm": 0.4660949409008026, + "learning_rate": 1.5008206846371221e-05, + "loss": 0.0733, "step": 14750 }, { - "epoch": 0.3745399162330245, - "grad_norm": 0.5048717260360718, - "learning_rate": 1.750306722511317e-05, - "loss": 0.0896, + "epoch": 0.7490227930351795, + "grad_norm": 0.34171634912490845, + "learning_rate": 1.5006514713098806e-05, + "loss": 0.0887, "step": 14755 }, { - "epoch": 0.3746668358928798, - "grad_norm": 2.0069148540496826, - "learning_rate": 1.7502221094047468e-05, - "loss": 0.1181, + "epoch": 0.749276613026042, + "grad_norm": 0.8460459113121033, + "learning_rate": 1.5004822579826388e-05, + "loss": 0.0861, "step": 14760 }, { - "epoch": 0.37479375555273514, - "grad_norm": 0.6645435094833374, - "learning_rate": 1.7501374962981767e-05, - "loss": 0.0832, + "epoch": 0.7495304330169044, + "grad_norm": 0.5516984462738037, + "learning_rate": 1.5003130446553973e-05, + "loss": 0.081, "step": 14765 }, { - "epoch": 0.3749206752125904, - "grad_norm": 0.6181977391242981, - "learning_rate": 1.7500528831916065e-05, - "loss": 0.1042, + "epoch": 0.7497842530077669, + "grad_norm": 0.4025214910507202, + "learning_rate": 1.5001438313281555e-05, + "loss": 0.0787, "step": 14770 }, { - "epoch": 0.37504759487244577, - "grad_norm": 0.5061550736427307, - "learning_rate": 1.7499682700850363e-05, - "loss": 0.1061, + "epoch": 0.7500380729986293, + "grad_norm": 0.44463062286376953, + "learning_rate": 1.4999746180009139e-05, + "loss": 0.0833, "step": 14775 }, { - "epoch": 0.37517451453230105, - "grad_norm": 2.2500369548797607, - "learning_rate": 1.7498836569784662e-05, - "loss": 0.1196, + "epoch": 0.7502918929894918, + "grad_norm": 0.5166744589805603, + "learning_rate": 1.4998054046736724e-05, + "loss": 0.084, "step": 14780 }, { - "epoch": 0.3753014341921564, - "grad_norm": 0.7525805830955505, - "learning_rate": 1.749799043871896e-05, - "loss": 0.1085, + "epoch": 0.7505457129803543, + "grad_norm": 0.5265631079673767, + "learning_rate": 1.4996361913464306e-05, + "loss": 0.0787, "step": 14785 }, { - "epoch": 0.3754283538520117, - "grad_norm": 0.5497461557388306, - "learning_rate": 1.749714430765326e-05, - "loss": 0.1155, + "epoch": 0.7507995329712168, + "grad_norm": 0.45194345712661743, + "learning_rate": 1.4994669780191888e-05, + "loss": 0.0765, "step": 14790 }, { - "epoch": 0.37555527351186696, - "grad_norm": 0.536488950252533, - "learning_rate": 1.7496298176587554e-05, - "loss": 0.1227, + "epoch": 0.7510533529620793, + "grad_norm": 0.4983544945716858, + "learning_rate": 1.4992977646919473e-05, + "loss": 0.0803, "step": 14795 }, { - "epoch": 0.3756821931717223, - "grad_norm": 0.5499987006187439, - "learning_rate": 1.7495452045521852e-05, - "loss": 0.1046, + "epoch": 0.7513071729529418, + "grad_norm": 0.4568130075931549, + "learning_rate": 1.4991285513647056e-05, + "loss": 0.0702, "step": 14800 }, { - "epoch": 0.3758091128315776, - "grad_norm": 0.672492504119873, - "learning_rate": 1.749460591445615e-05, - "loss": 0.101, + "epoch": 0.7515609929438043, + "grad_norm": 0.2652099132537842, + "learning_rate": 1.4989593380374638e-05, + "loss": 0.0802, "step": 14805 }, { - "epoch": 0.3759360324914329, - "grad_norm": 0.8735315203666687, - "learning_rate": 1.749375978339045e-05, - "loss": 0.1292, + "epoch": 0.7518148129346668, + "grad_norm": 0.40893828868865967, + "learning_rate": 1.4987901247102223e-05, + "loss": 0.0747, "step": 14810 }, { - "epoch": 0.3760629521512882, - "grad_norm": 0.40938737988471985, - "learning_rate": 1.7492913652324747e-05, - "loss": 0.1046, + "epoch": 0.7520686329255292, + "grad_norm": 0.8311263918876648, + "learning_rate": 1.4986209113829805e-05, + "loss": 0.0787, "step": 14815 }, { - "epoch": 0.37618987181114355, - "grad_norm": 2.281613349914551, - "learning_rate": 1.7492067521259046e-05, - "loss": 0.0932, + "epoch": 0.7523224529163917, + "grad_norm": 0.3804740905761719, + "learning_rate": 1.498451698055739e-05, + "loss": 0.0737, "step": 14820 }, { - "epoch": 0.37631679147099883, - "grad_norm": 0.6251809000968933, - "learning_rate": 1.7491221390193344e-05, - "loss": 0.1246, + "epoch": 0.7525762729072542, + "grad_norm": 0.3137752115726471, + "learning_rate": 1.4982824847284974e-05, + "loss": 0.0749, "step": 14825 }, { - "epoch": 0.3764437111308542, - "grad_norm": 0.40185678005218506, - "learning_rate": 1.7490375259127642e-05, - "loss": 0.102, + "epoch": 0.7528300928981166, + "grad_norm": 0.3906417191028595, + "learning_rate": 1.4981132714012556e-05, + "loss": 0.0833, "step": 14830 }, { - "epoch": 0.37657063079070946, - "grad_norm": 0.5938499569892883, - "learning_rate": 1.7489529128061937e-05, - "loss": 0.1187, + "epoch": 0.7530839128889791, + "grad_norm": 0.6462694406509399, + "learning_rate": 1.4979440580740141e-05, + "loss": 0.0784, "step": 14835 }, { - "epoch": 0.3766975504505648, - "grad_norm": 0.5414707064628601, - "learning_rate": 1.7488682996996236e-05, - "loss": 0.0978, + "epoch": 0.7533377328798416, + "grad_norm": 0.5136402249336243, + "learning_rate": 1.4977748447467723e-05, + "loss": 0.0754, "step": 14840 }, { - "epoch": 0.3768244701104201, - "grad_norm": 0.823503315448761, - "learning_rate": 1.7487836865930534e-05, - "loss": 0.1054, + "epoch": 0.7535915528707041, + "grad_norm": 0.5540376305580139, + "learning_rate": 1.4976056314195307e-05, + "loss": 0.0796, "step": 14845 }, { - "epoch": 0.3769513897702754, - "grad_norm": 0.6832050681114197, - "learning_rate": 1.7486990734864833e-05, - "loss": 0.1257, + "epoch": 0.7538453728615666, + "grad_norm": 0.4849870800971985, + "learning_rate": 1.4974364180922892e-05, + "loss": 0.0757, "step": 14850 }, { - "epoch": 0.3770783094301307, - "grad_norm": 0.5470040440559387, - "learning_rate": 1.7486144603799128e-05, - "loss": 0.1065, + "epoch": 0.7540991928524291, + "grad_norm": 0.739065408706665, + "learning_rate": 1.4972672047650474e-05, + "loss": 0.0847, "step": 14855 }, { - "epoch": 0.37720522908998605, - "grad_norm": 0.46596699953079224, - "learning_rate": 1.7485298472733426e-05, - "loss": 0.1206, + "epoch": 0.7543530128432916, + "grad_norm": 0.5149842500686646, + "learning_rate": 1.4970979914378057e-05, + "loss": 0.0752, "step": 14860 }, { - "epoch": 0.37733214874984133, - "grad_norm": 0.5975885987281799, - "learning_rate": 1.7484452341667724e-05, - "loss": 0.1083, + "epoch": 0.754606832834154, + "grad_norm": 0.42165669798851013, + "learning_rate": 1.496928778110564e-05, + "loss": 0.0831, "step": 14865 }, { - "epoch": 0.3774590684096967, - "grad_norm": 0.7016924619674683, - "learning_rate": 1.7483606210602023e-05, - "loss": 0.1078, + "epoch": 0.7548606528250165, + "grad_norm": 0.49979931116104126, + "learning_rate": 1.4967595647833224e-05, + "loss": 0.0733, "step": 14870 }, { - "epoch": 0.37758598806955196, - "grad_norm": 0.5413151383399963, - "learning_rate": 1.748276007953632e-05, - "loss": 0.0877, + "epoch": 0.755114472815879, + "grad_norm": 0.49712690711021423, + "learning_rate": 1.496590351456081e-05, + "loss": 0.0803, "step": 14875 }, { - "epoch": 0.3777129077294073, - "grad_norm": 0.6274673938751221, - "learning_rate": 1.748191394847062e-05, - "loss": 0.1469, + "epoch": 0.7553682928067414, + "grad_norm": 0.4191552400588989, + "learning_rate": 1.4964211381288391e-05, + "loss": 0.0834, "step": 14880 }, { - "epoch": 0.3778398273892626, - "grad_norm": 0.5119305849075317, - "learning_rate": 1.7481067817404918e-05, - "loss": 0.1271, + "epoch": 0.7556221127976039, + "grad_norm": 0.3019554316997528, + "learning_rate": 1.4962519248015975e-05, + "loss": 0.0778, "step": 14885 }, { - "epoch": 0.3779667470491179, - "grad_norm": 0.49654003977775574, - "learning_rate": 1.7480221686339216e-05, - "loss": 0.1193, + "epoch": 0.7558759327884664, + "grad_norm": 0.5018486380577087, + "learning_rate": 1.4960827114743558e-05, + "loss": 0.0766, "step": 14890 }, { - "epoch": 0.3780936667089732, - "grad_norm": 0.4709921181201935, - "learning_rate": 1.747937555527351e-05, - "loss": 0.0815, + "epoch": 0.7561297527793289, + "grad_norm": 0.33077868819236755, + "learning_rate": 1.4959134981471142e-05, + "loss": 0.0752, "step": 14895 }, { - "epoch": 0.37822058636882855, - "grad_norm": 0.9998751878738403, - "learning_rate": 1.747852942420781e-05, - "loss": 0.1037, + "epoch": 0.7563835727701914, + "grad_norm": 0.54078209400177, + "learning_rate": 1.4957442848198724e-05, + "loss": 0.0796, "step": 14900 }, { - "epoch": 0.37834750602868383, - "grad_norm": 0.46847447752952576, - "learning_rate": 1.7477683293142108e-05, - "loss": 0.0877, + "epoch": 0.7566373927610539, + "grad_norm": 0.4296625554561615, + "learning_rate": 1.4955750714926309e-05, + "loss": 0.0742, "step": 14905 }, { - "epoch": 0.3784744256885392, - "grad_norm": 0.8643396496772766, - "learning_rate": 1.7476837162076407e-05, - "loss": 0.0947, + "epoch": 0.7568912127519163, + "grad_norm": 0.33885428309440613, + "learning_rate": 1.4954058581653893e-05, + "loss": 0.0722, "step": 14910 }, { - "epoch": 0.37860134534839446, - "grad_norm": 0.7323237061500549, - "learning_rate": 1.7475991031010705e-05, - "loss": 0.1057, + "epoch": 0.7571450327427788, + "grad_norm": 0.4666841924190521, + "learning_rate": 1.4952366448381476e-05, + "loss": 0.0831, "step": 14915 }, { - "epoch": 0.3787282650082498, - "grad_norm": 0.6113660931587219, - "learning_rate": 1.7475144899945003e-05, - "loss": 0.102, + "epoch": 0.7573988527336413, + "grad_norm": 0.45907166600227356, + "learning_rate": 1.495067431510906e-05, + "loss": 0.0714, "step": 14920 }, { - "epoch": 0.3788551846681051, - "grad_norm": 0.45277413725852966, - "learning_rate": 1.7474298768879302e-05, - "loss": 0.1117, + "epoch": 0.7576526727245038, + "grad_norm": 0.32929760217666626, + "learning_rate": 1.4948982181836642e-05, + "loss": 0.0708, "step": 14925 }, { - "epoch": 0.3789821043279604, - "grad_norm": 0.62168949842453, - "learning_rate": 1.74734526378136e-05, - "loss": 0.1058, + "epoch": 0.7579064927153663, + "grad_norm": 0.4375610649585724, + "learning_rate": 1.4947290048564227e-05, + "loss": 0.0753, "step": 14930 }, { - "epoch": 0.3791090239878157, - "grad_norm": 0.6537298560142517, - "learning_rate": 1.7472606506747895e-05, - "loss": 0.1351, + "epoch": 0.7581603127062287, + "grad_norm": 0.32175201177597046, + "learning_rate": 1.494559791529181e-05, + "loss": 0.0816, "step": 14935 }, { - "epoch": 0.37923594364767105, - "grad_norm": 0.9718519449234009, - "learning_rate": 1.7471760375682194e-05, - "loss": 0.1179, + "epoch": 0.7584141326970912, + "grad_norm": 0.31863266229629517, + "learning_rate": 1.4943905782019392e-05, + "loss": 0.0768, "step": 14940 }, { - "epoch": 0.37936286330752633, - "grad_norm": 0.4267071485519409, - "learning_rate": 1.7470914244616492e-05, - "loss": 0.1096, + "epoch": 0.7586679526879537, + "grad_norm": 0.5156002640724182, + "learning_rate": 1.4942213648746977e-05, + "loss": 0.0784, "step": 14945 }, { - "epoch": 0.3794897829673817, - "grad_norm": 1.4345922470092773, - "learning_rate": 1.747006811355079e-05, - "loss": 0.1147, + "epoch": 0.7589217726788162, + "grad_norm": 0.3342384696006775, + "learning_rate": 1.494052151547456e-05, + "loss": 0.0766, "step": 14950 }, { - "epoch": 0.37961670262723696, - "grad_norm": 0.5857569575309753, - "learning_rate": 1.746922198248509e-05, - "loss": 0.1141, + "epoch": 0.7591755926696787, + "grad_norm": 0.3848695456981659, + "learning_rate": 1.4938829382202143e-05, + "loss": 0.0882, "step": 14955 }, { - "epoch": 0.3797436222870923, - "grad_norm": 0.6260550022125244, - "learning_rate": 1.7468375851419387e-05, - "loss": 0.127, + "epoch": 0.7594294126605411, + "grad_norm": 0.32741138339042664, + "learning_rate": 1.4937137248929728e-05, + "loss": 0.0804, "step": 14960 }, { - "epoch": 0.3798705419469476, - "grad_norm": 0.6482638120651245, - "learning_rate": 1.7467529720353686e-05, - "loss": 0.1122, + "epoch": 0.7596832326514036, + "grad_norm": 0.4225481152534485, + "learning_rate": 1.493544511565731e-05, + "loss": 0.0775, "step": 14965 }, { - "epoch": 0.37999746160680287, - "grad_norm": 0.42738810181617737, - "learning_rate": 1.7466683589287984e-05, - "loss": 0.1106, + "epoch": 0.7599370526422661, + "grad_norm": 0.5659987926483154, + "learning_rate": 1.4933752982384895e-05, + "loss": 0.09, "step": 14970 }, { - "epoch": 0.3801243812666582, - "grad_norm": 0.5085273385047913, - "learning_rate": 1.7465837458222282e-05, - "loss": 0.093, + "epoch": 0.7601908726331286, + "grad_norm": 0.46714866161346436, + "learning_rate": 1.4932060849112477e-05, + "loss": 0.076, "step": 14975 }, { - "epoch": 0.3802513009265135, - "grad_norm": 0.7200028300285339, - "learning_rate": 1.7464991327156577e-05, - "loss": 0.1145, + "epoch": 0.7604446926239911, + "grad_norm": 0.3290693759918213, + "learning_rate": 1.493036871584006e-05, + "loss": 0.0818, "step": 14980 }, { - "epoch": 0.38037822058636883, - "grad_norm": 0.6039773225784302, - "learning_rate": 1.7464145196090876e-05, - "loss": 0.0962, + "epoch": 0.7606985126148536, + "grad_norm": 0.43824639916419983, + "learning_rate": 1.4928676582567646e-05, + "loss": 0.0692, "step": 14985 }, { - "epoch": 0.3805051402462241, - "grad_norm": 0.4605987071990967, - "learning_rate": 1.7463299065025174e-05, - "loss": 0.1159, + "epoch": 0.760952332605716, + "grad_norm": 0.34384244680404663, + "learning_rate": 1.4926984449295228e-05, + "loss": 0.0865, "step": 14990 }, { - "epoch": 0.38063205990607946, - "grad_norm": 0.46527916193008423, - "learning_rate": 1.7462452933959473e-05, - "loss": 0.0939, + "epoch": 0.7612061525965785, + "grad_norm": 0.45859983563423157, + "learning_rate": 1.492529231602281e-05, + "loss": 0.0699, "step": 14995 }, { - "epoch": 0.38075897956593474, - "grad_norm": 0.49721723794937134, - "learning_rate": 1.7461606802893768e-05, - "loss": 0.1032, + "epoch": 0.761459972587441, + "grad_norm": 0.4020313024520874, + "learning_rate": 1.4923600182750395e-05, + "loss": 0.079, "step": 15000 }, { - "epoch": 0.3808858992257901, - "grad_norm": 0.3907821476459503, - "learning_rate": 1.7460760671828066e-05, - "loss": 0.116, + "epoch": 0.7617137925783035, + "grad_norm": 0.41197675466537476, + "learning_rate": 1.4921908049477978e-05, + "loss": 0.0748, "step": 15005 }, { - "epoch": 0.38101281888564537, - "grad_norm": 0.6451132297515869, - "learning_rate": 1.7459914540762365e-05, - "loss": 0.1183, + "epoch": 0.7619676125691659, + "grad_norm": 0.5557354092597961, + "learning_rate": 1.4920215916205563e-05, + "loss": 0.084, "step": 15010 }, { - "epoch": 0.3811397385455007, - "grad_norm": 0.4175844192504883, - "learning_rate": 1.7459068409696663e-05, - "loss": 0.1054, + "epoch": 0.7622214325600284, + "grad_norm": 0.39988163113594055, + "learning_rate": 1.4918523782933145e-05, + "loss": 0.0835, "step": 15015 }, { - "epoch": 0.381266658205356, - "grad_norm": 0.7675736546516418, - "learning_rate": 1.745822227863096e-05, - "loss": 0.1062, + "epoch": 0.7624752525508909, + "grad_norm": 0.3414739966392517, + "learning_rate": 1.4916831649660727e-05, + "loss": 0.0888, "step": 15020 }, { - "epoch": 0.38139357786521133, - "grad_norm": 0.6075971722602844, - "learning_rate": 1.745737614756526e-05, - "loss": 0.1077, + "epoch": 0.7627290725417534, + "grad_norm": 0.37848934531211853, + "learning_rate": 1.4915139516388312e-05, + "loss": 0.0784, "step": 15025 }, { - "epoch": 0.3815204975250666, - "grad_norm": 0.7210889458656311, - "learning_rate": 1.7456530016499558e-05, - "loss": 0.1137, + "epoch": 0.7629828925326159, + "grad_norm": 0.4667239189147949, + "learning_rate": 1.4913447383115896e-05, + "loss": 0.08, "step": 15030 }, { - "epoch": 0.38164741718492196, - "grad_norm": 0.4232451915740967, - "learning_rate": 1.7455683885433857e-05, - "loss": 0.0913, + "epoch": 0.7632367125234784, + "grad_norm": 0.42531412839889526, + "learning_rate": 1.4911755249843478e-05, + "loss": 0.0815, "step": 15035 }, { - "epoch": 0.38177433684477724, - "grad_norm": 0.5960383415222168, - "learning_rate": 1.745483775436815e-05, - "loss": 0.1074, + "epoch": 0.7634905325143408, + "grad_norm": 0.4677846133708954, + "learning_rate": 1.4910063116571063e-05, + "loss": 0.0779, "step": 15040 }, { - "epoch": 0.3819012565046326, - "grad_norm": 0.7938986420631409, - "learning_rate": 1.745399162330245e-05, - "loss": 0.108, + "epoch": 0.7637443525052033, + "grad_norm": 0.3771125078201294, + "learning_rate": 1.4908370983298645e-05, + "loss": 0.0764, "step": 15045 }, { - "epoch": 0.38202817616448786, - "grad_norm": 0.6558904051780701, - "learning_rate": 1.745314549223675e-05, - "loss": 0.1115, + "epoch": 0.7639981724960658, + "grad_norm": 0.4332607388496399, + "learning_rate": 1.4906678850026228e-05, + "loss": 0.0751, "step": 15050 }, { - "epoch": 0.3821550958243432, - "grad_norm": 0.8321184515953064, - "learning_rate": 1.7452299361171047e-05, - "loss": 0.1004, + "epoch": 0.7642519924869283, + "grad_norm": 0.47496718168258667, + "learning_rate": 1.4904986716753814e-05, + "loss": 0.0791, "step": 15055 }, { - "epoch": 0.3822820154841985, - "grad_norm": 0.6189089417457581, - "learning_rate": 1.7451453230105345e-05, - "loss": 0.1094, + "epoch": 0.7645058124777907, + "grad_norm": 0.3843788504600525, + "learning_rate": 1.4903294583481396e-05, + "loss": 0.0725, "step": 15060 }, { - "epoch": 0.38240893514405383, - "grad_norm": 0.8526270389556885, - "learning_rate": 1.7450607099039644e-05, - "loss": 0.1213, + "epoch": 0.7647596324686532, + "grad_norm": 0.44778764247894287, + "learning_rate": 1.490160245020898e-05, + "loss": 0.0842, "step": 15065 }, { - "epoch": 0.3825358548039091, - "grad_norm": 0.39124730229377747, - "learning_rate": 1.7449760967973942e-05, - "loss": 0.1015, + "epoch": 0.7650134524595157, + "grad_norm": 0.4595566391944885, + "learning_rate": 1.4899910316936563e-05, + "loss": 0.086, "step": 15070 }, { - "epoch": 0.38266277446376445, - "grad_norm": 0.7200321555137634, - "learning_rate": 1.744891483690824e-05, - "loss": 0.1146, + "epoch": 0.7652672724503782, + "grad_norm": 0.4201864004135132, + "learning_rate": 1.4898218183664146e-05, + "loss": 0.0818, "step": 15075 }, { - "epoch": 0.38278969412361974, - "grad_norm": 0.6593925356864929, - "learning_rate": 1.7448068705842535e-05, - "loss": 0.1243, + "epoch": 0.7655210924412407, + "grad_norm": 0.35480204224586487, + "learning_rate": 1.4896526050391731e-05, + "loss": 0.0768, "step": 15080 }, { - "epoch": 0.3829166137834751, - "grad_norm": 0.8487423062324524, - "learning_rate": 1.7447222574776834e-05, - "loss": 0.1113, + "epoch": 0.7657749124321032, + "grad_norm": 0.34363895654678345, + "learning_rate": 1.4894833917119313e-05, + "loss": 0.0735, "step": 15085 }, { - "epoch": 0.38304353344333036, - "grad_norm": 1.0360409021377563, - "learning_rate": 1.7446376443711132e-05, - "loss": 0.0933, + "epoch": 0.7660287324229657, + "grad_norm": 0.39018622040748596, + "learning_rate": 1.4893141783846897e-05, + "loss": 0.0808, "step": 15090 }, { - "epoch": 0.3831704531031857, - "grad_norm": 0.693565845489502, - "learning_rate": 1.744553031264543e-05, - "loss": 0.1136, + "epoch": 0.7662825524138281, + "grad_norm": 0.40973344445228577, + "learning_rate": 1.489144965057448e-05, + "loss": 0.0845, "step": 15095 }, { - "epoch": 0.383297372763041, - "grad_norm": 0.5898317694664001, - "learning_rate": 1.744468418157973e-05, - "loss": 0.1247, + "epoch": 0.7665363724046906, + "grad_norm": 1.6443856954574585, + "learning_rate": 1.4889757517302064e-05, + "loss": 0.0759, "step": 15100 }, { - "epoch": 0.38342429242289633, - "grad_norm": 0.7770888209342957, - "learning_rate": 1.7443838050514027e-05, - "loss": 0.1046, + "epoch": 0.7667901923955531, + "grad_norm": 0.5674831867218018, + "learning_rate": 1.4888065384029646e-05, + "loss": 0.0916, "step": 15105 }, { - "epoch": 0.3835512120827516, - "grad_norm": 0.5730153322219849, - "learning_rate": 1.7442991919448326e-05, - "loss": 0.0912, + "epoch": 0.7670440123864155, + "grad_norm": 0.6491461992263794, + "learning_rate": 1.4886373250757231e-05, + "loss": 0.0727, "step": 15110 }, { - "epoch": 0.38367813174260695, - "grad_norm": 0.8082908987998962, - "learning_rate": 1.7442145788382624e-05, - "loss": 0.0866, + "epoch": 0.767297832377278, + "grad_norm": 0.43828409910202026, + "learning_rate": 1.4884681117484815e-05, + "loss": 0.069, "step": 15115 }, { - "epoch": 0.38380505140246224, - "grad_norm": 0.6667442321777344, - "learning_rate": 1.744129965731692e-05, - "loss": 0.1152, + "epoch": 0.7675516523681405, + "grad_norm": 0.3990132212638855, + "learning_rate": 1.4882988984212398e-05, + "loss": 0.0752, "step": 15120 }, { - "epoch": 0.3839319710623176, - "grad_norm": 0.5781955718994141, - "learning_rate": 1.7440453526251218e-05, - "loss": 0.1195, + "epoch": 0.767805472359003, + "grad_norm": 0.6415324211120605, + "learning_rate": 1.4881296850939982e-05, + "loss": 0.0887, "step": 15125 }, { - "epoch": 0.38405889072217286, - "grad_norm": 0.971501886844635, - "learning_rate": 1.7439607395185516e-05, - "loss": 0.1103, + "epoch": 0.7680592923498655, + "grad_norm": 0.415190190076828, + "learning_rate": 1.4879604717667563e-05, + "loss": 0.0854, "step": 15130 }, { - "epoch": 0.3841858103820282, - "grad_norm": 0.7447302937507629, - "learning_rate": 1.7438761264119814e-05, - "loss": 0.1084, + "epoch": 0.768313112340728, + "grad_norm": 0.43115556240081787, + "learning_rate": 1.4877912584395149e-05, + "loss": 0.0841, "step": 15135 }, { - "epoch": 0.3843127300418835, - "grad_norm": 0.6369324326515198, - "learning_rate": 1.743791513305411e-05, - "loss": 0.1116, + "epoch": 0.7685669323315905, + "grad_norm": 0.4042651653289795, + "learning_rate": 1.4876220451122732e-05, + "loss": 0.0774, "step": 15140 }, { - "epoch": 0.3844396497017388, - "grad_norm": 0.7597374320030212, - "learning_rate": 1.7437069001988408e-05, - "loss": 0.1129, + "epoch": 0.768820752322453, + "grad_norm": 0.4800015985965729, + "learning_rate": 1.4874528317850314e-05, + "loss": 0.0781, "step": 15145 }, { - "epoch": 0.3845665693615941, - "grad_norm": 1.0419002771377563, - "learning_rate": 1.7436222870922706e-05, - "loss": 0.1073, + "epoch": 0.7690745723133154, + "grad_norm": 0.40148481726646423, + "learning_rate": 1.48728361845779e-05, + "loss": 0.0808, "step": 15150 }, { - "epoch": 0.3846934890214494, - "grad_norm": 0.6918029189109802, - "learning_rate": 1.7435376739857005e-05, - "loss": 0.1039, + "epoch": 0.7693283923041778, + "grad_norm": 0.44380658864974976, + "learning_rate": 1.4871144051305481e-05, + "loss": 0.0862, "step": 15155 }, { - "epoch": 0.38482040868130474, - "grad_norm": 1.437781572341919, - "learning_rate": 1.7434530608791303e-05, - "loss": 0.1294, + "epoch": 0.7695822122950403, + "grad_norm": 0.41752004623413086, + "learning_rate": 1.4869451918033066e-05, + "loss": 0.0728, "step": 15160 }, { - "epoch": 0.38494732834116, - "grad_norm": 0.7399141788482666, - "learning_rate": 1.74336844777256e-05, - "loss": 0.1289, + "epoch": 0.7698360322859028, + "grad_norm": 0.3876765966415405, + "learning_rate": 1.486775978476065e-05, + "loss": 0.0759, "step": 15165 }, { - "epoch": 0.38507424800101536, - "grad_norm": 0.5292037129402161, - "learning_rate": 1.74328383466599e-05, - "loss": 0.1239, + "epoch": 0.7700898522767653, + "grad_norm": 0.3961372971534729, + "learning_rate": 1.4866067651488232e-05, + "loss": 0.0676, "step": 15170 }, { - "epoch": 0.38520116766087065, - "grad_norm": 0.6041805744171143, - "learning_rate": 1.7431992215594198e-05, - "loss": 0.1041, + "epoch": 0.7703436722676278, + "grad_norm": 0.5213141441345215, + "learning_rate": 1.4864375518215817e-05, + "loss": 0.085, "step": 15175 }, { - "epoch": 0.385328087320726, - "grad_norm": 0.7142574787139893, - "learning_rate": 1.7431146084528493e-05, - "loss": 0.1058, + "epoch": 0.7705974922584903, + "grad_norm": 0.4002414643764496, + "learning_rate": 1.4862683384943399e-05, + "loss": 0.0855, "step": 15180 }, { - "epoch": 0.38545500698058127, - "grad_norm": 1.5874689817428589, - "learning_rate": 1.743029995346279e-05, - "loss": 0.1322, + "epoch": 0.7708513122493528, + "grad_norm": 0.26624107360839844, + "learning_rate": 1.4860991251670982e-05, + "loss": 0.0856, "step": 15185 }, { - "epoch": 0.3855819266404366, - "grad_norm": 0.43086597323417664, - "learning_rate": 1.742945382239709e-05, - "loss": 0.0917, + "epoch": 0.7711051322402153, + "grad_norm": 0.4855886697769165, + "learning_rate": 1.4859299118398568e-05, + "loss": 0.0772, "step": 15190 }, { - "epoch": 0.3857088463002919, - "grad_norm": 0.3930107355117798, - "learning_rate": 1.742860769133139e-05, - "loss": 0.0936, + "epoch": 0.7713589522310778, + "grad_norm": 0.5074960589408875, + "learning_rate": 1.485760698512615e-05, + "loss": 0.0737, "step": 15195 }, { - "epoch": 0.38583576596014724, - "grad_norm": 0.47973254323005676, - "learning_rate": 1.7427761560265687e-05, - "loss": 0.1015, + "epoch": 0.7716127722219402, + "grad_norm": 0.5205410122871399, + "learning_rate": 1.4855914851853731e-05, + "loss": 0.077, "step": 15200 }, { - "epoch": 0.3859626856200025, - "grad_norm": 0.5118927955627441, - "learning_rate": 1.7426915429199985e-05, - "loss": 0.0894, + "epoch": 0.7718665922128026, + "grad_norm": 0.4280433654785156, + "learning_rate": 1.4854222718581317e-05, + "loss": 0.0697, "step": 15205 }, { - "epoch": 0.38608960527985786, - "grad_norm": 0.7432391047477722, - "learning_rate": 1.7426069298134284e-05, - "loss": 0.0901, + "epoch": 0.7721204122036651, + "grad_norm": 0.3168952465057373, + "learning_rate": 1.48525305853089e-05, + "loss": 0.0795, "step": 15210 }, { - "epoch": 0.38621652493971315, - "grad_norm": 0.6850168704986572, - "learning_rate": 1.7425223167068582e-05, - "loss": 0.0898, + "epoch": 0.7723742321945276, + "grad_norm": 0.39712512493133545, + "learning_rate": 1.4850838452036484e-05, + "loss": 0.0829, "step": 15215 }, { - "epoch": 0.3863434445995685, - "grad_norm": 0.5362807512283325, - "learning_rate": 1.7424377036002877e-05, - "loss": 0.1251, + "epoch": 0.7726280521853901, + "grad_norm": 0.5094771385192871, + "learning_rate": 1.4849146318764067e-05, + "loss": 0.0805, "step": 15220 }, { - "epoch": 0.38647036425942377, - "grad_norm": 0.6501933932304382, - "learning_rate": 1.7423530904937175e-05, - "loss": 0.0938, + "epoch": 0.7728818721762526, + "grad_norm": 0.2554795444011688, + "learning_rate": 1.4847454185491649e-05, + "loss": 0.0655, "step": 15225 }, { - "epoch": 0.3865972839192791, - "grad_norm": 1.0756022930145264, - "learning_rate": 1.7422684773871474e-05, - "loss": 0.1365, + "epoch": 0.7731356921671151, + "grad_norm": 0.46590444445610046, + "learning_rate": 1.4845762052219234e-05, + "loss": 0.0816, "step": 15230 }, { - "epoch": 0.3867242035791344, - "grad_norm": 0.7274271249771118, - "learning_rate": 1.7421838642805772e-05, - "loss": 0.0995, + "epoch": 0.7733895121579776, + "grad_norm": 0.5981921553611755, + "learning_rate": 1.4844069918946818e-05, + "loss": 0.0721, "step": 15235 }, { - "epoch": 0.38685112323898974, - "grad_norm": 0.6637914180755615, - "learning_rate": 1.742099251174007e-05, - "loss": 0.0966, + "epoch": 0.7736433321488401, + "grad_norm": 0.35320785641670227, + "learning_rate": 1.48423777856744e-05, + "loss": 0.0768, "step": 15240 }, { - "epoch": 0.386978042898845, - "grad_norm": 0.5779120922088623, - "learning_rate": 1.742014638067437e-05, - "loss": 0.0904, + "epoch": 0.7738971521397026, + "grad_norm": 0.38590070605278015, + "learning_rate": 1.4840685652401985e-05, + "loss": 0.0823, "step": 15245 }, { - "epoch": 0.38710496255870036, - "grad_norm": 0.5498947501182556, - "learning_rate": 1.7419300249608667e-05, - "loss": 0.1028, + "epoch": 0.774150972130565, + "grad_norm": 0.4039193093776703, + "learning_rate": 1.4838993519129567e-05, + "loss": 0.0674, "step": 15250 }, { - "epoch": 0.38723188221855565, - "grad_norm": 0.6668274998664856, - "learning_rate": 1.7418454118542966e-05, - "loss": 0.1221, + "epoch": 0.7744047921214274, + "grad_norm": 0.423473060131073, + "learning_rate": 1.4837301385857152e-05, + "loss": 0.0734, "step": 15255 }, { - "epoch": 0.387358801878411, - "grad_norm": 1.184483289718628, - "learning_rate": 1.741760798747726e-05, - "loss": 0.1319, + "epoch": 0.7746586121122899, + "grad_norm": 0.4255438446998596, + "learning_rate": 1.4835609252584736e-05, + "loss": 0.0739, "step": 15260 }, { - "epoch": 0.38748572153826627, - "grad_norm": 0.7703475952148438, - "learning_rate": 1.741676185641156e-05, - "loss": 0.1125, + "epoch": 0.7749124321031524, + "grad_norm": 0.3805790841579437, + "learning_rate": 1.4833917119312317e-05, + "loss": 0.0755, "step": 15265 }, { - "epoch": 0.3876126411981216, - "grad_norm": 0.4857535660266876, - "learning_rate": 1.7415915725345858e-05, - "loss": 0.1036, + "epoch": 0.7751662520940149, + "grad_norm": 0.4251479506492615, + "learning_rate": 1.4832224986039903e-05, + "loss": 0.0817, "step": 15270 }, { - "epoch": 0.3877395608579769, - "grad_norm": 0.8808870315551758, - "learning_rate": 1.7415069594280156e-05, - "loss": 0.134, + "epoch": 0.7754200720848774, + "grad_norm": 0.6027906537055969, + "learning_rate": 1.4830532852767485e-05, + "loss": 0.0794, "step": 15275 }, { - "epoch": 0.38786648051783224, - "grad_norm": 0.47179922461509705, - "learning_rate": 1.741422346321445e-05, - "loss": 0.1106, + "epoch": 0.7756738920757399, + "grad_norm": 0.5641241669654846, + "learning_rate": 1.4828840719495068e-05, + "loss": 0.0707, "step": 15280 }, { - "epoch": 0.3879934001776875, - "grad_norm": 0.535582959651947, - "learning_rate": 1.741337733214875e-05, - "loss": 0.0848, + "epoch": 0.7759277120666024, + "grad_norm": 0.3357883393764496, + "learning_rate": 1.4827148586222653e-05, + "loss": 0.0798, "step": 15285 }, { - "epoch": 0.38812031983754286, - "grad_norm": 1.051949143409729, - "learning_rate": 1.7412531201083048e-05, - "loss": 0.1173, + "epoch": 0.7761815320574649, + "grad_norm": 0.6147197484970093, + "learning_rate": 1.4825456452950235e-05, + "loss": 0.0761, "step": 15290 }, { - "epoch": 0.38824723949739814, - "grad_norm": 1.0692683458328247, - "learning_rate": 1.7411685070017346e-05, - "loss": 0.1123, + "epoch": 0.7764353520483274, + "grad_norm": 0.30087563395500183, + "learning_rate": 1.4823764319677819e-05, + "loss": 0.0663, "step": 15295 }, { - "epoch": 0.3883741591572535, - "grad_norm": 0.6390770673751831, - "learning_rate": 1.7410838938951645e-05, - "loss": 0.1385, + "epoch": 0.7766891720391899, + "grad_norm": 0.3757729232311249, + "learning_rate": 1.4822072186405402e-05, + "loss": 0.0746, "step": 15300 }, { - "epoch": 0.38850107881710877, - "grad_norm": 0.8003516793251038, - "learning_rate": 1.7409992807885943e-05, - "loss": 0.1071, + "epoch": 0.7769429920300522, + "grad_norm": 0.38213640451431274, + "learning_rate": 1.4820380053132986e-05, + "loss": 0.0839, "step": 15305 }, { - "epoch": 0.3886279984769641, - "grad_norm": 0.6583903431892395, - "learning_rate": 1.740914667682024e-05, - "loss": 0.0953, + "epoch": 0.7771968120209147, + "grad_norm": 0.6010876297950745, + "learning_rate": 1.4818687919860571e-05, + "loss": 0.0756, "step": 15310 }, { - "epoch": 0.3887549181368194, - "grad_norm": 0.6282956004142761, - "learning_rate": 1.740830054575454e-05, - "loss": 0.0961, + "epoch": 0.7774506320117772, + "grad_norm": 0.39273327589035034, + "learning_rate": 1.4816995786588153e-05, + "loss": 0.0731, "step": 15315 }, { - "epoch": 0.3888818377966747, - "grad_norm": 1.3733015060424805, - "learning_rate": 1.7407454414688835e-05, - "loss": 0.1291, + "epoch": 0.7777044520026397, + "grad_norm": 0.5562280416488647, + "learning_rate": 1.4815303653315736e-05, + "loss": 0.0676, "step": 15320 }, { - "epoch": 0.38900875745653, - "grad_norm": 0.5793765187263489, - "learning_rate": 1.7406608283623133e-05, - "loss": 0.1236, + "epoch": 0.7779582719935022, + "grad_norm": 0.37354519963264465, + "learning_rate": 1.481361152004332e-05, + "loss": 0.0792, "step": 15325 }, { - "epoch": 0.3891356771163853, - "grad_norm": 0.5588862299919128, - "learning_rate": 1.7405762152557432e-05, - "loss": 0.118, + "epoch": 0.7782120919843647, + "grad_norm": 0.4271200895309448, + "learning_rate": 1.4811919386770904e-05, + "loss": 0.0678, "step": 15330 }, { - "epoch": 0.38926259677624064, - "grad_norm": 0.5452609062194824, - "learning_rate": 1.740491602149173e-05, - "loss": 0.1161, + "epoch": 0.7784659119752272, + "grad_norm": 0.4866403043270111, + "learning_rate": 1.4810227253498485e-05, + "loss": 0.0778, "step": 15335 }, { - "epoch": 0.38938951643609593, - "grad_norm": 1.0399929285049438, - "learning_rate": 1.740406989042603e-05, - "loss": 0.1191, + "epoch": 0.7787197319660897, + "grad_norm": 0.561621367931366, + "learning_rate": 1.480853512022607e-05, + "loss": 0.0753, "step": 15340 }, { - "epoch": 0.38951643609595127, - "grad_norm": 0.8371421694755554, - "learning_rate": 1.7403223759360327e-05, - "loss": 0.109, + "epoch": 0.7789735519569522, + "grad_norm": 0.38731345534324646, + "learning_rate": 1.4806842986953654e-05, + "loss": 0.0828, "step": 15345 }, { - "epoch": 0.38964335575580655, - "grad_norm": 0.43359047174453735, - "learning_rate": 1.7402377628294625e-05, - "loss": 0.0979, + "epoch": 0.7792273719478147, + "grad_norm": 0.3867878019809723, + "learning_rate": 1.4805150853681236e-05, + "loss": 0.0858, "step": 15350 }, { - "epoch": 0.3897702754156619, - "grad_norm": 0.4990333914756775, - "learning_rate": 1.7401531497228924e-05, - "loss": 0.1039, + "epoch": 0.779481191938677, + "grad_norm": 0.43254461884498596, + "learning_rate": 1.4803458720408821e-05, + "loss": 0.0646, "step": 15355 }, { - "epoch": 0.3898971950755172, - "grad_norm": 1.2112133502960205, - "learning_rate": 1.740068536616322e-05, - "loss": 0.1069, + "epoch": 0.7797350119295395, + "grad_norm": 0.35966265201568604, + "learning_rate": 1.4801766587136403e-05, + "loss": 0.0726, "step": 15360 }, { - "epoch": 0.3900241147353725, - "grad_norm": 0.5645557641983032, - "learning_rate": 1.7399839235097517e-05, - "loss": 0.1271, + "epoch": 0.779988831920402, + "grad_norm": 0.36605480313301086, + "learning_rate": 1.4800074453863988e-05, + "loss": 0.0721, "step": 15365 }, { - "epoch": 0.3901510343952278, - "grad_norm": 0.6957231163978577, - "learning_rate": 1.7398993104031816e-05, - "loss": 0.1148, + "epoch": 0.7802426519112645, + "grad_norm": 0.37214845418930054, + "learning_rate": 1.4798382320591572e-05, + "loss": 0.0836, "step": 15370 }, { - "epoch": 0.39027795405508314, - "grad_norm": 0.5913406610488892, - "learning_rate": 1.7398146972966114e-05, - "loss": 0.1025, + "epoch": 0.780496471902127, + "grad_norm": 0.43033382296562195, + "learning_rate": 1.4796690187319154e-05, + "loss": 0.0707, "step": 15375 }, { - "epoch": 0.3904048737149384, - "grad_norm": 0.8798678517341614, - "learning_rate": 1.7397300841900412e-05, - "loss": 0.1484, + "epoch": 0.7807502918929895, + "grad_norm": 0.41933897137641907, + "learning_rate": 1.4794998054046739e-05, + "loss": 0.074, "step": 15380 }, { - "epoch": 0.39053179337479377, - "grad_norm": 0.6314868927001953, - "learning_rate": 1.739645471083471e-05, - "loss": 0.1098, + "epoch": 0.781004111883852, + "grad_norm": 0.4193064570426941, + "learning_rate": 1.4793305920774321e-05, + "loss": 0.0732, "step": 15385 }, { - "epoch": 0.39065871303464905, - "grad_norm": 0.5135464072227478, - "learning_rate": 1.739560857976901e-05, - "loss": 0.0972, + "epoch": 0.7812579318747145, + "grad_norm": 0.35709092020988464, + "learning_rate": 1.4791613787501904e-05, + "loss": 0.0831, "step": 15390 }, { - "epoch": 0.3907856326945044, - "grad_norm": 0.8555848002433777, - "learning_rate": 1.7394762448703308e-05, - "loss": 0.1105, + "epoch": 0.781511751865577, + "grad_norm": 0.38249507546424866, + "learning_rate": 1.4789921654229488e-05, + "loss": 0.0753, "step": 15395 }, { - "epoch": 0.3909125523543597, - "grad_norm": 0.5874119400978088, - "learning_rate": 1.7393916317637603e-05, - "loss": 0.1223, + "epoch": 0.7817655718564395, + "grad_norm": 0.6960234045982361, + "learning_rate": 1.4788229520957071e-05, + "loss": 0.0752, "step": 15400 }, { - "epoch": 0.391039472014215, - "grad_norm": 0.5359694957733154, - "learning_rate": 1.73930701865719e-05, - "loss": 0.1139, + "epoch": 0.7820193918473018, + "grad_norm": 0.4672459065914154, + "learning_rate": 1.4786537387684657e-05, + "loss": 0.0851, "step": 15405 }, { - "epoch": 0.3911663916740703, - "grad_norm": 1.1434980630874634, - "learning_rate": 1.73922240555062e-05, - "loss": 0.1113, + "epoch": 0.7822732118381643, + "grad_norm": 0.3816010057926178, + "learning_rate": 1.4784845254412239e-05, + "loss": 0.0764, "step": 15410 }, { - "epoch": 0.39129331133392564, - "grad_norm": 0.5178923606872559, - "learning_rate": 1.7391377924440498e-05, - "loss": 0.1392, + "epoch": 0.7825270318290268, + "grad_norm": 0.7014495730400085, + "learning_rate": 1.4783153121139822e-05, + "loss": 0.0694, "step": 15415 }, { - "epoch": 0.3914202309937809, - "grad_norm": 0.5649380683898926, - "learning_rate": 1.7390531793374793e-05, - "loss": 0.1153, + "epoch": 0.7827808518198893, + "grad_norm": 0.327251136302948, + "learning_rate": 1.4781460987867406e-05, + "loss": 0.0716, "step": 15420 }, { - "epoch": 0.39154715065363627, - "grad_norm": 0.4204961657524109, - "learning_rate": 1.738968566230909e-05, - "loss": 0.1198, + "epoch": 0.7830346718107518, + "grad_norm": 0.5149769186973572, + "learning_rate": 1.477976885459499e-05, + "loss": 0.0734, "step": 15425 }, { - "epoch": 0.39167407031349155, - "grad_norm": 0.720840573310852, - "learning_rate": 1.738883953124339e-05, - "loss": 0.1104, + "epoch": 0.7832884918016143, + "grad_norm": 0.40763530135154724, + "learning_rate": 1.4778076721322571e-05, + "loss": 0.0812, "step": 15430 }, { - "epoch": 0.3918009899733469, - "grad_norm": 1.3657587766647339, - "learning_rate": 1.7387993400177688e-05, - "loss": 0.0883, + "epoch": 0.7835423117924768, + "grad_norm": 0.32969698309898376, + "learning_rate": 1.4776384588050156e-05, + "loss": 0.0799, "step": 15435 }, { - "epoch": 0.3919279096332022, - "grad_norm": 0.7248510122299194, - "learning_rate": 1.7387147269111986e-05, - "loss": 0.0924, + "epoch": 0.7837961317833393, + "grad_norm": 0.3335864245891571, + "learning_rate": 1.477469245477774e-05, + "loss": 0.0785, "step": 15440 }, { - "epoch": 0.3920548292930575, - "grad_norm": 0.9906402826309204, - "learning_rate": 1.7386301138046285e-05, - "loss": 0.1041, + "epoch": 0.7840499517742018, + "grad_norm": 0.48730531334877014, + "learning_rate": 1.4773000321505322e-05, + "loss": 0.0849, "step": 15445 }, { - "epoch": 0.3921817489529128, - "grad_norm": 0.6926352381706238, - "learning_rate": 1.7385455006980583e-05, - "loss": 0.1339, + "epoch": 0.7843037717650642, + "grad_norm": 0.7397474646568298, + "learning_rate": 1.4771308188232907e-05, + "loss": 0.0778, "step": 15450 }, { - "epoch": 0.39230866861276814, - "grad_norm": 0.5478496551513672, - "learning_rate": 1.738460887591488e-05, - "loss": 0.1248, + "epoch": 0.7845575917559267, + "grad_norm": 0.39907628297805786, + "learning_rate": 1.4769616054960489e-05, + "loss": 0.0729, "step": 15455 }, { - "epoch": 0.3924355882726234, - "grad_norm": 0.8500484228134155, - "learning_rate": 1.7383762744849177e-05, - "loss": 0.1049, + "epoch": 0.7848114117467891, + "grad_norm": 0.47134730219841003, + "learning_rate": 1.4767923921688074e-05, + "loss": 0.0771, "step": 15460 }, { - "epoch": 0.39256250793247877, - "grad_norm": 0.5460205078125, - "learning_rate": 1.7382916613783475e-05, - "loss": 0.1159, + "epoch": 0.7850652317376516, + "grad_norm": 0.41393592953681946, + "learning_rate": 1.4766231788415658e-05, + "loss": 0.0772, "step": 15465 }, { - "epoch": 0.39268942759233405, - "grad_norm": 0.9492348432540894, - "learning_rate": 1.7382070482717773e-05, - "loss": 0.1085, + "epoch": 0.7853190517285141, + "grad_norm": 0.3614552319049835, + "learning_rate": 1.476453965514324e-05, + "loss": 0.0718, "step": 15470 }, { - "epoch": 0.3928163472521894, - "grad_norm": 1.0264636278152466, - "learning_rate": 1.7381224351652072e-05, - "loss": 0.1027, + "epoch": 0.7855728717193766, + "grad_norm": 0.4395851194858551, + "learning_rate": 1.4762847521870825e-05, + "loss": 0.0825, "step": 15475 }, { - "epoch": 0.3929432669120447, - "grad_norm": 0.7041149735450745, - "learning_rate": 1.738037822058637e-05, - "loss": 0.1233, + "epoch": 0.7858266917102391, + "grad_norm": 0.3465043902397156, + "learning_rate": 1.4761155388598407e-05, + "loss": 0.0746, "step": 15480 }, { - "epoch": 0.39307018657189996, - "grad_norm": 0.6414458155632019, - "learning_rate": 1.737953208952067e-05, - "loss": 0.0921, + "epoch": 0.7860805117011016, + "grad_norm": 0.3366214632987976, + "learning_rate": 1.475946325532599e-05, + "loss": 0.071, "step": 15485 }, { - "epoch": 0.3931971062317553, - "grad_norm": 0.7552077174186707, - "learning_rate": 1.7378685958454967e-05, - "loss": 0.1366, + "epoch": 0.7863343316919641, + "grad_norm": 0.6985540986061096, + "learning_rate": 1.4757771122053575e-05, + "loss": 0.0701, "step": 15490 }, { - "epoch": 0.3933240258916106, - "grad_norm": 0.5532273650169373, - "learning_rate": 1.7377839827389265e-05, - "loss": 0.1063, + "epoch": 0.7865881516828266, + "grad_norm": 0.6810505390167236, + "learning_rate": 1.4756078988781157e-05, + "loss": 0.0881, "step": 15495 }, { - "epoch": 0.3934509455514659, - "grad_norm": 0.6273918747901917, - "learning_rate": 1.7376993696323564e-05, - "loss": 0.1105, + "epoch": 0.786841971673689, + "grad_norm": 0.8365420699119568, + "learning_rate": 1.4754386855508742e-05, + "loss": 0.0842, "step": 15500 }, { - "epoch": 0.3935778652113212, - "grad_norm": 0.747977077960968, - "learning_rate": 1.737614756525786e-05, - "loss": 0.1104, + "epoch": 0.7870957916645515, + "grad_norm": 0.4822964370250702, + "learning_rate": 1.4752694722236324e-05, + "loss": 0.07, "step": 15505 }, { - "epoch": 0.39370478487117655, - "grad_norm": 0.35354912281036377, - "learning_rate": 1.7375301434192157e-05, - "loss": 0.1035, + "epoch": 0.787349611655414, + "grad_norm": 0.3959048390388489, + "learning_rate": 1.4751002588963908e-05, + "loss": 0.078, "step": 15510 }, { - "epoch": 0.39383170453103183, - "grad_norm": 0.5893090963363647, - "learning_rate": 1.7374455303126456e-05, - "loss": 0.0893, + "epoch": 0.7876034316462764, + "grad_norm": 0.32304689288139343, + "learning_rate": 1.4749310455691493e-05, + "loss": 0.0814, "step": 15515 }, { - "epoch": 0.3939586241908872, - "grad_norm": 0.6597870588302612, - "learning_rate": 1.7373609172060754e-05, - "loss": 0.1039, + "epoch": 0.7878572516371389, + "grad_norm": 0.39487403631210327, + "learning_rate": 1.4747618322419075e-05, + "loss": 0.0779, "step": 15520 }, { - "epoch": 0.39408554385074246, - "grad_norm": 0.5120177865028381, - "learning_rate": 1.7372763040995052e-05, - "loss": 0.1286, + "epoch": 0.7881110716280014, + "grad_norm": 0.48532378673553467, + "learning_rate": 1.4745926189146658e-05, + "loss": 0.077, "step": 15525 }, { - "epoch": 0.3942124635105978, - "grad_norm": 0.7366992831230164, - "learning_rate": 1.737191690992935e-05, - "loss": 0.1233, + "epoch": 0.7883648916188639, + "grad_norm": 0.38463422656059265, + "learning_rate": 1.4744234055874242e-05, + "loss": 0.0725, "step": 15530 }, { - "epoch": 0.3943393831704531, - "grad_norm": 0.6016814112663269, - "learning_rate": 1.737107077886365e-05, - "loss": 0.109, + "epoch": 0.7886187116097264, + "grad_norm": 0.5803540945053101, + "learning_rate": 1.4742541922601826e-05, + "loss": 0.0746, "step": 15535 }, { - "epoch": 0.3944663028303084, - "grad_norm": 0.5142179727554321, - "learning_rate": 1.7370224647797948e-05, - "loss": 0.1118, + "epoch": 0.7888725316005889, + "grad_norm": 0.37139931321144104, + "learning_rate": 1.4740849789329407e-05, + "loss": 0.0752, "step": 15540 }, { - "epoch": 0.3945932224901637, - "grad_norm": 0.6006062030792236, - "learning_rate": 1.7369378516732243e-05, - "loss": 0.1021, + "epoch": 0.7891263515914514, + "grad_norm": 0.39508089423179626, + "learning_rate": 1.4739157656056993e-05, + "loss": 0.0804, "step": 15545 }, { - "epoch": 0.39472014215001905, - "grad_norm": 0.729427695274353, - "learning_rate": 1.736853238566654e-05, - "loss": 0.0931, + "epoch": 0.7893801715823138, + "grad_norm": 0.4758867919445038, + "learning_rate": 1.4737465522784576e-05, + "loss": 0.0706, "step": 15550 }, { - "epoch": 0.39484706180987433, - "grad_norm": 0.5234845280647278, - "learning_rate": 1.736768625460084e-05, - "loss": 0.1118, + "epoch": 0.7896339915731763, + "grad_norm": 0.3260742425918579, + "learning_rate": 1.473577338951216e-05, + "loss": 0.0762, "step": 15555 }, { - "epoch": 0.3949739814697297, - "grad_norm": 0.6482070684432983, - "learning_rate": 1.7366840123535138e-05, - "loss": 0.0952, + "epoch": 0.7898878115640388, + "grad_norm": 0.37266799807548523, + "learning_rate": 1.4734081256239743e-05, + "loss": 0.0875, "step": 15560 }, { - "epoch": 0.39510090112958496, - "grad_norm": 0.5253493785858154, - "learning_rate": 1.7365993992469433e-05, - "loss": 0.1038, + "epoch": 0.7901416315549012, + "grad_norm": 0.28276917338371277, + "learning_rate": 1.4732389122967325e-05, + "loss": 0.0633, "step": 15565 }, { - "epoch": 0.3952278207894403, - "grad_norm": 0.441997766494751, - "learning_rate": 1.736514786140373e-05, - "loss": 0.09, + "epoch": 0.7903954515457637, + "grad_norm": 0.4956200122833252, + "learning_rate": 1.473069698969491e-05, + "loss": 0.0704, "step": 15570 }, { - "epoch": 0.3953547404492956, - "grad_norm": 0.9004039764404297, - "learning_rate": 1.736430173033803e-05, - "loss": 0.0997, + "epoch": 0.7906492715366262, + "grad_norm": 0.5198813676834106, + "learning_rate": 1.4729004856422492e-05, + "loss": 0.0722, "step": 15575 }, { - "epoch": 0.3954816601091509, - "grad_norm": 0.5025791525840759, - "learning_rate": 1.7363455599272328e-05, - "loss": 0.1284, + "epoch": 0.7909030915274887, + "grad_norm": 0.40935298800468445, + "learning_rate": 1.4727312723150076e-05, + "loss": 0.0756, "step": 15580 }, { - "epoch": 0.3956085797690062, - "grad_norm": 0.55512934923172, - "learning_rate": 1.7362609468206627e-05, - "loss": 0.116, + "epoch": 0.7911569115183512, + "grad_norm": 0.3992593586444855, + "learning_rate": 1.4725620589877661e-05, + "loss": 0.0722, "step": 15585 }, { - "epoch": 0.39573549942886155, - "grad_norm": 0.4264630973339081, - "learning_rate": 1.7361763337140925e-05, - "loss": 0.1258, + "epoch": 0.7914107315092137, + "grad_norm": 0.3877473473548889, + "learning_rate": 1.4723928456605243e-05, + "loss": 0.0882, "step": 15590 }, { - "epoch": 0.39586241908871683, - "grad_norm": 0.8901561498641968, - "learning_rate": 1.7360917206075223e-05, - "loss": 0.0896, + "epoch": 0.7916645515000762, + "grad_norm": 0.3663567900657654, + "learning_rate": 1.4722236323332826e-05, + "loss": 0.0718, "step": 15595 }, { - "epoch": 0.3959893387485722, - "grad_norm": 0.945244312286377, - "learning_rate": 1.7360071075009522e-05, - "loss": 0.1255, + "epoch": 0.7919183714909386, + "grad_norm": 0.37373846769332886, + "learning_rate": 1.472054419006041e-05, + "loss": 0.0821, "step": 15600 }, { - "epoch": 0.39611625840842746, - "grad_norm": 0.7486288547515869, - "learning_rate": 1.7359224943943817e-05, - "loss": 0.0941, + "epoch": 0.7921721914818011, + "grad_norm": 0.45595207810401917, + "learning_rate": 1.4718852056787993e-05, + "loss": 0.0716, "step": 15605 }, { - "epoch": 0.3962431780682828, - "grad_norm": 0.9137313365936279, - "learning_rate": 1.7358378812878115e-05, - "loss": 0.1161, + "epoch": 0.7924260114726636, + "grad_norm": 0.5352175831794739, + "learning_rate": 1.4717159923515579e-05, + "loss": 0.0715, "step": 15610 }, { - "epoch": 0.3963700977281381, - "grad_norm": 0.7454603314399719, - "learning_rate": 1.7357532681812414e-05, - "loss": 0.1156, + "epoch": 0.792679831463526, + "grad_norm": 0.3262074589729309, + "learning_rate": 1.471546779024316e-05, + "loss": 0.0737, "step": 15615 }, { - "epoch": 0.3964970173879934, - "grad_norm": 0.7671962380409241, - "learning_rate": 1.7356686550746712e-05, - "loss": 0.1129, + "epoch": 0.7929336514543885, + "grad_norm": 0.36793214082717896, + "learning_rate": 1.4713775656970744e-05, + "loss": 0.0679, "step": 15620 }, { - "epoch": 0.3966239370478487, - "grad_norm": 0.5029982328414917, - "learning_rate": 1.735584041968101e-05, - "loss": 0.1062, + "epoch": 0.793187471445251, + "grad_norm": 0.41612109541893005, + "learning_rate": 1.4712083523698328e-05, + "loss": 0.0823, "step": 15625 }, { - "epoch": 0.39675085670770405, - "grad_norm": 0.5092881321907043, - "learning_rate": 1.735499428861531e-05, - "loss": 0.112, + "epoch": 0.7934412914361135, + "grad_norm": 0.40798622369766235, + "learning_rate": 1.4710391390425911e-05, + "loss": 0.0725, "step": 15630 }, { - "epoch": 0.39687777636755933, - "grad_norm": 0.47655346989631653, - "learning_rate": 1.7354148157549607e-05, - "loss": 0.0959, + "epoch": 0.793695111426976, + "grad_norm": 0.5767354369163513, + "learning_rate": 1.4708699257153493e-05, + "loss": 0.0813, "step": 15635 }, { - "epoch": 0.39700469602741467, - "grad_norm": 0.6070145964622498, - "learning_rate": 1.7353302026483906e-05, - "loss": 0.0885, + "epoch": 0.7939489314178385, + "grad_norm": 0.615403413772583, + "learning_rate": 1.4707007123881078e-05, + "loss": 0.0857, "step": 15640 }, { - "epoch": 0.39713161568726996, - "grad_norm": 0.6385906934738159, - "learning_rate": 1.73524558954182e-05, - "loss": 0.1145, + "epoch": 0.794202751408701, + "grad_norm": 0.34347468614578247, + "learning_rate": 1.4705314990608662e-05, + "loss": 0.0702, "step": 15645 }, { - "epoch": 0.3972585353471253, - "grad_norm": 0.7631597518920898, - "learning_rate": 1.73516097643525e-05, - "loss": 0.1197, + "epoch": 0.7944565713995634, + "grad_norm": 0.3589203655719757, + "learning_rate": 1.4703622857336245e-05, + "loss": 0.0724, "step": 15650 }, { - "epoch": 0.3973854550069806, - "grad_norm": 0.5415132641792297, - "learning_rate": 1.7350763633286797e-05, - "loss": 0.0844, + "epoch": 0.7947103913904259, + "grad_norm": 0.7738829851150513, + "learning_rate": 1.4701930724063829e-05, + "loss": 0.0768, "step": 15655 }, { - "epoch": 0.39751237466683587, - "grad_norm": 2.416133165359497, - "learning_rate": 1.7349917502221096e-05, - "loss": 0.1375, + "epoch": 0.7949642113812884, + "grad_norm": 0.3893224000930786, + "learning_rate": 1.470023859079141e-05, + "loss": 0.0675, "step": 15660 }, { - "epoch": 0.3976392943266912, - "grad_norm": 0.8562435507774353, - "learning_rate": 1.7349071371155394e-05, - "loss": 0.1064, + "epoch": 0.7952180313721509, + "grad_norm": 0.43465477228164673, + "learning_rate": 1.4698546457518996e-05, + "loss": 0.0744, "step": 15665 }, { - "epoch": 0.3977662139865465, - "grad_norm": 0.6495535373687744, - "learning_rate": 1.7348225240089693e-05, - "loss": 0.0883, + "epoch": 0.7954718513630133, + "grad_norm": 0.3958378732204437, + "learning_rate": 1.469685432424658e-05, + "loss": 0.0753, "step": 15670 }, { - "epoch": 0.39789313364640183, - "grad_norm": 0.5982926487922668, - "learning_rate": 1.734737910902399e-05, - "loss": 0.1123, + "epoch": 0.7957256713538758, + "grad_norm": 0.5245358347892761, + "learning_rate": 1.4695162190974161e-05, + "loss": 0.0748, "step": 15675 }, { - "epoch": 0.3980200533062571, - "grad_norm": 0.6697134971618652, - "learning_rate": 1.734653297795829e-05, - "loss": 0.0894, + "epoch": 0.7959794913447383, + "grad_norm": 0.3974064290523529, + "learning_rate": 1.4693470057701747e-05, + "loss": 0.0786, "step": 15680 }, { - "epoch": 0.39814697296611246, - "grad_norm": 0.5187225937843323, - "learning_rate": 1.7345686846892584e-05, - "loss": 0.0986, + "epoch": 0.7962333113356008, + "grad_norm": 0.4498586356639862, + "learning_rate": 1.4691777924429328e-05, + "loss": 0.0844, "step": 15685 }, { - "epoch": 0.39827389262596774, - "grad_norm": 0.5694058537483215, - "learning_rate": 1.7344840715826883e-05, - "loss": 0.1002, + "epoch": 0.7964871313264633, + "grad_norm": 0.4059452712535858, + "learning_rate": 1.4690085791156912e-05, + "loss": 0.0637, "step": 15690 }, { - "epoch": 0.3984008122858231, - "grad_norm": 0.5654703378677368, - "learning_rate": 1.734399458476118e-05, - "loss": 0.1076, + "epoch": 0.7967409513173258, + "grad_norm": 0.5134612321853638, + "learning_rate": 1.4688393657884497e-05, + "loss": 0.0767, "step": 15695 }, { - "epoch": 0.39852773194567837, - "grad_norm": 0.43118274211883545, - "learning_rate": 1.734314845369548e-05, - "loss": 0.1098, + "epoch": 0.7969947713081882, + "grad_norm": 0.4138130843639374, + "learning_rate": 1.4686701524612079e-05, + "loss": 0.0845, "step": 15700 }, { - "epoch": 0.3986546516055337, - "grad_norm": 1.1353785991668701, - "learning_rate": 1.7342302322629775e-05, - "loss": 0.079, + "epoch": 0.7972485912990507, + "grad_norm": 0.37701699137687683, + "learning_rate": 1.4685009391339664e-05, + "loss": 0.0743, "step": 15705 }, { - "epoch": 0.398781571265389, - "grad_norm": 0.6757036447525024, - "learning_rate": 1.7341456191564073e-05, - "loss": 0.103, + "epoch": 0.7975024112899132, + "grad_norm": 0.3598290681838989, + "learning_rate": 1.4683317258067246e-05, + "loss": 0.072, "step": 15710 }, { - "epoch": 0.39890849092524433, - "grad_norm": 0.5415642261505127, - "learning_rate": 1.734061006049837e-05, - "loss": 0.1033, + "epoch": 0.7977562312807757, + "grad_norm": 0.548689067363739, + "learning_rate": 1.468162512479483e-05, + "loss": 0.0662, "step": 15715 }, { - "epoch": 0.3990354105850996, - "grad_norm": 0.6002601385116577, - "learning_rate": 1.733976392943267e-05, - "loss": 0.1015, + "epoch": 0.7980100512716382, + "grad_norm": 0.3460603654384613, + "learning_rate": 1.4679932991522415e-05, + "loss": 0.0595, "step": 15720 }, { - "epoch": 0.39916233024495495, - "grad_norm": 0.49966537952423096, - "learning_rate": 1.7338917798366968e-05, - "loss": 0.098, + "epoch": 0.7982638712625006, + "grad_norm": 0.5318562388420105, + "learning_rate": 1.4678240858249997e-05, + "loss": 0.0726, "step": 15725 }, { - "epoch": 0.39928924990481024, - "grad_norm": 0.6708398461341858, - "learning_rate": 1.7338071667301267e-05, - "loss": 0.0995, + "epoch": 0.7985176912533631, + "grad_norm": 1.7499229907989502, + "learning_rate": 1.467654872497758e-05, + "loss": 0.0727, "step": 15730 }, { - "epoch": 0.3994161695646656, - "grad_norm": 0.5140540599822998, - "learning_rate": 1.7337225536235565e-05, - "loss": 0.1173, + "epoch": 0.7987715112442256, + "grad_norm": 0.6805680990219116, + "learning_rate": 1.4674856591705164e-05, + "loss": 0.0822, "step": 15735 }, { - "epoch": 0.39954308922452086, - "grad_norm": 0.5666049718856812, - "learning_rate": 1.7336379405169863e-05, - "loss": 0.0975, + "epoch": 0.7990253312350881, + "grad_norm": 0.6451191306114197, + "learning_rate": 1.4673164458432747e-05, + "loss": 0.0772, "step": 15740 }, { - "epoch": 0.3996700088843762, - "grad_norm": 0.8106690645217896, - "learning_rate": 1.733553327410416e-05, - "loss": 0.1278, + "epoch": 0.7992791512259505, + "grad_norm": 0.5430915951728821, + "learning_rate": 1.4671472325160333e-05, + "loss": 0.0869, "step": 15745 }, { - "epoch": 0.3997969285442315, - "grad_norm": 1.0115009546279907, - "learning_rate": 1.7334687143038457e-05, - "loss": 0.1144, + "epoch": 0.799532971216813, + "grad_norm": 0.4242735803127289, + "learning_rate": 1.4669780191887915e-05, + "loss": 0.0781, "step": 15750 }, { - "epoch": 0.39992384820408683, - "grad_norm": 0.7386209964752197, - "learning_rate": 1.7333841011972755e-05, - "loss": 0.0911, + "epoch": 0.7997867912076755, + "grad_norm": 0.4273703098297119, + "learning_rate": 1.4668088058615498e-05, + "loss": 0.0732, "step": 15755 }, { - "epoch": 0.4000507678639421, - "grad_norm": 0.7433488368988037, - "learning_rate": 1.7332994880907054e-05, - "loss": 0.1249, + "epoch": 0.800040611198538, + "grad_norm": 0.4131914973258972, + "learning_rate": 1.4666395925343082e-05, + "loss": 0.0792, "step": 15760 }, { - "epoch": 0.40017768752379745, - "grad_norm": 0.5527405142784119, - "learning_rate": 1.7332148749841352e-05, - "loss": 0.1084, + "epoch": 0.8002944311894005, + "grad_norm": 0.7269368171691895, + "learning_rate": 1.4664703792070665e-05, + "loss": 0.0706, "step": 15765 }, { - "epoch": 0.40030460718365274, - "grad_norm": 0.5471238493919373, - "learning_rate": 1.733130261877565e-05, - "loss": 0.0954, + "epoch": 0.800548251180263, + "grad_norm": 0.730088472366333, + "learning_rate": 1.4663011658798247e-05, + "loss": 0.0803, "step": 15770 }, { - "epoch": 0.4004315268435081, - "grad_norm": 0.44829538464546204, - "learning_rate": 1.733045648770995e-05, - "loss": 0.0836, + "epoch": 0.8008020711711255, + "grad_norm": 0.4482845962047577, + "learning_rate": 1.4661319525525832e-05, + "loss": 0.0718, "step": 15775 }, { - "epoch": 0.40055844650336336, - "grad_norm": 0.649516761302948, - "learning_rate": 1.7329610356644247e-05, - "loss": 0.1038, + "epoch": 0.8010558911619879, + "grad_norm": 0.4157123863697052, + "learning_rate": 1.4659627392253414e-05, + "loss": 0.0782, "step": 15780 }, { - "epoch": 0.4006853661632187, - "grad_norm": 0.708890438079834, - "learning_rate": 1.7328764225578542e-05, - "loss": 0.1186, + "epoch": 0.8013097111528504, + "grad_norm": 0.3921131491661072, + "learning_rate": 1.4657935258980998e-05, + "loss": 0.066, "step": 15785 }, { - "epoch": 0.400812285823074, - "grad_norm": 0.7144529223442078, - "learning_rate": 1.732791809451284e-05, - "loss": 0.0942, + "epoch": 0.8015635311437129, + "grad_norm": 0.33425718545913696, + "learning_rate": 1.4656243125708583e-05, + "loss": 0.0675, "step": 15790 }, { - "epoch": 0.40093920548292933, - "grad_norm": 0.6685452461242676, - "learning_rate": 1.732707196344714e-05, - "loss": 0.1472, + "epoch": 0.8018173511345753, + "grad_norm": 0.5760083794593811, + "learning_rate": 1.4654550992436165e-05, + "loss": 0.0846, "step": 15795 }, { - "epoch": 0.4010661251427846, - "grad_norm": 0.7981312870979309, - "learning_rate": 1.7326225832381437e-05, - "loss": 0.1331, + "epoch": 0.8020711711254378, + "grad_norm": 0.5779892206192017, + "learning_rate": 1.465285885916375e-05, + "loss": 0.0821, "step": 15800 }, { - "epoch": 0.40119304480263995, - "grad_norm": 0.7323702573776245, - "learning_rate": 1.7325379701315736e-05, - "loss": 0.1227, + "epoch": 0.8023249911163003, + "grad_norm": 0.3755846917629242, + "learning_rate": 1.4651166725891332e-05, + "loss": 0.0715, "step": 15805 }, { - "epoch": 0.40131996446249524, - "grad_norm": 0.49210238456726074, - "learning_rate": 1.7324533570250034e-05, - "loss": 0.1101, + "epoch": 0.8025788111071628, + "grad_norm": 0.3848758637905121, + "learning_rate": 1.4649474592618915e-05, + "loss": 0.0857, "step": 15810 }, { - "epoch": 0.4014468841223506, - "grad_norm": 0.6651603579521179, - "learning_rate": 1.7323687439184333e-05, - "loss": 0.117, + "epoch": 0.8028326310980253, + "grad_norm": 0.3276999890804291, + "learning_rate": 1.46477824593465e-05, + "loss": 0.0747, "step": 15815 }, { - "epoch": 0.40157380378220586, - "grad_norm": 0.6778621077537537, - "learning_rate": 1.732284130811863e-05, - "loss": 0.0975, + "epoch": 0.8030864510888878, + "grad_norm": 0.33379969000816345, + "learning_rate": 1.4646090326074082e-05, + "loss": 0.0722, "step": 15820 }, { - "epoch": 0.4017007234420612, - "grad_norm": 0.7432537078857422, - "learning_rate": 1.7321995177052926e-05, - "loss": 0.0995, + "epoch": 0.8033402710797503, + "grad_norm": 0.37067776918411255, + "learning_rate": 1.4644398192801666e-05, + "loss": 0.0737, "step": 15825 }, { - "epoch": 0.4018276431019165, - "grad_norm": 0.6327972412109375, - "learning_rate": 1.7321149045987225e-05, - "loss": 0.1229, + "epoch": 0.8035940910706127, + "grad_norm": 0.29267624020576477, + "learning_rate": 1.464270605952925e-05, + "loss": 0.063, "step": 15830 }, { - "epoch": 0.40195456276177177, - "grad_norm": 1.1968554258346558, - "learning_rate": 1.7320302914921523e-05, - "loss": 0.1251, + "epoch": 0.8038479110614752, + "grad_norm": 0.3490564227104187, + "learning_rate": 1.4641013926256833e-05, + "loss": 0.0909, "step": 15835 }, { - "epoch": 0.4020814824216271, - "grad_norm": 0.5668025016784668, - "learning_rate": 1.731945678385582e-05, - "loss": 0.1215, + "epoch": 0.8041017310523377, + "grad_norm": 0.4481734335422516, + "learning_rate": 1.4639321792984415e-05, + "loss": 0.0741, "step": 15840 }, { - "epoch": 0.4022084020814824, - "grad_norm": 0.822522759437561, - "learning_rate": 1.7318610652790116e-05, - "loss": 0.1158, + "epoch": 0.8043555510432001, + "grad_norm": 0.3517626225948334, + "learning_rate": 1.4637629659712e-05, + "loss": 0.0731, "step": 15845 }, { - "epoch": 0.40233532174133774, - "grad_norm": 0.5078701376914978, - "learning_rate": 1.7317764521724415e-05, - "loss": 0.0855, + "epoch": 0.8046093710340626, + "grad_norm": 0.4463517963886261, + "learning_rate": 1.4635937526439584e-05, + "loss": 0.0801, "step": 15850 }, { - "epoch": 0.402462241401193, - "grad_norm": 0.8222021460533142, - "learning_rate": 1.7316918390658713e-05, - "loss": 0.1061, + "epoch": 0.8048631910249251, + "grad_norm": 0.3373356759548187, + "learning_rate": 1.4634245393167167e-05, + "loss": 0.0672, "step": 15855 }, { - "epoch": 0.40258916106104836, - "grad_norm": 0.7171165943145752, - "learning_rate": 1.731607225959301e-05, - "loss": 0.1152, + "epoch": 0.8051170110157876, + "grad_norm": 0.4135652482509613, + "learning_rate": 1.463255325989475e-05, + "loss": 0.0605, "step": 15860 }, { - "epoch": 0.40271608072090365, - "grad_norm": 0.9530938267707825, - "learning_rate": 1.731522612852731e-05, - "loss": 0.0952, + "epoch": 0.8053708310066501, + "grad_norm": 0.4091756343841553, + "learning_rate": 1.4630861126622333e-05, + "loss": 0.0711, "step": 15865 }, { - "epoch": 0.402843000380759, - "grad_norm": 0.8077841997146606, - "learning_rate": 1.731437999746161e-05, - "loss": 0.1219, + "epoch": 0.8056246509975126, + "grad_norm": 0.4166070520877838, + "learning_rate": 1.4629168993349918e-05, + "loss": 0.0641, "step": 15870 }, { - "epoch": 0.40296992004061427, - "grad_norm": 0.5117992758750916, - "learning_rate": 1.7313533866395907e-05, - "loss": 0.1203, + "epoch": 0.8058784709883751, + "grad_norm": 0.37992623448371887, + "learning_rate": 1.4627476860077501e-05, + "loss": 0.0744, "step": 15875 }, { - "epoch": 0.4030968397004696, - "grad_norm": 0.5340364575386047, - "learning_rate": 1.7312687735330205e-05, - "loss": 0.1058, + "epoch": 0.8061322909792376, + "grad_norm": 0.39718085527420044, + "learning_rate": 1.4625784726805083e-05, + "loss": 0.0715, "step": 15880 }, { - "epoch": 0.4032237593603249, - "grad_norm": 0.5445460677146912, - "learning_rate": 1.73118416042645e-05, - "loss": 0.1239, + "epoch": 0.8063861109701, + "grad_norm": 0.3552018404006958, + "learning_rate": 1.4624092593532669e-05, + "loss": 0.0718, "step": 15885 }, { - "epoch": 0.40335067902018024, - "grad_norm": 0.7131897211074829, - "learning_rate": 1.73109954731988e-05, - "loss": 0.1075, + "epoch": 0.8066399309609625, + "grad_norm": 0.39325806498527527, + "learning_rate": 1.462240046026025e-05, + "loss": 0.0743, "step": 15890 }, { - "epoch": 0.4034775986800355, - "grad_norm": 0.6858046054840088, - "learning_rate": 1.7310149342133097e-05, - "loss": 0.1158, + "epoch": 0.8068937509518249, + "grad_norm": 0.3813045620918274, + "learning_rate": 1.4620708326987836e-05, + "loss": 0.0717, "step": 15895 }, { - "epoch": 0.40360451833989086, - "grad_norm": 0.6376552581787109, - "learning_rate": 1.7309303211067395e-05, - "loss": 0.1159, + "epoch": 0.8071475709426874, + "grad_norm": 0.5478102564811707, + "learning_rate": 1.461901619371542e-05, + "loss": 0.081, "step": 15900 }, { - "epoch": 0.40373143799974615, - "grad_norm": 0.7093445062637329, - "learning_rate": 1.7308457080001694e-05, - "loss": 0.1058, + "epoch": 0.8074013909335499, + "grad_norm": 0.7054280638694763, + "learning_rate": 1.4617324060443001e-05, + "loss": 0.0769, "step": 15905 }, { - "epoch": 0.4038583576596015, - "grad_norm": 0.8147251605987549, - "learning_rate": 1.7307610948935992e-05, - "loss": 0.1179, + "epoch": 0.8076552109244124, + "grad_norm": 0.5671230554580688, + "learning_rate": 1.4615631927170586e-05, + "loss": 0.068, "step": 15910 }, { - "epoch": 0.40398527731945677, - "grad_norm": 0.48918089270591736, - "learning_rate": 1.730676481787029e-05, - "loss": 0.1009, + "epoch": 0.8079090309152749, + "grad_norm": 0.6359015703201294, + "learning_rate": 1.4613939793898168e-05, + "loss": 0.06, "step": 15915 }, { - "epoch": 0.4041121969793121, - "grad_norm": 1.0363577604293823, - "learning_rate": 1.730591868680459e-05, - "loss": 0.0884, + "epoch": 0.8081628509061374, + "grad_norm": 0.4305456876754761, + "learning_rate": 1.4612247660625752e-05, + "loss": 0.065, "step": 15920 }, { - "epoch": 0.4042391166391674, - "grad_norm": 1.7854145765304565, - "learning_rate": 1.7305072555738884e-05, - "loss": 0.1125, + "epoch": 0.8084166708969999, + "grad_norm": 0.5645986199378967, + "learning_rate": 1.4610555527353337e-05, + "loss": 0.0746, "step": 15925 }, { - "epoch": 0.40436603629902274, - "grad_norm": 0.5563003420829773, - "learning_rate": 1.7304226424673182e-05, - "loss": 0.1108, + "epoch": 0.8086704908878624, + "grad_norm": 0.4272991716861725, + "learning_rate": 1.4608863394080919e-05, + "loss": 0.082, "step": 15930 }, { - "epoch": 0.404492955958878, - "grad_norm": 0.49315062165260315, - "learning_rate": 1.730338029360748e-05, - "loss": 0.1124, + "epoch": 0.8089243108787249, + "grad_norm": 0.4712943732738495, + "learning_rate": 1.4607171260808502e-05, + "loss": 0.0835, "step": 15935 }, { - "epoch": 0.40461987561873336, - "grad_norm": 0.7829304933547974, - "learning_rate": 1.730253416254178e-05, - "loss": 0.0853, + "epoch": 0.8091781308695873, + "grad_norm": 0.36924442648887634, + "learning_rate": 1.4605479127536086e-05, + "loss": 0.0757, "step": 15940 }, { - "epoch": 0.40474679527858864, - "grad_norm": 0.5807455778121948, - "learning_rate": 1.7301688031476078e-05, - "loss": 0.0948, + "epoch": 0.8094319508604497, + "grad_norm": 0.9191230535507202, + "learning_rate": 1.460378699426367e-05, + "loss": 0.0721, "step": 15945 }, { - "epoch": 0.404873714938444, - "grad_norm": 0.4329532980918884, - "learning_rate": 1.7300841900410376e-05, - "loss": 0.0796, + "epoch": 0.8096857708513122, + "grad_norm": 0.466630220413208, + "learning_rate": 1.4602094860991255e-05, + "loss": 0.0777, "step": 15950 }, { - "epoch": 0.40500063459829927, - "grad_norm": 0.6838335394859314, - "learning_rate": 1.7299995769344674e-05, - "loss": 0.1287, + "epoch": 0.8099395908421747, + "grad_norm": 0.6259939670562744, + "learning_rate": 1.4600402727718836e-05, + "loss": 0.0699, "step": 15955 }, { - "epoch": 0.4051275542581546, - "grad_norm": 0.4694925546646118, - "learning_rate": 1.7299149638278973e-05, - "loss": 0.1015, + "epoch": 0.8101934108330372, + "grad_norm": 0.31263235211372375, + "learning_rate": 1.4598710594446418e-05, + "loss": 0.0688, "step": 15960 }, { - "epoch": 0.4052544739180099, - "grad_norm": 0.818102240562439, - "learning_rate": 1.7298303507213268e-05, - "loss": 0.1344, + "epoch": 0.8104472308238997, + "grad_norm": 0.32614171504974365, + "learning_rate": 1.4597018461174004e-05, + "loss": 0.0667, "step": 15965 }, { - "epoch": 0.40538139357786523, - "grad_norm": 0.648195207118988, - "learning_rate": 1.7297457376147566e-05, - "loss": 0.0972, + "epoch": 0.8107010508147622, + "grad_norm": 0.551091730594635, + "learning_rate": 1.4595326327901587e-05, + "loss": 0.0614, "step": 15970 }, { - "epoch": 0.4055083132377205, - "grad_norm": 0.7501447200775146, - "learning_rate": 1.7296611245081865e-05, - "loss": 0.1099, + "epoch": 0.8109548708056247, + "grad_norm": 0.44654572010040283, + "learning_rate": 1.4593634194629169e-05, + "loss": 0.0857, "step": 15975 }, { - "epoch": 0.40563523289757586, - "grad_norm": 0.5141229629516602, - "learning_rate": 1.7295765114016163e-05, - "loss": 0.1293, + "epoch": 0.8112086907964872, + "grad_norm": 0.8355388641357422, + "learning_rate": 1.4591942061356754e-05, + "loss": 0.0589, "step": 15980 }, { - "epoch": 0.40576215255743114, - "grad_norm": 0.5608082413673401, - "learning_rate": 1.7294918982950458e-05, - "loss": 0.1147, + "epoch": 0.8114625107873497, + "grad_norm": 0.3554515242576599, + "learning_rate": 1.4590249928084336e-05, + "loss": 0.0672, "step": 15985 }, { - "epoch": 0.4058890722172865, - "grad_norm": 0.5700523257255554, - "learning_rate": 1.7294072851884756e-05, - "loss": 0.0994, + "epoch": 0.8117163307782121, + "grad_norm": 0.8215906023979187, + "learning_rate": 1.458855779481192e-05, + "loss": 0.0651, "step": 15990 }, { - "epoch": 0.40601599187714177, - "grad_norm": 0.508159875869751, - "learning_rate": 1.7293226720819055e-05, - "loss": 0.1073, + "epoch": 0.8119701507690745, + "grad_norm": 0.5176529884338379, + "learning_rate": 1.4586865661539505e-05, + "loss": 0.0811, "step": 15995 }, { - "epoch": 0.40614291153699705, - "grad_norm": 0.7670906782150269, - "learning_rate": 1.7292380589753353e-05, - "loss": 0.0932, + "epoch": 0.812223970759937, + "grad_norm": 0.37184882164001465, + "learning_rate": 1.4585173528267087e-05, + "loss": 0.0811, "step": 16000 }, { - "epoch": 0.4062698311968524, - "grad_norm": 0.5051582455635071, - "learning_rate": 1.729153445868765e-05, - "loss": 0.1048, + "epoch": 0.8124777907507995, + "grad_norm": 0.36488497257232666, + "learning_rate": 1.4583481394994672e-05, + "loss": 0.0694, "step": 16005 }, { - "epoch": 0.4063967508567077, - "grad_norm": 2.2033839225769043, - "learning_rate": 1.729068832762195e-05, - "loss": 0.128, + "epoch": 0.812731610741662, + "grad_norm": 0.29020920395851135, + "learning_rate": 1.4581789261722254e-05, + "loss": 0.0725, "step": 16010 }, { - "epoch": 0.406523670516563, - "grad_norm": 1.3851262331008911, - "learning_rate": 1.728984219655625e-05, - "loss": 0.1316, + "epoch": 0.8129854307325245, + "grad_norm": 0.41967329382896423, + "learning_rate": 1.4580097128449837e-05, + "loss": 0.0749, "step": 16015 }, { - "epoch": 0.4066505901764183, - "grad_norm": 0.7582750916481018, - "learning_rate": 1.7288996065490547e-05, - "loss": 0.1106, + "epoch": 0.813239250723387, + "grad_norm": 0.35643911361694336, + "learning_rate": 1.4578404995177423e-05, + "loss": 0.0728, "step": 16020 }, { - "epoch": 0.40677750983627364, - "grad_norm": 0.7486197352409363, - "learning_rate": 1.7288149934424845e-05, - "loss": 0.1127, + "epoch": 0.8134930707142495, + "grad_norm": 0.5488142967224121, + "learning_rate": 1.4576712861905004e-05, + "loss": 0.076, "step": 16025 }, { - "epoch": 0.4069044294961289, - "grad_norm": 0.6660436391830444, - "learning_rate": 1.728730380335914e-05, - "loss": 0.1005, + "epoch": 0.813746890705112, + "grad_norm": 0.6067050695419312, + "learning_rate": 1.4575020728632588e-05, + "loss": 0.0723, "step": 16030 }, { - "epoch": 0.40703134915598427, - "grad_norm": 0.47259747982025146, - "learning_rate": 1.728645767229344e-05, - "loss": 0.1003, + "epoch": 0.8140007106959745, + "grad_norm": 0.3196711838245392, + "learning_rate": 1.4573328595360171e-05, + "loss": 0.0752, "step": 16035 }, { - "epoch": 0.40715826881583955, - "grad_norm": 0.5743085741996765, - "learning_rate": 1.7285611541227737e-05, - "loss": 0.1144, + "epoch": 0.8142545306868368, + "grad_norm": 0.3165784478187561, + "learning_rate": 1.4571636462087755e-05, + "loss": 0.0608, "step": 16040 }, { - "epoch": 0.4072851884756949, - "grad_norm": 0.36923086643218994, - "learning_rate": 1.7284765410162035e-05, - "loss": 0.0868, + "epoch": 0.8145083506776993, + "grad_norm": 0.37109699845314026, + "learning_rate": 1.456994432881534e-05, + "loss": 0.077, "step": 16045 }, { - "epoch": 0.4074121081355502, - "grad_norm": 1.0259774923324585, - "learning_rate": 1.7283919279096334e-05, - "loss": 0.1372, + "epoch": 0.8147621706685618, + "grad_norm": 0.5756739377975464, + "learning_rate": 1.4568252195542922e-05, + "loss": 0.0746, "step": 16050 }, { - "epoch": 0.4075390277954055, - "grad_norm": 0.9785448908805847, - "learning_rate": 1.7283073148030632e-05, - "loss": 0.1233, + "epoch": 0.8150159906594243, + "grad_norm": 0.5761229395866394, + "learning_rate": 1.4566560062270506e-05, + "loss": 0.0769, "step": 16055 }, { - "epoch": 0.4076659474552608, - "grad_norm": 0.6073505878448486, - "learning_rate": 1.728222701696493e-05, - "loss": 0.1039, + "epoch": 0.8152698106502868, + "grad_norm": 0.6240622401237488, + "learning_rate": 1.456486792899809e-05, + "loss": 0.0838, "step": 16060 }, { - "epoch": 0.40779286711511614, - "grad_norm": 1.1163922548294067, - "learning_rate": 1.728138088589923e-05, - "loss": 0.096, + "epoch": 0.8155236306411493, + "grad_norm": 0.40298014879226685, + "learning_rate": 1.4563175795725673e-05, + "loss": 0.0701, "step": 16065 }, { - "epoch": 0.4079197867749714, - "grad_norm": 0.5511506199836731, - "learning_rate": 1.7280534754833524e-05, - "loss": 0.102, + "epoch": 0.8157774506320118, + "grad_norm": 0.4362527132034302, + "learning_rate": 1.4561483662453255e-05, + "loss": 0.0739, "step": 16070 }, { - "epoch": 0.40804670643482677, - "grad_norm": 1.540884017944336, - "learning_rate": 1.7279688623767822e-05, - "loss": 0.0955, + "epoch": 0.8160312706228743, + "grad_norm": 0.37834423780441284, + "learning_rate": 1.455979152918084e-05, + "loss": 0.079, "step": 16075 }, { - "epoch": 0.40817362609468205, - "grad_norm": 0.5006808042526245, - "learning_rate": 1.727884249270212e-05, - "loss": 0.1189, + "epoch": 0.8162850906137368, + "grad_norm": 0.3305414319038391, + "learning_rate": 1.4558099395908423e-05, + "loss": 0.0662, "step": 16080 }, { - "epoch": 0.4083005457545374, - "grad_norm": 0.6059737205505371, - "learning_rate": 1.727799636163642e-05, - "loss": 0.1134, + "epoch": 0.8165389106045993, + "grad_norm": 0.45846548676490784, + "learning_rate": 1.4556407262636005e-05, + "loss": 0.0704, "step": 16085 }, { - "epoch": 0.4084274654143927, - "grad_norm": 0.9114747643470764, - "learning_rate": 1.7277150230570718e-05, - "loss": 0.1102, + "epoch": 0.8167927305954616, + "grad_norm": 0.42119476199150085, + "learning_rate": 1.455471512936359e-05, + "loss": 0.0734, "step": 16090 }, { - "epoch": 0.408554385074248, - "grad_norm": 0.6590103507041931, - "learning_rate": 1.7276304099505016e-05, - "loss": 0.1018, + "epoch": 0.8170465505863241, + "grad_norm": 0.4292179346084595, + "learning_rate": 1.4553022996091172e-05, + "loss": 0.0665, "step": 16095 }, { - "epoch": 0.4086813047341033, - "grad_norm": 0.5281713604927063, - "learning_rate": 1.7275457968439315e-05, - "loss": 0.0977, + "epoch": 0.8173003705771866, + "grad_norm": 0.3152535855770111, + "learning_rate": 1.4551330862818758e-05, + "loss": 0.0719, "step": 16100 }, { - "epoch": 0.40880822439395864, - "grad_norm": 0.5342009663581848, - "learning_rate": 1.7274611837373613e-05, - "loss": 0.1125, + "epoch": 0.8175541905680491, + "grad_norm": 0.3996585011482239, + "learning_rate": 1.4549638729546341e-05, + "loss": 0.0616, "step": 16105 }, { - "epoch": 0.4089351440538139, - "grad_norm": 0.5929030179977417, - "learning_rate": 1.7273765706307908e-05, - "loss": 0.1174, + "epoch": 0.8178080105589116, + "grad_norm": 0.3468000292778015, + "learning_rate": 1.4547946596273923e-05, + "loss": 0.074, "step": 16110 }, { - "epoch": 0.40906206371366927, - "grad_norm": 0.6910762190818787, - "learning_rate": 1.7272919575242206e-05, - "loss": 0.0923, + "epoch": 0.8180618305497741, + "grad_norm": 0.4962630569934845, + "learning_rate": 1.4546254463001508e-05, + "loss": 0.0705, "step": 16115 }, { - "epoch": 0.40918898337352455, - "grad_norm": 0.6761794686317444, - "learning_rate": 1.7272073444176505e-05, - "loss": 0.0964, + "epoch": 0.8183156505406366, + "grad_norm": 0.48265331983566284, + "learning_rate": 1.454456232972909e-05, + "loss": 0.0718, "step": 16120 }, { - "epoch": 0.4093159030333799, - "grad_norm": 0.9711748957633972, - "learning_rate": 1.7271227313110803e-05, - "loss": 0.1249, + "epoch": 0.8185694705314991, + "grad_norm": 0.4469800293445587, + "learning_rate": 1.4542870196456674e-05, + "loss": 0.0932, "step": 16125 }, { - "epoch": 0.4094428226932352, - "grad_norm": 0.5205610394477844, - "learning_rate": 1.7270381182045098e-05, - "loss": 0.0996, + "epoch": 0.8188232905223616, + "grad_norm": 0.6275296211242676, + "learning_rate": 1.4541178063184259e-05, + "loss": 0.0755, "step": 16130 }, { - "epoch": 0.4095697423530905, - "grad_norm": 0.9700564742088318, - "learning_rate": 1.7269535050979397e-05, - "loss": 0.1148, + "epoch": 0.8190771105132241, + "grad_norm": 0.5437877178192139, + "learning_rate": 1.453948592991184e-05, + "loss": 0.0762, "step": 16135 }, { - "epoch": 0.4096966620129458, - "grad_norm": 0.8956917524337769, - "learning_rate": 1.7268688919913695e-05, - "loss": 0.1066, + "epoch": 0.8193309305040865, + "grad_norm": 0.3738441467285156, + "learning_rate": 1.4537793796639426e-05, + "loss": 0.0734, "step": 16140 }, { - "epoch": 0.40982358167280114, - "grad_norm": 0.8899663090705872, - "learning_rate": 1.7267842788847993e-05, - "loss": 0.1031, + "epoch": 0.8195847504949489, + "grad_norm": 0.41383683681488037, + "learning_rate": 1.4536101663367008e-05, + "loss": 0.0834, "step": 16145 }, { - "epoch": 0.4099505013326564, - "grad_norm": 0.5758920311927795, - "learning_rate": 1.7266996657782292e-05, - "loss": 0.1333, + "epoch": 0.8198385704858114, + "grad_norm": 0.49464190006256104, + "learning_rate": 1.4534409530094591e-05, + "loss": 0.0773, "step": 16150 }, { - "epoch": 0.41007742099251177, - "grad_norm": 2.9715194702148438, - "learning_rate": 1.726615052671659e-05, - "loss": 0.1043, + "epoch": 0.8200923904766739, + "grad_norm": 0.5497142672538757, + "learning_rate": 1.4532717396822177e-05, + "loss": 0.0715, "step": 16155 }, { - "epoch": 0.41020434065236705, - "grad_norm": 0.46324700117111206, - "learning_rate": 1.726530439565089e-05, - "loss": 0.1069, + "epoch": 0.8203462104675364, + "grad_norm": 1.2365076541900635, + "learning_rate": 1.4531025263549758e-05, + "loss": 0.0737, "step": 16160 }, { - "epoch": 0.4103312603122224, - "grad_norm": 0.45850226283073425, - "learning_rate": 1.7264458264585187e-05, - "loss": 0.0935, + "epoch": 0.8206000304583989, + "grad_norm": 0.47389277815818787, + "learning_rate": 1.452933313027734e-05, + "loss": 0.0762, "step": 16165 }, { - "epoch": 0.4104581799720777, - "grad_norm": 0.7194899320602417, - "learning_rate": 1.7263612133519482e-05, - "loss": 0.1069, + "epoch": 0.8208538504492614, + "grad_norm": 0.33445248007774353, + "learning_rate": 1.4527640997004925e-05, + "loss": 0.0774, "step": 16170 }, { - "epoch": 0.41058509963193296, - "grad_norm": 0.6243958473205566, - "learning_rate": 1.726276600245378e-05, - "loss": 0.0898, + "epoch": 0.8211076704401239, + "grad_norm": 0.4635807275772095, + "learning_rate": 1.4525948863732509e-05, + "loss": 0.0739, "step": 16175 }, { - "epoch": 0.4107120192917883, - "grad_norm": 0.5835657119750977, - "learning_rate": 1.726191987138808e-05, - "loss": 0.0943, + "epoch": 0.8213614904309864, + "grad_norm": 0.7265046834945679, + "learning_rate": 1.4524256730460091e-05, + "loss": 0.0809, "step": 16180 }, { - "epoch": 0.4108389389516436, - "grad_norm": 0.6834063529968262, - "learning_rate": 1.7261073740322377e-05, - "loss": 0.1152, + "epoch": 0.8216153104218489, + "grad_norm": 0.3342432379722595, + "learning_rate": 1.4522564597187676e-05, + "loss": 0.0651, "step": 16185 }, { - "epoch": 0.4109658586114989, - "grad_norm": 0.762104868888855, - "learning_rate": 1.7260227609256676e-05, - "loss": 0.0849, + "epoch": 0.8218691304127113, + "grad_norm": 0.38529881834983826, + "learning_rate": 1.4520872463915258e-05, + "loss": 0.082, "step": 16190 }, { - "epoch": 0.4110927782713542, - "grad_norm": 0.827873706817627, - "learning_rate": 1.7259381478190974e-05, - "loss": 0.1137, + "epoch": 0.8221229504035737, + "grad_norm": 0.45302921533584595, + "learning_rate": 1.4519180330642843e-05, + "loss": 0.0688, "step": 16195 }, { - "epoch": 0.41121969793120955, - "grad_norm": 0.9640071392059326, - "learning_rate": 1.7258535347125272e-05, - "loss": 0.095, + "epoch": 0.8223767703944362, + "grad_norm": 0.33290159702301025, + "learning_rate": 1.4517488197370427e-05, + "loss": 0.075, "step": 16200 }, { - "epoch": 0.41134661759106483, - "grad_norm": 0.6519110798835754, - "learning_rate": 1.725768921605957e-05, - "loss": 0.1032, + "epoch": 0.8226305903852987, + "grad_norm": 0.34083133935928345, + "learning_rate": 1.4515796064098009e-05, + "loss": 0.0755, "step": 16205 }, { - "epoch": 0.4114735372509202, - "grad_norm": 0.4959271252155304, - "learning_rate": 1.7256843084993866e-05, - "loss": 0.1077, + "epoch": 0.8228844103761612, + "grad_norm": 0.3705480992794037, + "learning_rate": 1.4514103930825594e-05, + "loss": 0.0721, "step": 16210 }, { - "epoch": 0.41160045691077546, - "grad_norm": 0.7615448832511902, - "learning_rate": 1.7255996953928164e-05, - "loss": 0.1145, + "epoch": 0.8231382303670237, + "grad_norm": 0.39600300788879395, + "learning_rate": 1.4512411797553176e-05, + "loss": 0.0708, "step": 16215 }, { - "epoch": 0.4117273765706308, - "grad_norm": 0.811072587966919, - "learning_rate": 1.7255150822862463e-05, - "loss": 0.1058, + "epoch": 0.8233920503578862, + "grad_norm": 0.32571184635162354, + "learning_rate": 1.451071966428076e-05, + "loss": 0.0758, "step": 16220 }, { - "epoch": 0.4118542962304861, - "grad_norm": 0.833181619644165, - "learning_rate": 1.725430469179676e-05, - "loss": 0.1159, + "epoch": 0.8236458703487487, + "grad_norm": 0.791690468788147, + "learning_rate": 1.4509027531008344e-05, + "loss": 0.084, "step": 16225 }, { - "epoch": 0.4119812158903414, - "grad_norm": 0.7173975110054016, - "learning_rate": 1.725345856073106e-05, - "loss": 0.1278, + "epoch": 0.8238996903396112, + "grad_norm": 0.3258461058139801, + "learning_rate": 1.4507335397735926e-05, + "loss": 0.0741, "step": 16230 }, { - "epoch": 0.4121081355501967, - "grad_norm": 0.5112674236297607, - "learning_rate": 1.7252612429665358e-05, - "loss": 0.0844, + "epoch": 0.8241535103304737, + "grad_norm": 0.3211754262447357, + "learning_rate": 1.450564326446351e-05, + "loss": 0.079, "step": 16235 }, { - "epoch": 0.41223505521005205, - "grad_norm": 0.4662214517593384, - "learning_rate": 1.7251766298599656e-05, - "loss": 0.1087, + "epoch": 0.8244073303213361, + "grad_norm": 0.3197133541107178, + "learning_rate": 1.4503951131191093e-05, + "loss": 0.0674, "step": 16240 }, { - "epoch": 0.41236197486990733, - "grad_norm": 0.6411307454109192, - "learning_rate": 1.7250920167533955e-05, - "loss": 0.1091, + "epoch": 0.8246611503121986, + "grad_norm": 0.39093726873397827, + "learning_rate": 1.4502258997918677e-05, + "loss": 0.0686, "step": 16245 }, { - "epoch": 0.4124888945297627, - "grad_norm": 0.602817714214325, - "learning_rate": 1.725007403646825e-05, - "loss": 0.1028, + "epoch": 0.824914970303061, + "grad_norm": 0.4963763356208801, + "learning_rate": 1.4500566864646262e-05, + "loss": 0.0775, "step": 16250 }, { - "epoch": 0.41261581418961796, - "grad_norm": 0.7346264123916626, - "learning_rate": 1.7249227905402548e-05, - "loss": 0.1061, + "epoch": 0.8251687902939235, + "grad_norm": 0.40643295645713806, + "learning_rate": 1.4498874731373844e-05, + "loss": 0.0698, "step": 16255 }, { - "epoch": 0.4127427338494733, - "grad_norm": 0.7947094440460205, - "learning_rate": 1.7248381774336846e-05, - "loss": 0.1148, + "epoch": 0.825422610284786, + "grad_norm": 0.39619845151901245, + "learning_rate": 1.4497182598101428e-05, + "loss": 0.0713, "step": 16260 }, { - "epoch": 0.4128696535093286, - "grad_norm": 0.531777560710907, - "learning_rate": 1.7247535643271145e-05, - "loss": 0.1103, + "epoch": 0.8256764302756485, + "grad_norm": 0.4349762201309204, + "learning_rate": 1.4495490464829011e-05, + "loss": 0.0682, "step": 16265 }, { - "epoch": 0.4129965731691839, - "grad_norm": 0.6914690732955933, - "learning_rate": 1.724668951220544e-05, - "loss": 0.1138, + "epoch": 0.825930250266511, + "grad_norm": 0.5058432817459106, + "learning_rate": 1.4493798331556595e-05, + "loss": 0.0729, "step": 16270 }, { - "epoch": 0.4131234928290392, - "grad_norm": 0.8389149308204651, - "learning_rate": 1.7245843381139738e-05, - "loss": 0.0893, + "epoch": 0.8261840702573735, + "grad_norm": 0.3339829444885254, + "learning_rate": 1.4492106198284177e-05, + "loss": 0.0715, "step": 16275 }, { - "epoch": 0.41325041248889455, - "grad_norm": 1.2480319738388062, - "learning_rate": 1.7244997250074037e-05, - "loss": 0.1069, + "epoch": 0.826437890248236, + "grad_norm": 0.33098337054252625, + "learning_rate": 1.4490414065011762e-05, + "loss": 0.0671, "step": 16280 }, { - "epoch": 0.41337733214874983, - "grad_norm": 0.4666741192340851, - "learning_rate": 1.7244151119008335e-05, - "loss": 0.1, + "epoch": 0.8266917102390984, + "grad_norm": 0.42175155878067017, + "learning_rate": 1.4488721931739345e-05, + "loss": 0.071, "step": 16285 }, { - "epoch": 0.41350425180860517, - "grad_norm": 2.1755332946777344, - "learning_rate": 1.7243304987942633e-05, - "loss": 0.138, + "epoch": 0.8269455302299609, + "grad_norm": 0.8576765656471252, + "learning_rate": 1.4487029798466929e-05, + "loss": 0.0698, "step": 16290 }, { - "epoch": 0.41363117146846046, - "grad_norm": 0.5225837230682373, - "learning_rate": 1.7242458856876932e-05, - "loss": 0.1129, + "epoch": 0.8271993502208234, + "grad_norm": 1.0109632015228271, + "learning_rate": 1.4485337665194512e-05, + "loss": 0.0794, "step": 16295 }, { - "epoch": 0.4137580911283158, - "grad_norm": 0.6421528458595276, - "learning_rate": 1.724161272581123e-05, - "loss": 0.0938, + "epoch": 0.8274531702116859, + "grad_norm": 0.4387573301792145, + "learning_rate": 1.4483645531922094e-05, + "loss": 0.0758, "step": 16300 }, { - "epoch": 0.4138850107881711, - "grad_norm": 0.8704067468643188, - "learning_rate": 1.724076659474553e-05, - "loss": 0.0837, + "epoch": 0.8277069902025483, + "grad_norm": 0.3630094826221466, + "learning_rate": 1.448195339864968e-05, + "loss": 0.0605, "step": 16305 }, { - "epoch": 0.4140119304480264, - "grad_norm": 0.9892425537109375, - "learning_rate": 1.7239920463679824e-05, - "loss": 0.1366, + "epoch": 0.8279608101934108, + "grad_norm": 0.4075300991535187, + "learning_rate": 1.4480261265377263e-05, + "loss": 0.0648, "step": 16310 }, { - "epoch": 0.4141388501078817, - "grad_norm": 0.6149999499320984, - "learning_rate": 1.7239074332614122e-05, - "loss": 0.0912, + "epoch": 0.8282146301842733, + "grad_norm": 0.37269020080566406, + "learning_rate": 1.4478569132104845e-05, + "loss": 0.0723, "step": 16315 }, { - "epoch": 0.41426576976773705, - "grad_norm": 0.7228572964668274, - "learning_rate": 1.723822820154842e-05, - "loss": 0.1302, + "epoch": 0.8284684501751358, + "grad_norm": 0.6695398092269897, + "learning_rate": 1.447687699883243e-05, + "loss": 0.0678, "step": 16320 }, { - "epoch": 0.41439268942759233, - "grad_norm": 0.509646475315094, - "learning_rate": 1.723738207048272e-05, - "loss": 0.0958, + "epoch": 0.8287222701659983, + "grad_norm": 0.36002203822135925, + "learning_rate": 1.4475184865560012e-05, + "loss": 0.0634, "step": 16325 }, { - "epoch": 0.41451960908744767, - "grad_norm": 0.39916691184043884, - "learning_rate": 1.7236535939417017e-05, - "loss": 0.0941, + "epoch": 0.8289760901568608, + "grad_norm": 0.6074070334434509, + "learning_rate": 1.4473492732287596e-05, + "loss": 0.0842, "step": 16330 }, { - "epoch": 0.41464652874730296, - "grad_norm": 0.366619348526001, - "learning_rate": 1.7235689808351316e-05, - "loss": 0.0928, + "epoch": 0.8292299101477232, + "grad_norm": 0.37209317088127136, + "learning_rate": 1.447180059901518e-05, + "loss": 0.0701, "step": 16335 }, { - "epoch": 0.4147734484071583, - "grad_norm": 0.6313339471817017, - "learning_rate": 1.7234843677285614e-05, - "loss": 0.0964, + "epoch": 0.8294837301385857, + "grad_norm": 0.3380017578601837, + "learning_rate": 1.4470108465742763e-05, + "loss": 0.0701, "step": 16340 }, { - "epoch": 0.4149003680670136, - "grad_norm": 0.8188174366950989, - "learning_rate": 1.7233997546219912e-05, - "loss": 0.1038, + "epoch": 0.8297375501294482, + "grad_norm": 0.6713643074035645, + "learning_rate": 1.4468416332470348e-05, + "loss": 0.0703, "step": 16345 }, { - "epoch": 0.41502728772686887, - "grad_norm": 0.4911155104637146, - "learning_rate": 1.7233151415154207e-05, - "loss": 0.1053, + "epoch": 0.8299913701203107, + "grad_norm": 0.9763821959495544, + "learning_rate": 1.446672419919793e-05, + "loss": 0.0714, "step": 16350 }, { - "epoch": 0.4151542073867242, - "grad_norm": 0.8965951800346375, - "learning_rate": 1.7232305284088506e-05, - "loss": 0.1009, + "epoch": 0.8302451901111731, + "grad_norm": 0.3959788680076599, + "learning_rate": 1.4465032065925513e-05, + "loss": 0.0737, "step": 16355 }, { - "epoch": 0.4152811270465795, - "grad_norm": 0.6396322250366211, - "learning_rate": 1.7231459153022804e-05, - "loss": 0.1042, + "epoch": 0.8304990101020356, + "grad_norm": 1.2258071899414062, + "learning_rate": 1.4463339932653098e-05, + "loss": 0.0774, "step": 16360 }, { - "epoch": 0.41540804670643483, - "grad_norm": 1.1758553981781006, - "learning_rate": 1.7230613021957103e-05, - "loss": 0.1416, + "epoch": 0.8307528300928981, + "grad_norm": 0.34479087591171265, + "learning_rate": 1.446164779938068e-05, + "loss": 0.0748, "step": 16365 }, { - "epoch": 0.4155349663662901, - "grad_norm": 0.8491977453231812, - "learning_rate": 1.72297668908914e-05, - "loss": 0.0935, + "epoch": 0.8310066500837606, + "grad_norm": 0.39507466554641724, + "learning_rate": 1.4459955666108262e-05, + "loss": 0.0747, "step": 16370 }, { - "epoch": 0.41566188602614546, - "grad_norm": 0.6496829986572266, - "learning_rate": 1.72289207598257e-05, - "loss": 0.1205, + "epoch": 0.8312604700746231, + "grad_norm": 0.5069799423217773, + "learning_rate": 1.4458263532835847e-05, + "loss": 0.0771, "step": 16375 }, { - "epoch": 0.41578880568600074, - "grad_norm": 0.46064651012420654, - "learning_rate": 1.7228074628759998e-05, - "loss": 0.0907, + "epoch": 0.8315142900654856, + "grad_norm": 0.5372797846794128, + "learning_rate": 1.4456571399563431e-05, + "loss": 0.0886, "step": 16380 }, { - "epoch": 0.4159157253458561, - "grad_norm": 0.6255616545677185, - "learning_rate": 1.7227228497694296e-05, - "loss": 0.0843, + "epoch": 0.831768110056348, + "grad_norm": 0.5318045616149902, + "learning_rate": 1.4454879266291015e-05, + "loss": 0.0835, "step": 16385 }, { - "epoch": 0.41604264500571136, - "grad_norm": 0.5221133828163147, - "learning_rate": 1.722638236662859e-05, - "loss": 0.099, + "epoch": 0.8320219300472105, + "grad_norm": 0.3830041289329529, + "learning_rate": 1.4453187133018598e-05, + "loss": 0.0646, "step": 16390 }, { - "epoch": 0.4161695646655667, - "grad_norm": 1.6042160987854004, - "learning_rate": 1.722553623556289e-05, - "loss": 0.1051, + "epoch": 0.832275750038073, + "grad_norm": 0.40192267298698425, + "learning_rate": 1.445149499974618e-05, + "loss": 0.074, "step": 16395 }, { - "epoch": 0.416296484325422, - "grad_norm": 0.7296250462532043, - "learning_rate": 1.7224690104497188e-05, - "loss": 0.1195, + "epoch": 0.8325295700289355, + "grad_norm": 0.6122369170188904, + "learning_rate": 1.4449802866473765e-05, + "loss": 0.0659, "step": 16400 }, { - "epoch": 0.41642340398527733, - "grad_norm": 3.5063931941986084, - "learning_rate": 1.7223843973431487e-05, - "loss": 0.1106, + "epoch": 0.832783390019798, + "grad_norm": 0.455731064081192, + "learning_rate": 1.4448110733201349e-05, + "loss": 0.08, "step": 16405 }, { - "epoch": 0.4165503236451326, - "grad_norm": 0.5203003883361816, - "learning_rate": 1.722299784236578e-05, - "loss": 0.1059, + "epoch": 0.8330372100106604, + "grad_norm": 0.3991694450378418, + "learning_rate": 1.444641859992893e-05, + "loss": 0.0677, "step": 16410 }, { - "epoch": 0.41667724330498795, - "grad_norm": 1.122910499572754, - "learning_rate": 1.722215171130008e-05, - "loss": 0.1056, + "epoch": 0.8332910300015229, + "grad_norm": 0.49834415316581726, + "learning_rate": 1.4444726466656516e-05, + "loss": 0.0779, "step": 16415 }, { - "epoch": 0.41680416296484324, - "grad_norm": 0.6401264071464539, - "learning_rate": 1.722130558023438e-05, - "loss": 0.0997, + "epoch": 0.8335448499923854, + "grad_norm": 0.3147116005420685, + "learning_rate": 1.4443034333384098e-05, + "loss": 0.0675, "step": 16420 }, { - "epoch": 0.4169310826246986, - "grad_norm": 0.6834256052970886, - "learning_rate": 1.7220459449168677e-05, - "loss": 0.0898, + "epoch": 0.8337986699832479, + "grad_norm": 0.4364480674266815, + "learning_rate": 1.4441342200111681e-05, + "loss": 0.0724, "step": 16425 }, { - "epoch": 0.41705800228455386, - "grad_norm": 0.6591717004776001, - "learning_rate": 1.7219613318102975e-05, - "loss": 0.1067, + "epoch": 0.8340524899741104, + "grad_norm": 0.3125317096710205, + "learning_rate": 1.4439650066839266e-05, + "loss": 0.0704, "step": 16430 }, { - "epoch": 0.4171849219444092, - "grad_norm": 0.8429279327392578, - "learning_rate": 1.7218767187037274e-05, - "loss": 0.1139, + "epoch": 0.8343063099649728, + "grad_norm": 0.3873157799243927, + "learning_rate": 1.4437957933566848e-05, + "loss": 0.0661, "step": 16435 }, { - "epoch": 0.4173118416042645, - "grad_norm": 0.6689391136169434, - "learning_rate": 1.7217921055971572e-05, - "loss": 0.1014, + "epoch": 0.8345601299558353, + "grad_norm": 0.24474631249904633, + "learning_rate": 1.4436265800294434e-05, + "loss": 0.0687, "step": 16440 }, { - "epoch": 0.41743876126411983, - "grad_norm": 1.238481879234314, - "learning_rate": 1.721707492490587e-05, - "loss": 0.096, + "epoch": 0.8348139499466978, + "grad_norm": 0.37103623151779175, + "learning_rate": 1.4434573667022015e-05, + "loss": 0.0598, "step": 16445 }, { - "epoch": 0.4175656809239751, - "grad_norm": 0.6219508051872253, - "learning_rate": 1.7216228793840165e-05, - "loss": 0.1048, + "epoch": 0.8350677699375603, + "grad_norm": 0.2716129422187805, + "learning_rate": 1.4432881533749599e-05, + "loss": 0.0674, "step": 16450 }, { - "epoch": 0.41769260058383045, - "grad_norm": 0.6942674517631531, - "learning_rate": 1.7215382662774464e-05, - "loss": 0.1162, + "epoch": 0.8353215899284228, + "grad_norm": 0.39446020126342773, + "learning_rate": 1.4431189400477184e-05, + "loss": 0.0708, "step": 16455 }, { - "epoch": 0.41781952024368574, - "grad_norm": 1.1117899417877197, - "learning_rate": 1.7214536531708762e-05, - "loss": 0.0985, + "epoch": 0.8355754099192853, + "grad_norm": 0.39217862486839294, + "learning_rate": 1.4429497267204766e-05, + "loss": 0.0718, "step": 16460 }, { - "epoch": 0.4179464399035411, - "grad_norm": 1.0928601026535034, - "learning_rate": 1.721369040064306e-05, - "loss": 0.1132, + "epoch": 0.8358292299101477, + "grad_norm": 0.5310700535774231, + "learning_rate": 1.442780513393235e-05, + "loss": 0.078, "step": 16465 }, { - "epoch": 0.41807335956339636, - "grad_norm": 0.6021097898483276, - "learning_rate": 1.721284426957736e-05, - "loss": 0.0967, + "epoch": 0.8360830499010102, + "grad_norm": 0.337480753660202, + "learning_rate": 1.4426113000659933e-05, + "loss": 0.0693, "step": 16470 }, { - "epoch": 0.4182002792232517, - "grad_norm": 0.703839898109436, - "learning_rate": 1.7211998138511657e-05, - "loss": 0.1097, + "epoch": 0.8363368698918727, + "grad_norm": 0.45497629046440125, + "learning_rate": 1.4424420867387517e-05, + "loss": 0.0831, "step": 16475 }, { - "epoch": 0.418327198883107, - "grad_norm": 0.8077850937843323, - "learning_rate": 1.7211152007445956e-05, - "loss": 0.0959, + "epoch": 0.8365906898827352, + "grad_norm": 0.3556462526321411, + "learning_rate": 1.4422728734115098e-05, + "loss": 0.0653, "step": 16480 }, { - "epoch": 0.41845411854296233, - "grad_norm": 0.7691506743431091, - "learning_rate": 1.7210305876380254e-05, - "loss": 0.1218, + "epoch": 0.8368445098735976, + "grad_norm": 0.43825340270996094, + "learning_rate": 1.4421036600842684e-05, + "loss": 0.0688, "step": 16485 }, { - "epoch": 0.4185810382028176, - "grad_norm": 0.5315225124359131, - "learning_rate": 1.720945974531455e-05, - "loss": 0.1171, + "epoch": 0.8370983298644601, + "grad_norm": 0.4429861605167389, + "learning_rate": 1.4419344467570267e-05, + "loss": 0.0736, "step": 16490 }, { - "epoch": 0.41870795786267295, - "grad_norm": 0.44382038712501526, - "learning_rate": 1.7208613614248848e-05, - "loss": 0.1071, + "epoch": 0.8373521498553226, + "grad_norm": 0.35255303978919983, + "learning_rate": 1.441765233429785e-05, + "loss": 0.066, "step": 16495 }, { - "epoch": 0.41883487752252824, - "grad_norm": 0.7430035471916199, - "learning_rate": 1.7207767483183146e-05, - "loss": 0.1259, + "epoch": 0.8376059698461851, + "grad_norm": 0.532727062702179, + "learning_rate": 1.4415960201025434e-05, + "loss": 0.0857, "step": 16500 }, { - "epoch": 0.4189617971823836, - "grad_norm": 0.5134223699569702, - "learning_rate": 1.7206921352117444e-05, - "loss": 0.0923, + "epoch": 0.8378597898370476, + "grad_norm": 0.38991039991378784, + "learning_rate": 1.4414268067753016e-05, + "loss": 0.0721, "step": 16505 }, { - "epoch": 0.41908871684223886, - "grad_norm": 0.5185253024101257, - "learning_rate": 1.7206075221051743e-05, - "loss": 0.0899, + "epoch": 0.83811360982791, + "grad_norm": 0.38206273317337036, + "learning_rate": 1.4412575934480601e-05, + "loss": 0.0816, "step": 16510 }, { - "epoch": 0.41921563650209415, - "grad_norm": 0.8403283953666687, - "learning_rate": 1.720522908998604e-05, - "loss": 0.1237, + "epoch": 0.8383674298187725, + "grad_norm": 0.271957665681839, + "learning_rate": 1.4410883801208185e-05, + "loss": 0.0779, "step": 16515 }, { - "epoch": 0.4193425561619495, - "grad_norm": 0.6538458466529846, - "learning_rate": 1.720438295892034e-05, - "loss": 0.0995, + "epoch": 0.838621249809635, + "grad_norm": 0.3314172327518463, + "learning_rate": 1.4409191667935767e-05, + "loss": 0.0634, "step": 16520 }, { - "epoch": 0.41946947582180477, - "grad_norm": 0.8574736714363098, - "learning_rate": 1.7203536827854638e-05, - "loss": 0.0759, + "epoch": 0.8388750698004975, + "grad_norm": 0.5224104523658752, + "learning_rate": 1.4407499534663352e-05, + "loss": 0.077, "step": 16525 }, { - "epoch": 0.4195963954816601, - "grad_norm": 0.5382481813430786, - "learning_rate": 1.7202690696788936e-05, - "loss": 0.1103, + "epoch": 0.83912888979136, + "grad_norm": 0.7262583374977112, + "learning_rate": 1.4405807401390934e-05, + "loss": 0.0909, "step": 16530 }, { - "epoch": 0.4197233151415154, - "grad_norm": 0.48336103558540344, - "learning_rate": 1.720184456572323e-05, - "loss": 0.1026, + "epoch": 0.8393827097822224, + "grad_norm": 0.45320960879325867, + "learning_rate": 1.440411526811852e-05, + "loss": 0.0732, "step": 16535 }, { - "epoch": 0.41985023480137074, - "grad_norm": 0.4102477431297302, - "learning_rate": 1.720099843465753e-05, - "loss": 0.0876, + "epoch": 0.8396365297730849, + "grad_norm": 0.3000989854335785, + "learning_rate": 1.4402423134846103e-05, + "loss": 0.0695, "step": 16540 }, { - "epoch": 0.419977154461226, - "grad_norm": 0.5358933210372925, - "learning_rate": 1.7200152303591828e-05, - "loss": 0.082, + "epoch": 0.8398903497639474, + "grad_norm": 0.6859510540962219, + "learning_rate": 1.4400731001573685e-05, + "loss": 0.0705, "step": 16545 }, { - "epoch": 0.42010407412108136, - "grad_norm": 0.4915980398654938, - "learning_rate": 1.7199306172526127e-05, - "loss": 0.1009, + "epoch": 0.8401441697548099, + "grad_norm": 0.4406552314758301, + "learning_rate": 1.439903886830127e-05, + "loss": 0.0774, "step": 16550 }, { - "epoch": 0.42023099378093665, - "grad_norm": 0.5052036643028259, - "learning_rate": 1.719846004146042e-05, - "loss": 0.101, + "epoch": 0.8403979897456724, + "grad_norm": 0.3408234715461731, + "learning_rate": 1.4397346735028852e-05, + "loss": 0.0699, "step": 16555 }, { - "epoch": 0.420357913440792, - "grad_norm": 0.4437134563922882, - "learning_rate": 1.719761391039472e-05, - "loss": 0.1057, + "epoch": 0.8406518097365349, + "grad_norm": 1.4002503156661987, + "learning_rate": 1.4395654601756435e-05, + "loss": 0.0739, "step": 16560 }, { - "epoch": 0.42048483310064727, - "grad_norm": 0.6295338273048401, - "learning_rate": 1.719676777932902e-05, - "loss": 0.1113, + "epoch": 0.8409056297273974, + "grad_norm": 0.5265543460845947, + "learning_rate": 1.4393962468484019e-05, + "loss": 0.0817, "step": 16565 }, { - "epoch": 0.4206117527605026, - "grad_norm": 0.6350697875022888, - "learning_rate": 1.7195921648263317e-05, - "loss": 0.1001, + "epoch": 0.8411594497182598, + "grad_norm": 0.45000794529914856, + "learning_rate": 1.4392270335211602e-05, + "loss": 0.0729, "step": 16570 }, { - "epoch": 0.4207386724203579, - "grad_norm": 1.1081359386444092, - "learning_rate": 1.7195075517197615e-05, - "loss": 0.091, + "epoch": 0.8414132697091223, + "grad_norm": 0.2590148150920868, + "learning_rate": 1.4390578201939184e-05, + "loss": 0.0773, "step": 16575 }, { - "epoch": 0.42086559208021324, - "grad_norm": 0.5117188692092896, - "learning_rate": 1.7194229386131914e-05, - "loss": 0.1072, + "epoch": 0.8416670896999847, + "grad_norm": 0.28776997327804565, + "learning_rate": 1.438888606866677e-05, + "loss": 0.0686, "step": 16580 }, { - "epoch": 0.4209925117400685, - "grad_norm": 0.6932100057601929, - "learning_rate": 1.7193383255066212e-05, - "loss": 0.1057, + "epoch": 0.8419209096908472, + "grad_norm": 0.6979045271873474, + "learning_rate": 1.4387193935394353e-05, + "loss": 0.0793, "step": 16585 }, { - "epoch": 0.42111943139992386, - "grad_norm": 3.1406619548797607, - "learning_rate": 1.719253712400051e-05, - "loss": 0.1181, + "epoch": 0.8421747296817097, + "grad_norm": 0.33594322204589844, + "learning_rate": 1.4385501802121936e-05, + "loss": 0.0708, "step": 16590 }, { - "epoch": 0.42124635105977914, - "grad_norm": 0.7875078320503235, - "learning_rate": 1.7191690992934805e-05, - "loss": 0.099, + "epoch": 0.8424285496725722, + "grad_norm": 0.4978824555873871, + "learning_rate": 1.438380966884952e-05, + "loss": 0.0698, "step": 16595 }, { - "epoch": 0.4213732707196345, - "grad_norm": 0.44278696179389954, - "learning_rate": 1.7190844861869104e-05, - "loss": 0.1178, + "epoch": 0.8426823696634347, + "grad_norm": 0.41426607966423035, + "learning_rate": 1.4382117535577102e-05, + "loss": 0.0734, "step": 16600 }, { - "epoch": 0.42150019037948977, - "grad_norm": 0.5858845114707947, - "learning_rate": 1.7189998730803402e-05, - "loss": 0.0951, + "epoch": 0.8429361896542972, + "grad_norm": 0.44715553522109985, + "learning_rate": 1.4380425402304687e-05, + "loss": 0.0693, "step": 16605 }, { - "epoch": 0.4216271100393451, - "grad_norm": 1.032819151878357, - "learning_rate": 1.71891525997377e-05, - "loss": 0.1015, + "epoch": 0.8431900096451597, + "grad_norm": 0.33712249994277954, + "learning_rate": 1.437873326903227e-05, + "loss": 0.0695, "step": 16610 }, { - "epoch": 0.4217540296992004, - "grad_norm": 0.9256723523139954, - "learning_rate": 1.7188306468672e-05, - "loss": 0.0856, + "epoch": 0.8434438296360222, + "grad_norm": 0.4558604061603546, + "learning_rate": 1.4377041135759852e-05, + "loss": 0.0802, "step": 16615 }, { - "epoch": 0.42188094935905573, - "grad_norm": 0.4745270609855652, - "learning_rate": 1.7187460337606297e-05, - "loss": 0.0863, + "epoch": 0.8436976496268846, + "grad_norm": 0.4930158853530884, + "learning_rate": 1.4375349002487438e-05, + "loss": 0.066, "step": 16620 }, { - "epoch": 0.422007869018911, - "grad_norm": 0.7188496589660645, - "learning_rate": 1.7186614206540596e-05, - "loss": 0.095, + "epoch": 0.8439514696177471, + "grad_norm": 0.42849859595298767, + "learning_rate": 1.437365686921502e-05, + "loss": 0.0672, "step": 16625 }, { - "epoch": 0.42213478867876636, - "grad_norm": 0.5646257996559143, - "learning_rate": 1.7185768075474894e-05, - "loss": 0.0909, + "epoch": 0.8442052896086095, + "grad_norm": 0.4327075183391571, + "learning_rate": 1.4371964735942605e-05, + "loss": 0.0713, "step": 16630 }, { - "epoch": 0.42226170833862164, - "grad_norm": 1.02000892162323, - "learning_rate": 1.718492194440919e-05, - "loss": 0.1142, + "epoch": 0.844459109599472, + "grad_norm": 0.4134119749069214, + "learning_rate": 1.4370272602670188e-05, + "loss": 0.0785, "step": 16635 }, { - "epoch": 0.422388627998477, - "grad_norm": 0.48815375566482544, - "learning_rate": 1.7184075813343488e-05, - "loss": 0.1069, + "epoch": 0.8447129295903345, + "grad_norm": 0.4089963436126709, + "learning_rate": 1.436858046939777e-05, + "loss": 0.0609, "step": 16640 }, { - "epoch": 0.42251554765833227, - "grad_norm": 0.6409180164337158, - "learning_rate": 1.7183229682277786e-05, - "loss": 0.1083, + "epoch": 0.844966749581197, + "grad_norm": 0.3104897737503052, + "learning_rate": 1.4366888336125355e-05, + "loss": 0.0628, "step": 16645 }, { - "epoch": 0.4226424673181876, - "grad_norm": 0.5614832043647766, - "learning_rate": 1.7182383551212085e-05, - "loss": 0.1132, + "epoch": 0.8452205695720595, + "grad_norm": 0.40896421670913696, + "learning_rate": 1.4365196202852937e-05, + "loss": 0.0745, "step": 16650 }, { - "epoch": 0.4227693869780429, - "grad_norm": 0.4181899130344391, - "learning_rate": 1.7181537420146383e-05, - "loss": 0.0948, + "epoch": 0.845474389562922, + "grad_norm": 0.5574126839637756, + "learning_rate": 1.436350406958052e-05, + "loss": 0.0812, "step": 16655 }, { - "epoch": 0.42289630663789823, - "grad_norm": 1.7241290807724, - "learning_rate": 1.718069128908068e-05, - "loss": 0.1021, + "epoch": 0.8457282095537845, + "grad_norm": 0.3526981770992279, + "learning_rate": 1.4361811936308106e-05, + "loss": 0.074, "step": 16660 }, { - "epoch": 0.4230232262977535, - "grad_norm": 0.6321777105331421, - "learning_rate": 1.717984515801498e-05, - "loss": 0.0853, + "epoch": 0.845982029544647, + "grad_norm": 0.37888669967651367, + "learning_rate": 1.4360119803035688e-05, + "loss": 0.0769, "step": 16665 }, { - "epoch": 0.42315014595760886, - "grad_norm": 0.7028844356536865, - "learning_rate": 1.7178999026949278e-05, - "loss": 0.1173, + "epoch": 0.8462358495355095, + "grad_norm": 0.41363152861595154, + "learning_rate": 1.4358427669763271e-05, + "loss": 0.0788, "step": 16670 }, { - "epoch": 0.42327706561746414, - "grad_norm": 0.6503133177757263, - "learning_rate": 1.7178152895883573e-05, - "loss": 0.1229, + "epoch": 0.846489669526372, + "grad_norm": 1.3909887075424194, + "learning_rate": 1.4356735536490855e-05, + "loss": 0.0736, "step": 16675 }, { - "epoch": 0.4234039852773195, - "grad_norm": 0.9177921414375305, - "learning_rate": 1.717730676481787e-05, - "loss": 0.1364, + "epoch": 0.8467434895172343, + "grad_norm": 0.40531304478645325, + "learning_rate": 1.4355043403218439e-05, + "loss": 0.0637, "step": 16680 }, { - "epoch": 0.42353090493717477, - "grad_norm": 0.46237418055534363, - "learning_rate": 1.717646063375217e-05, - "loss": 0.1316, + "epoch": 0.8469973095080968, + "grad_norm": 0.291532963514328, + "learning_rate": 1.4353351269946024e-05, + "loss": 0.0682, "step": 16685 }, { - "epoch": 0.42365782459703005, - "grad_norm": 0.5082370042800903, - "learning_rate": 1.717561450268647e-05, - "loss": 0.1193, + "epoch": 0.8472511294989593, + "grad_norm": 0.3950155973434448, + "learning_rate": 1.4351659136673606e-05, + "loss": 0.0655, "step": 16690 }, { - "epoch": 0.4237847442568854, - "grad_norm": 0.6334792971611023, - "learning_rate": 1.7174768371620763e-05, - "loss": 0.0926, + "epoch": 0.8475049494898218, + "grad_norm": 0.44813594222068787, + "learning_rate": 1.434996700340119e-05, + "loss": 0.0853, "step": 16695 }, { - "epoch": 0.4239116639167407, - "grad_norm": 0.5970088839530945, - "learning_rate": 1.7173922240555062e-05, - "loss": 0.1239, + "epoch": 0.8477587694806843, + "grad_norm": 0.38060590624809265, + "learning_rate": 1.4348274870128773e-05, + "loss": 0.0691, "step": 16700 }, { - "epoch": 0.424038583576596, - "grad_norm": 0.7590364813804626, - "learning_rate": 1.717307610948936e-05, - "loss": 0.0949, + "epoch": 0.8480125894715468, + "grad_norm": 0.546687126159668, + "learning_rate": 1.4346582736856356e-05, + "loss": 0.0684, "step": 16705 }, { - "epoch": 0.4241655032364513, - "grad_norm": 0.4581683278083801, - "learning_rate": 1.717222997842366e-05, - "loss": 0.1191, + "epoch": 0.8482664094624093, + "grad_norm": 0.22715900838375092, + "learning_rate": 1.4344890603583938e-05, + "loss": 0.059, "step": 16710 }, { - "epoch": 0.42429242289630664, - "grad_norm": 0.5975123047828674, - "learning_rate": 1.7171383847357957e-05, - "loss": 0.1283, + "epoch": 0.8485202294532718, + "grad_norm": 0.290096253156662, + "learning_rate": 1.4343198470311523e-05, + "loss": 0.0594, "step": 16715 }, { - "epoch": 0.4244193425561619, - "grad_norm": 0.7377816438674927, - "learning_rate": 1.7170537716292255e-05, - "loss": 0.0899, + "epoch": 0.8487740494441343, + "grad_norm": 0.384205162525177, + "learning_rate": 1.4341506337039107e-05, + "loss": 0.0711, "step": 16720 }, { - "epoch": 0.42454626221601727, - "grad_norm": 0.6679713129997253, - "learning_rate": 1.7169691585226554e-05, - "loss": 0.1117, + "epoch": 0.8490278694349968, + "grad_norm": 0.2885967195034027, + "learning_rate": 1.4339814203766689e-05, + "loss": 0.0674, "step": 16725 }, { - "epoch": 0.42467318187587255, - "grad_norm": 0.5669720768928528, - "learning_rate": 1.7168845454160852e-05, - "loss": 0.1006, + "epoch": 0.8492816894258591, + "grad_norm": 0.5165429711341858, + "learning_rate": 1.4338122070494274e-05, + "loss": 0.0674, "step": 16730 }, { - "epoch": 0.4248001015357279, - "grad_norm": 0.6479017734527588, - "learning_rate": 1.7167999323095147e-05, - "loss": 0.1259, + "epoch": 0.8495355094167216, + "grad_norm": 0.36283811926841736, + "learning_rate": 1.4336429937221856e-05, + "loss": 0.0731, "step": 16735 }, { - "epoch": 0.4249270211955832, - "grad_norm": 0.5846966505050659, - "learning_rate": 1.7167153192029446e-05, - "loss": 0.0841, + "epoch": 0.8497893294075841, + "grad_norm": 0.4545622766017914, + "learning_rate": 1.4334737803949441e-05, + "loss": 0.0768, "step": 16740 }, { - "epoch": 0.4250539408554385, - "grad_norm": 0.45702266693115234, - "learning_rate": 1.7166307060963744e-05, - "loss": 0.1227, + "epoch": 0.8500431493984466, + "grad_norm": 0.30138736963272095, + "learning_rate": 1.4333045670677023e-05, + "loss": 0.0595, "step": 16745 }, { - "epoch": 0.4251808605152938, - "grad_norm": 0.688960611820221, - "learning_rate": 1.7165460929898042e-05, - "loss": 0.0962, + "epoch": 0.8502969693893091, + "grad_norm": 0.38298335671424866, + "learning_rate": 1.4331353537404606e-05, + "loss": 0.0751, "step": 16750 }, { - "epoch": 0.42530778017514914, - "grad_norm": 1.1833771467208862, - "learning_rate": 1.716461479883234e-05, - "loss": 0.1017, + "epoch": 0.8505507893801716, + "grad_norm": 0.6622296571731567, + "learning_rate": 1.4329661404132192e-05, + "loss": 0.0842, "step": 16755 }, { - "epoch": 0.4254346998350044, - "grad_norm": 0.5811829566955566, - "learning_rate": 1.716376866776664e-05, - "loss": 0.1136, + "epoch": 0.8508046093710341, + "grad_norm": 0.5098762512207031, + "learning_rate": 1.4327969270859774e-05, + "loss": 0.0754, "step": 16760 }, { - "epoch": 0.42556161949485977, - "grad_norm": 0.6459028720855713, - "learning_rate": 1.7162922536700938e-05, - "loss": 0.1262, + "epoch": 0.8510584293618966, + "grad_norm": 0.4182951748371124, + "learning_rate": 1.4326277137587357e-05, + "loss": 0.055, "step": 16765 }, { - "epoch": 0.42568853915471505, - "grad_norm": 0.6365668773651123, - "learning_rate": 1.7162076405635236e-05, - "loss": 0.0939, + "epoch": 0.8513122493527591, + "grad_norm": 0.34056827425956726, + "learning_rate": 1.432458500431494e-05, + "loss": 0.0762, "step": 16770 }, { - "epoch": 0.4258154588145704, - "grad_norm": 0.49362170696258545, - "learning_rate": 1.716123027456953e-05, - "loss": 0.1061, + "epoch": 0.8515660693436216, + "grad_norm": 0.4602746367454529, + "learning_rate": 1.4322892871042524e-05, + "loss": 0.0731, "step": 16775 }, { - "epoch": 0.4259423784744257, - "grad_norm": 1.200665831565857, - "learning_rate": 1.716038414350383e-05, - "loss": 0.0881, + "epoch": 0.8518198893344839, + "grad_norm": 0.46367478370666504, + "learning_rate": 1.432120073777011e-05, + "loss": 0.0662, "step": 16780 }, { - "epoch": 0.426069298134281, - "grad_norm": 0.6435304880142212, - "learning_rate": 1.7159538012438128e-05, - "loss": 0.1148, + "epoch": 0.8520737093253464, + "grad_norm": 0.3692100942134857, + "learning_rate": 1.4319508604497691e-05, + "loss": 0.0732, "step": 16785 }, { - "epoch": 0.4261962177941363, - "grad_norm": 0.9775551557540894, - "learning_rate": 1.7158691881372426e-05, - "loss": 0.1225, + "epoch": 0.8523275293162089, + "grad_norm": 0.32194337248802185, + "learning_rate": 1.4317816471225275e-05, + "loss": 0.0694, "step": 16790 }, { - "epoch": 0.42632313745399164, - "grad_norm": 0.5612630248069763, - "learning_rate": 1.7157845750306725e-05, - "loss": 0.1221, + "epoch": 0.8525813493070714, + "grad_norm": 0.4015413820743561, + "learning_rate": 1.4316124337952858e-05, + "loss": 0.066, "step": 16795 }, { - "epoch": 0.4264500571138469, - "grad_norm": 0.9725382328033447, - "learning_rate": 1.7156999619241023e-05, - "loss": 0.1394, + "epoch": 0.8528351692979339, + "grad_norm": 0.4497564435005188, + "learning_rate": 1.4314432204680442e-05, + "loss": 0.0801, "step": 16800 }, { - "epoch": 0.42657697677370227, - "grad_norm": 0.6142752170562744, - "learning_rate": 1.715615348817532e-05, - "loss": 0.1081, + "epoch": 0.8530889892887964, + "grad_norm": 0.5693073272705078, + "learning_rate": 1.4312740071408024e-05, + "loss": 0.0772, "step": 16805 }, { - "epoch": 0.42670389643355755, - "grad_norm": 0.4788876473903656, - "learning_rate": 1.715530735710962e-05, - "loss": 0.0956, + "epoch": 0.8533428092796589, + "grad_norm": 0.7299516201019287, + "learning_rate": 1.4311047938135609e-05, + "loss": 0.0772, "step": 16810 }, { - "epoch": 0.4268308160934129, - "grad_norm": 0.4710952937602997, - "learning_rate": 1.7154461226043915e-05, - "loss": 0.0859, + "epoch": 0.8535966292705214, + "grad_norm": 0.370604544878006, + "learning_rate": 1.4309355804863193e-05, + "loss": 0.0696, "step": 16815 }, { - "epoch": 0.4269577357532682, - "grad_norm": 0.4548252820968628, - "learning_rate": 1.7153615094978213e-05, - "loss": 0.0993, + "epoch": 0.8538504492613839, + "grad_norm": 0.33534955978393555, + "learning_rate": 1.4307663671590774e-05, + "loss": 0.0736, "step": 16820 }, { - "epoch": 0.4270846554131235, - "grad_norm": 1.1431694030761719, - "learning_rate": 1.715276896391251e-05, - "loss": 0.1102, + "epoch": 0.8541042692522464, + "grad_norm": 0.5034054517745972, + "learning_rate": 1.430597153831836e-05, + "loss": 0.0769, "step": 16825 }, { - "epoch": 0.4272115750729788, - "grad_norm": 0.7848405241966248, - "learning_rate": 1.715192283284681e-05, - "loss": 0.0886, + "epoch": 0.8543580892431087, + "grad_norm": 0.2479061633348465, + "learning_rate": 1.4304279405045942e-05, + "loss": 0.0654, "step": 16830 }, { - "epoch": 0.42733849473283414, - "grad_norm": 0.6190905570983887, - "learning_rate": 1.7151076701781105e-05, - "loss": 0.1056, + "epoch": 0.8546119092339712, + "grad_norm": 0.5529633164405823, + "learning_rate": 1.4302587271773527e-05, + "loss": 0.069, "step": 16835 }, { - "epoch": 0.4274654143926894, - "grad_norm": 0.7819855213165283, - "learning_rate": 1.7150230570715403e-05, - "loss": 0.0996, + "epoch": 0.8548657292248337, + "grad_norm": 0.4025406837463379, + "learning_rate": 1.430089513850111e-05, + "loss": 0.0684, "step": 16840 }, { - "epoch": 0.42759233405254476, - "grad_norm": 0.8912913799285889, - "learning_rate": 1.7149384439649702e-05, - "loss": 0.0819, + "epoch": 0.8551195492156962, + "grad_norm": 0.3485677242279053, + "learning_rate": 1.4299203005228692e-05, + "loss": 0.0658, "step": 16845 }, { - "epoch": 0.42771925371240005, - "grad_norm": 0.512338399887085, - "learning_rate": 1.7148538308584e-05, - "loss": 0.0971, + "epoch": 0.8553733692065587, + "grad_norm": 0.46029365062713623, + "learning_rate": 1.4297510871956277e-05, + "loss": 0.0692, "step": 16850 }, { - "epoch": 0.4278461733722554, - "grad_norm": 0.8463872075080872, - "learning_rate": 1.71476921775183e-05, - "loss": 0.1199, + "epoch": 0.8556271891974212, + "grad_norm": 0.47386330366134644, + "learning_rate": 1.429581873868386e-05, + "loss": 0.0738, "step": 16855 }, { - "epoch": 0.4279730930321107, - "grad_norm": 0.4684475064277649, - "learning_rate": 1.7146846046452597e-05, - "loss": 0.099, + "epoch": 0.8558810091882837, + "grad_norm": 0.3624202311038971, + "learning_rate": 1.4294126605411443e-05, + "loss": 0.0723, "step": 16860 }, { - "epoch": 0.42810001269196596, - "grad_norm": 0.5950973629951477, - "learning_rate": 1.7145999915386895e-05, - "loss": 0.1085, + "epoch": 0.8561348291791462, + "grad_norm": 1.0836365222930908, + "learning_rate": 1.4292434472139028e-05, + "loss": 0.0803, "step": 16865 }, { - "epoch": 0.4282269323518213, - "grad_norm": 0.5752646327018738, - "learning_rate": 1.7145153784321194e-05, - "loss": 0.1291, + "epoch": 0.8563886491700087, + "grad_norm": 0.45594584941864014, + "learning_rate": 1.429074233886661e-05, + "loss": 0.0678, "step": 16870 }, { - "epoch": 0.4283538520116766, - "grad_norm": 0.6379378437995911, - "learning_rate": 1.714430765325549e-05, - "loss": 0.1104, + "epoch": 0.8566424691608711, + "grad_norm": 0.34718087315559387, + "learning_rate": 1.4289050205594195e-05, + "loss": 0.0613, "step": 16875 }, { - "epoch": 0.4284807716715319, - "grad_norm": 0.8746981620788574, - "learning_rate": 1.7143461522189787e-05, - "loss": 0.1131, + "epoch": 0.8568962891517335, + "grad_norm": 0.30333346128463745, + "learning_rate": 1.4287358072321777e-05, + "loss": 0.0635, "step": 16880 }, { - "epoch": 0.4286076913313872, - "grad_norm": 0.503105103969574, - "learning_rate": 1.7142615391124086e-05, - "loss": 0.1006, + "epoch": 0.857150109142596, + "grad_norm": 0.44795432686805725, + "learning_rate": 1.428566593904936e-05, + "loss": 0.0678, "step": 16885 }, { - "epoch": 0.42873461099124255, - "grad_norm": 0.49455127120018005, - "learning_rate": 1.7141769260058384e-05, - "loss": 0.0985, + "epoch": 0.8574039291334585, + "grad_norm": 0.33551299571990967, + "learning_rate": 1.4283973805776946e-05, + "loss": 0.0641, "step": 16890 }, { - "epoch": 0.42886153065109783, - "grad_norm": 0.7200365662574768, - "learning_rate": 1.7140923128992682e-05, - "loss": 0.097, + "epoch": 0.857657749124321, + "grad_norm": 0.42668506503105164, + "learning_rate": 1.4282281672504528e-05, + "loss": 0.0738, "step": 16895 }, { - "epoch": 0.4289884503109532, - "grad_norm": 0.44699540734291077, - "learning_rate": 1.714007699792698e-05, - "loss": 0.0935, + "epoch": 0.8579115691151835, + "grad_norm": 0.5033948421478271, + "learning_rate": 1.4280589539232111e-05, + "loss": 0.0791, "step": 16900 }, { - "epoch": 0.42911536997080846, - "grad_norm": 0.5047271251678467, - "learning_rate": 1.713923086686128e-05, - "loss": 0.0815, + "epoch": 0.858165389106046, + "grad_norm": 0.5407020449638367, + "learning_rate": 1.4278897405959695e-05, + "loss": 0.0817, "step": 16905 }, { - "epoch": 0.4292422896306638, - "grad_norm": 0.6258827447891235, - "learning_rate": 1.7138384735795578e-05, - "loss": 0.0979, + "epoch": 0.8584192090969085, + "grad_norm": 1.9471144676208496, + "learning_rate": 1.4277205272687278e-05, + "loss": 0.0785, "step": 16910 }, { - "epoch": 0.4293692092905191, - "grad_norm": 0.5265569686889648, - "learning_rate": 1.7137538604729873e-05, - "loss": 0.1012, + "epoch": 0.858673029087771, + "grad_norm": 0.38863787055015564, + "learning_rate": 1.427551313941486e-05, + "loss": 0.067, "step": 16915 }, { - "epoch": 0.4294961289503744, - "grad_norm": 0.5979445576667786, - "learning_rate": 1.713669247366417e-05, - "loss": 0.0911, + "epoch": 0.8589268490786335, + "grad_norm": 0.5478694438934326, + "learning_rate": 1.4273821006142445e-05, + "loss": 0.0718, "step": 16920 }, { - "epoch": 0.4296230486102297, - "grad_norm": 0.4975804090499878, - "learning_rate": 1.713584634259847e-05, - "loss": 0.0874, + "epoch": 0.8591806690694959, + "grad_norm": 0.5812584757804871, + "learning_rate": 1.4272128872870029e-05, + "loss": 0.0845, "step": 16925 }, { - "epoch": 0.42974996827008505, - "grad_norm": 1.0541746616363525, - "learning_rate": 1.7135000211532768e-05, - "loss": 0.1153, + "epoch": 0.8594344890603584, + "grad_norm": 0.4321596324443817, + "learning_rate": 1.4270436739597612e-05, + "loss": 0.0734, "step": 16930 }, { - "epoch": 0.42987688792994033, - "grad_norm": 0.9782557487487793, - "learning_rate": 1.7134154080467066e-05, - "loss": 0.1064, + "epoch": 0.8596883090512208, + "grad_norm": 0.512316107749939, + "learning_rate": 1.4268744606325196e-05, + "loss": 0.065, "step": 16935 }, { - "epoch": 0.4300038075897957, - "grad_norm": 0.7924244999885559, - "learning_rate": 1.7133307949401365e-05, - "loss": 0.1206, + "epoch": 0.8599421290420833, + "grad_norm": 0.4328327775001526, + "learning_rate": 1.4267052473052778e-05, + "loss": 0.0685, "step": 16940 }, { - "epoch": 0.43013072724965096, - "grad_norm": 0.41686344146728516, - "learning_rate": 1.7132461818335663e-05, - "loss": 0.0993, + "epoch": 0.8601959490329458, + "grad_norm": 0.5791327357292175, + "learning_rate": 1.4265360339780363e-05, + "loss": 0.0775, "step": 16945 }, { - "epoch": 0.4302576469095063, - "grad_norm": 1.2059576511383057, - "learning_rate": 1.713161568726996e-05, - "loss": 0.1001, + "epoch": 0.8604497690238083, + "grad_norm": 0.387787401676178, + "learning_rate": 1.4263668206507945e-05, + "loss": 0.069, "step": 16950 }, { - "epoch": 0.4303845665693616, - "grad_norm": 0.5404472947120667, - "learning_rate": 1.7130769556204257e-05, - "loss": 0.0872, + "epoch": 0.8607035890146708, + "grad_norm": 0.38102301955223083, + "learning_rate": 1.4261976073235528e-05, + "loss": 0.067, "step": 16955 }, { - "epoch": 0.4305114862292169, - "grad_norm": 0.44960370659828186, - "learning_rate": 1.7129923425138555e-05, - "loss": 0.1004, + "epoch": 0.8609574090055333, + "grad_norm": 0.3059747815132141, + "learning_rate": 1.4260283939963114e-05, + "loss": 0.0582, "step": 16960 }, { - "epoch": 0.4306384058890722, - "grad_norm": 0.4417228102684021, - "learning_rate": 1.7129077294072853e-05, - "loss": 0.0872, + "epoch": 0.8612112289963958, + "grad_norm": 0.38691335916519165, + "learning_rate": 1.4258591806690696e-05, + "loss": 0.0761, "step": 16965 }, { - "epoch": 0.43076532554892755, - "grad_norm": 0.5987323522567749, - "learning_rate": 1.7128231163007152e-05, - "loss": 0.1028, + "epoch": 0.8614650489872583, + "grad_norm": 0.3976000249385834, + "learning_rate": 1.4256899673418279e-05, + "loss": 0.0695, "step": 16970 }, { - "epoch": 0.43089224520878283, - "grad_norm": 0.4943171441555023, - "learning_rate": 1.7127385031941447e-05, - "loss": 0.1175, + "epoch": 0.8617188689781207, + "grad_norm": 0.4070408344268799, + "learning_rate": 1.4255207540145863e-05, + "loss": 0.0702, "step": 16975 }, { - "epoch": 0.43101916486863817, - "grad_norm": 0.5931038856506348, - "learning_rate": 1.7126538900875745e-05, - "loss": 0.0987, + "epoch": 0.8619726889689832, + "grad_norm": 0.6455148458480835, + "learning_rate": 1.4253515406873446e-05, + "loss": 0.0797, "step": 16980 }, { - "epoch": 0.43114608452849346, - "grad_norm": 0.5258564352989197, - "learning_rate": 1.7125692769810044e-05, - "loss": 0.103, + "epoch": 0.8622265089598457, + "grad_norm": 0.36329150199890137, + "learning_rate": 1.4251823273601031e-05, + "loss": 0.0669, "step": 16985 }, { - "epoch": 0.4312730041883488, - "grad_norm": 0.7468342781066895, - "learning_rate": 1.7124846638744342e-05, - "loss": 0.105, + "epoch": 0.8624803289507081, + "grad_norm": 0.36581847071647644, + "learning_rate": 1.4250131140328613e-05, + "loss": 0.0639, "step": 16990 }, { - "epoch": 0.4313999238482041, - "grad_norm": 0.9156855344772339, - "learning_rate": 1.712400050767864e-05, - "loss": 0.1053, + "epoch": 0.8627341489415706, + "grad_norm": 0.2543756067752838, + "learning_rate": 1.4248439007056197e-05, + "loss": 0.064, "step": 16995 }, { - "epoch": 0.4315268435080594, - "grad_norm": 0.6839688420295715, - "learning_rate": 1.712315437661294e-05, - "loss": 0.1071, + "epoch": 0.8629879689324331, + "grad_norm": 0.7665139436721802, + "learning_rate": 1.424674687378378e-05, + "loss": 0.072, "step": 17000 }, { - "epoch": 0.4316537631679147, - "grad_norm": 0.7245674133300781, - "learning_rate": 1.7122308245547237e-05, - "loss": 0.0982, + "epoch": 0.8632417889232956, + "grad_norm": 0.2957867383956909, + "learning_rate": 1.4245054740511364e-05, + "loss": 0.0718, "step": 17005 }, { - "epoch": 0.43178068282777005, - "grad_norm": 0.7460171580314636, - "learning_rate": 1.7121462114481536e-05, - "loss": 0.0958, + "epoch": 0.8634956089141581, + "grad_norm": 0.5713722705841064, + "learning_rate": 1.4243362607238946e-05, + "loss": 0.0748, "step": 17010 }, { - "epoch": 0.43190760248762533, - "grad_norm": 0.9004694223403931, - "learning_rate": 1.712061598341583e-05, - "loss": 0.1227, + "epoch": 0.8637494289050206, + "grad_norm": 0.43129876255989075, + "learning_rate": 1.4241670473966531e-05, + "loss": 0.085, "step": 17015 }, { - "epoch": 0.43203452214748067, - "grad_norm": 0.8155450820922852, - "learning_rate": 1.711976985235013e-05, - "loss": 0.1218, + "epoch": 0.8640032488958831, + "grad_norm": 0.6972838640213013, + "learning_rate": 1.4239978340694115e-05, + "loss": 0.0695, "step": 17020 }, { - "epoch": 0.43216144180733596, - "grad_norm": 0.8082932233810425, - "learning_rate": 1.7118923721284427e-05, - "loss": 0.0995, + "epoch": 0.8642570688867455, + "grad_norm": 0.8373203873634338, + "learning_rate": 1.4238286207421698e-05, + "loss": 0.0758, "step": 17025 }, { - "epoch": 0.4322883614671913, - "grad_norm": 0.708988606929779, - "learning_rate": 1.7118077590218726e-05, - "loss": 0.0931, + "epoch": 0.864510888877608, + "grad_norm": 0.2909460961818695, + "learning_rate": 1.4236594074149282e-05, + "loss": 0.0682, "step": 17030 }, { - "epoch": 0.4324152811270466, - "grad_norm": 0.5357007384300232, - "learning_rate": 1.7117231459153024e-05, - "loss": 0.1116, + "epoch": 0.8647647088684705, + "grad_norm": 0.30259981751441956, + "learning_rate": 1.4234901940876863e-05, + "loss": 0.0677, "step": 17035 }, { - "epoch": 0.43254220078690186, - "grad_norm": 0.5000183582305908, - "learning_rate": 1.7116385328087323e-05, - "loss": 0.0819, + "epoch": 0.865018528859333, + "grad_norm": 0.36737337708473206, + "learning_rate": 1.4233209807604449e-05, + "loss": 0.0763, "step": 17040 }, { - "epoch": 0.4326691204467572, - "grad_norm": 0.7967339754104614, - "learning_rate": 1.711553919702162e-05, - "loss": 0.112, + "epoch": 0.8652723488501954, + "grad_norm": 0.32847535610198975, + "learning_rate": 1.4231517674332032e-05, + "loss": 0.0706, "step": 17045 }, { - "epoch": 0.4327960401066125, - "grad_norm": 0.7078977823257446, - "learning_rate": 1.711469306595592e-05, - "loss": 0.1044, + "epoch": 0.8655261688410579, + "grad_norm": 0.4564855992794037, + "learning_rate": 1.4229825541059614e-05, + "loss": 0.0808, "step": 17050 }, { - "epoch": 0.43292295976646783, - "grad_norm": 0.7258083820343018, - "learning_rate": 1.7113846934890218e-05, - "loss": 0.1212, + "epoch": 0.8657799888319204, + "grad_norm": 0.4900711178779602, + "learning_rate": 1.42281334077872e-05, + "loss": 0.0595, "step": 17055 }, { - "epoch": 0.4330498794263231, - "grad_norm": 0.7934380173683167, - "learning_rate": 1.7113000803824513e-05, - "loss": 0.0969, + "epoch": 0.8660338088227829, + "grad_norm": 0.6517011523246765, + "learning_rate": 1.4226441274514781e-05, + "loss": 0.0665, "step": 17060 }, { - "epoch": 0.43317679908617845, - "grad_norm": 0.7781998515129089, - "learning_rate": 1.711215467275881e-05, - "loss": 0.1056, + "epoch": 0.8662876288136454, + "grad_norm": 0.3914799094200134, + "learning_rate": 1.4224749141242365e-05, + "loss": 0.0696, "step": 17065 }, { - "epoch": 0.43330371874603374, - "grad_norm": 0.7970994114875793, - "learning_rate": 1.711130854169311e-05, - "loss": 0.1087, + "epoch": 0.8665414488045079, + "grad_norm": 0.42431968450546265, + "learning_rate": 1.422305700796995e-05, + "loss": 0.0654, "step": 17070 }, { - "epoch": 0.4334306384058891, - "grad_norm": 0.9312442541122437, - "learning_rate": 1.7110462410627408e-05, - "loss": 0.1019, + "epoch": 0.8667952687953703, + "grad_norm": 0.34742167592048645, + "learning_rate": 1.4221364874697532e-05, + "loss": 0.0769, "step": 17075 }, { - "epoch": 0.43355755806574436, - "grad_norm": 0.5298026204109192, - "learning_rate": 1.7109616279561706e-05, - "loss": 0.0807, + "epoch": 0.8670490887862328, + "grad_norm": 0.8822436928749084, + "learning_rate": 1.4219672741425117e-05, + "loss": 0.0787, "step": 17080 }, { - "epoch": 0.4336844777255997, - "grad_norm": 0.5395737290382385, - "learning_rate": 1.7108770148496005e-05, - "loss": 0.0775, + "epoch": 0.8673029087770953, + "grad_norm": 0.6568184494972229, + "learning_rate": 1.4217980608152699e-05, + "loss": 0.0593, "step": 17085 }, { - "epoch": 0.433811397385455, - "grad_norm": 0.6156777739524841, - "learning_rate": 1.7107924017430303e-05, - "loss": 0.1029, + "epoch": 0.8675567287679578, + "grad_norm": 0.53938227891922, + "learning_rate": 1.4216288474880282e-05, + "loss": 0.083, "step": 17090 }, { - "epoch": 0.43393831704531033, - "grad_norm": 0.5954700708389282, - "learning_rate": 1.71070778863646e-05, - "loss": 0.1052, + "epoch": 0.8678105487588202, + "grad_norm": 0.4471435248851776, + "learning_rate": 1.4214596341607868e-05, + "loss": 0.0744, "step": 17095 }, { - "epoch": 0.4340652367051656, - "grad_norm": 1.077143669128418, - "learning_rate": 1.7106231755298897e-05, - "loss": 0.1041, + "epoch": 0.8680643687496827, + "grad_norm": 0.3798215389251709, + "learning_rate": 1.421290420833545e-05, + "loss": 0.0754, "step": 17100 }, { - "epoch": 0.43419215636502095, - "grad_norm": 0.6895633935928345, - "learning_rate": 1.7105385624233195e-05, - "loss": 0.1041, + "epoch": 0.8683181887405452, + "grad_norm": 0.5007672309875488, + "learning_rate": 1.4211212075063033e-05, + "loss": 0.071, "step": 17105 }, { - "epoch": 0.43431907602487624, - "grad_norm": 0.604110598564148, - "learning_rate": 1.7104539493167493e-05, - "loss": 0.1181, + "epoch": 0.8685720087314077, + "grad_norm": 0.4671100676059723, + "learning_rate": 1.4209519941790617e-05, + "loss": 0.0858, "step": 17110 }, { - "epoch": 0.4344459956847316, - "grad_norm": 0.5625286102294922, - "learning_rate": 1.7103693362101792e-05, - "loss": 0.1003, + "epoch": 0.8688258287222702, + "grad_norm": 0.4374212920665741, + "learning_rate": 1.42078278085182e-05, + "loss": 0.0684, "step": 17115 }, { - "epoch": 0.43457291534458686, - "grad_norm": 0.620840847492218, - "learning_rate": 1.7102847231036087e-05, - "loss": 0.0897, + "epoch": 0.8690796487131326, + "grad_norm": 0.46207401156425476, + "learning_rate": 1.4206135675245785e-05, + "loss": 0.0733, "step": 17120 }, { - "epoch": 0.4346998350044422, - "grad_norm": 0.5177582502365112, - "learning_rate": 1.7102001099970385e-05, - "loss": 0.0937, + "epoch": 0.8693334687039951, + "grad_norm": 0.4559246301651001, + "learning_rate": 1.4204443541973367e-05, + "loss": 0.0798, "step": 17125 }, { - "epoch": 0.4348267546642975, - "grad_norm": 0.4963666796684265, - "learning_rate": 1.7101154968904684e-05, - "loss": 0.0985, + "epoch": 0.8695872886948576, + "grad_norm": 0.47378304600715637, + "learning_rate": 1.4202751408700949e-05, + "loss": 0.0763, "step": 17130 }, { - "epoch": 0.43495367432415283, - "grad_norm": 0.4380079507827759, - "learning_rate": 1.7100308837838982e-05, - "loss": 0.0906, + "epoch": 0.8698411086857201, + "grad_norm": 0.36055704951286316, + "learning_rate": 1.4201059275428534e-05, + "loss": 0.0663, "step": 17135 }, { - "epoch": 0.4350805939840081, - "grad_norm": 0.47568178176879883, - "learning_rate": 1.709946270677328e-05, - "loss": 0.0924, + "epoch": 0.8700949286765826, + "grad_norm": 0.4687144160270691, + "learning_rate": 1.4199367142156118e-05, + "loss": 0.072, "step": 17140 }, { - "epoch": 0.43520751364386345, - "grad_norm": 0.4715767204761505, - "learning_rate": 1.709861657570758e-05, - "loss": 0.1129, + "epoch": 0.870348748667445, + "grad_norm": 0.565639078617096, + "learning_rate": 1.41976750088837e-05, + "loss": 0.0769, "step": 17145 }, { - "epoch": 0.43533443330371874, - "grad_norm": 0.7426924705505371, - "learning_rate": 1.7097770444641877e-05, - "loss": 0.1029, + "epoch": 0.8706025686583075, + "grad_norm": 0.3410448431968689, + "learning_rate": 1.4195982875611285e-05, + "loss": 0.0585, "step": 17150 }, { - "epoch": 0.4354613529635741, - "grad_norm": 0.7131223082542419, - "learning_rate": 1.7096924313576176e-05, - "loss": 0.0912, + "epoch": 0.87085638864917, + "grad_norm": 0.42370325326919556, + "learning_rate": 1.4194290742338867e-05, + "loss": 0.0721, "step": 17155 }, { - "epoch": 0.43558827262342936, - "grad_norm": 0.7556177973747253, - "learning_rate": 1.709607818251047e-05, - "loss": 0.0826, + "epoch": 0.8711102086400325, + "grad_norm": 0.6147048473358154, + "learning_rate": 1.419259860906645e-05, + "loss": 0.0712, "step": 17160 }, { - "epoch": 0.4357151922832847, - "grad_norm": 0.4428437650203705, - "learning_rate": 1.709523205144477e-05, - "loss": 0.1011, + "epoch": 0.871364028630895, + "grad_norm": 0.3107832074165344, + "learning_rate": 1.4190906475794036e-05, + "loss": 0.0717, "step": 17165 }, { - "epoch": 0.43584211194314, - "grad_norm": 0.6786155104637146, - "learning_rate": 1.7094385920379068e-05, - "loss": 0.0879, + "epoch": 0.8716178486217574, + "grad_norm": 0.4038301706314087, + "learning_rate": 1.4189214342521617e-05, + "loss": 0.0792, "step": 17170 }, { - "epoch": 0.4359690316029953, - "grad_norm": 0.7842277884483337, - "learning_rate": 1.7093539789313366e-05, - "loss": 0.0955, + "epoch": 0.8718716686126199, + "grad_norm": 0.3050367832183838, + "learning_rate": 1.4187522209249203e-05, + "loss": 0.0693, "step": 17175 }, { - "epoch": 0.4360959512628506, - "grad_norm": 0.4630521535873413, - "learning_rate": 1.7092693658247664e-05, - "loss": 0.0938, + "epoch": 0.8721254886034824, + "grad_norm": 0.3587893843650818, + "learning_rate": 1.4185830075976785e-05, + "loss": 0.0657, "step": 17180 }, { - "epoch": 0.43622287092270595, - "grad_norm": 0.6444492936134338, - "learning_rate": 1.7091847527181963e-05, - "loss": 0.1081, + "epoch": 0.8723793085943449, + "grad_norm": 0.3330988883972168, + "learning_rate": 1.4184137942704368e-05, + "loss": 0.0652, "step": 17185 }, { - "epoch": 0.43634979058256124, - "grad_norm": 0.630743682384491, - "learning_rate": 1.709100139611626e-05, - "loss": 0.1048, + "epoch": 0.8726331285852074, + "grad_norm": 0.3522055745124817, + "learning_rate": 1.4182445809431953e-05, + "loss": 0.0708, "step": 17190 }, { - "epoch": 0.4364767102424166, - "grad_norm": 0.48989617824554443, - "learning_rate": 1.709015526505056e-05, - "loss": 0.0867, + "epoch": 0.8728869485760699, + "grad_norm": 0.35526609420776367, + "learning_rate": 1.4180753676159535e-05, + "loss": 0.0776, "step": 17195 }, { - "epoch": 0.43660362990227186, - "grad_norm": 0.43044647574424744, - "learning_rate": 1.7089309133984855e-05, - "loss": 0.0871, + "epoch": 0.8731407685669323, + "grad_norm": 0.41325056552886963, + "learning_rate": 1.4179061542887119e-05, + "loss": 0.0582, "step": 17200 }, { - "epoch": 0.43673054956212715, - "grad_norm": 0.3993884027004242, - "learning_rate": 1.7088463002919153e-05, - "loss": 0.0951, + "epoch": 0.8733945885577948, + "grad_norm": 0.3183586001396179, + "learning_rate": 1.4177369409614702e-05, + "loss": 0.0649, "step": 17205 }, { - "epoch": 0.4368574692219825, - "grad_norm": 0.5579514503479004, - "learning_rate": 1.708761687185345e-05, - "loss": 0.0886, + "epoch": 0.8736484085486573, + "grad_norm": 0.3800111711025238, + "learning_rate": 1.4175677276342286e-05, + "loss": 0.0656, "step": 17210 }, { - "epoch": 0.43698438888183777, - "grad_norm": 0.5787482857704163, - "learning_rate": 1.708677074078775e-05, - "loss": 0.0914, + "epoch": 0.8739022285395198, + "grad_norm": 0.31832781434059143, + "learning_rate": 1.4173985143069868e-05, + "loss": 0.075, "step": 17215 }, { - "epoch": 0.4371113085416931, - "grad_norm": 0.49303245544433594, - "learning_rate": 1.7085924609722048e-05, - "loss": 0.1021, + "epoch": 0.8741560485303822, + "grad_norm": 0.3157210350036621, + "learning_rate": 1.4172293009797453e-05, + "loss": 0.075, "step": 17220 }, { - "epoch": 0.4372382282015484, - "grad_norm": 0.8807342052459717, - "learning_rate": 1.7085078478656347e-05, - "loss": 0.1086, + "epoch": 0.8744098685212447, + "grad_norm": 0.36594507098197937, + "learning_rate": 1.4170600876525036e-05, + "loss": 0.0729, "step": 17225 }, { - "epoch": 0.43736514786140374, - "grad_norm": 0.7194502353668213, - "learning_rate": 1.7084232347590645e-05, - "loss": 0.1013, + "epoch": 0.8746636885121072, + "grad_norm": 0.4369713366031647, + "learning_rate": 1.416890874325262e-05, + "loss": 0.0576, "step": 17230 }, { - "epoch": 0.437492067521259, - "grad_norm": 1.2901110649108887, - "learning_rate": 1.7083386216524943e-05, - "loss": 0.0991, + "epoch": 0.8749175085029697, + "grad_norm": 0.3689056932926178, + "learning_rate": 1.4167216609980204e-05, + "loss": 0.072, "step": 17235 }, { - "epoch": 0.43761898718111436, - "grad_norm": 0.7974382042884827, - "learning_rate": 1.708254008545924e-05, - "loss": 0.1067, + "epoch": 0.8751713284938322, + "grad_norm": 0.2857867181301117, + "learning_rate": 1.4165524476707785e-05, + "loss": 0.0661, "step": 17240 }, { - "epoch": 0.43774590684096965, - "grad_norm": 0.7388405203819275, - "learning_rate": 1.7081693954393537e-05, - "loss": 0.1224, + "epoch": 0.8754251484846947, + "grad_norm": 0.4573242962360382, + "learning_rate": 1.416383234343537e-05, + "loss": 0.0656, "step": 17245 }, { - "epoch": 0.437872826500825, - "grad_norm": 0.4950510263442993, - "learning_rate": 1.7080847823327835e-05, - "loss": 0.1106, + "epoch": 0.8756789684755572, + "grad_norm": 0.48289331793785095, + "learning_rate": 1.4162140210162954e-05, + "loss": 0.0797, "step": 17250 }, { - "epoch": 0.43799974616068027, - "grad_norm": 0.9155250787734985, - "learning_rate": 1.7080001692262134e-05, - "loss": 0.1042, + "epoch": 0.8759327884664196, + "grad_norm": 0.4658796191215515, + "learning_rate": 1.4160448076890536e-05, + "loss": 0.0687, "step": 17255 }, { - "epoch": 0.4381266658205356, - "grad_norm": 0.6812478303909302, - "learning_rate": 1.707915556119643e-05, - "loss": 0.0998, + "epoch": 0.8761866084572821, + "grad_norm": 0.8445097804069519, + "learning_rate": 1.4158755943618121e-05, + "loss": 0.0683, "step": 17260 }, { - "epoch": 0.4382535854803909, - "grad_norm": 1.037249207496643, - "learning_rate": 1.7078309430130727e-05, - "loss": 0.0879, + "epoch": 0.8764404284481446, + "grad_norm": 0.49470093846321106, + "learning_rate": 1.4157063810345703e-05, + "loss": 0.0727, "step": 17265 }, { - "epoch": 0.43838050514024623, - "grad_norm": 0.5537189841270447, - "learning_rate": 1.7077463299065025e-05, - "loss": 0.0969, + "epoch": 0.876694248439007, + "grad_norm": 0.5909218788146973, + "learning_rate": 1.4155371677073288e-05, + "loss": 0.0631, "step": 17270 }, { - "epoch": 0.4385074248001015, - "grad_norm": 0.428606778383255, - "learning_rate": 1.7076617167999324e-05, - "loss": 0.125, + "epoch": 0.8769480684298695, + "grad_norm": 0.33578768372535706, + "learning_rate": 1.4153679543800872e-05, + "loss": 0.0719, "step": 17275 }, { - "epoch": 0.43863434445995686, - "grad_norm": 0.6113749742507935, - "learning_rate": 1.7075771036933622e-05, - "loss": 0.1279, + "epoch": 0.877201888420732, + "grad_norm": 0.3844059109687805, + "learning_rate": 1.4151987410528454e-05, + "loss": 0.0691, "step": 17280 }, { - "epoch": 0.43876126411981214, - "grad_norm": 0.7207615375518799, - "learning_rate": 1.707492490586792e-05, - "loss": 0.1087, + "epoch": 0.8774557084115945, + "grad_norm": 0.7239956259727478, + "learning_rate": 1.4150295277256039e-05, + "loss": 0.0643, "step": 17285 }, { - "epoch": 0.4388881837796675, - "grad_norm": 0.5028282999992371, - "learning_rate": 1.707407877480222e-05, - "loss": 0.11, + "epoch": 0.877709528402457, + "grad_norm": 0.4169676601886749, + "learning_rate": 1.414860314398362e-05, + "loss": 0.0677, "step": 17290 }, { - "epoch": 0.43901510343952277, - "grad_norm": 0.9778391718864441, - "learning_rate": 1.7073232643736517e-05, - "loss": 0.1227, + "epoch": 0.8779633483933195, + "grad_norm": 0.5026949644088745, + "learning_rate": 1.4146911010711204e-05, + "loss": 0.0826, "step": 17295 }, { - "epoch": 0.4391420230993781, - "grad_norm": 0.9866783618927002, - "learning_rate": 1.7072386512670812e-05, - "loss": 0.1186, + "epoch": 0.878217168384182, + "grad_norm": 0.353205144405365, + "learning_rate": 1.414521887743879e-05, + "loss": 0.0703, "step": 17300 }, { - "epoch": 0.4392689427592334, - "grad_norm": 0.5354117751121521, - "learning_rate": 1.707154038160511e-05, - "loss": 0.0948, + "epoch": 0.8784709883750444, + "grad_norm": 0.5128482580184937, + "learning_rate": 1.4143526744166371e-05, + "loss": 0.0733, "step": 17305 }, { - "epoch": 0.43939586241908873, - "grad_norm": 0.6758864521980286, - "learning_rate": 1.707069425053941e-05, - "loss": 0.1114, + "epoch": 0.8787248083659069, + "grad_norm": 0.43205496668815613, + "learning_rate": 1.4141834610893953e-05, + "loss": 0.0741, "step": 17310 }, { - "epoch": 0.439522782078944, - "grad_norm": 0.5247126221656799, - "learning_rate": 1.7069848119473708e-05, - "loss": 0.0883, + "epoch": 0.8789786283567694, + "grad_norm": 0.36526092886924744, + "learning_rate": 1.4140142477621539e-05, + "loss": 0.0685, "step": 17315 }, { - "epoch": 0.43964970173879936, - "grad_norm": 1.1431208848953247, - "learning_rate": 1.7069001988408006e-05, - "loss": 0.0963, + "epoch": 0.8792324483476318, + "grad_norm": 1.4037833213806152, + "learning_rate": 1.4138450344349122e-05, + "loss": 0.0829, "step": 17320 }, { - "epoch": 0.43977662139865464, - "grad_norm": 0.6198017001152039, - "learning_rate": 1.7068155857342304e-05, - "loss": 0.0994, + "epoch": 0.8794862683384943, + "grad_norm": 0.5729219317436218, + "learning_rate": 1.4136758211076707e-05, + "loss": 0.0733, "step": 17325 }, { - "epoch": 0.43990354105851, - "grad_norm": 0.6036967635154724, - "learning_rate": 1.7067309726276603e-05, - "loss": 0.1146, + "epoch": 0.8797400883293568, + "grad_norm": 0.421556681394577, + "learning_rate": 1.413506607780429e-05, + "loss": 0.0655, "step": 17330 }, { - "epoch": 0.44003046071836527, - "grad_norm": 0.5508009195327759, - "learning_rate": 1.70664635952109e-05, - "loss": 0.1264, + "epoch": 0.8799939083202193, + "grad_norm": 0.46088892221450806, + "learning_rate": 1.4133373944531871e-05, + "loss": 0.069, "step": 17335 }, { - "epoch": 0.4401573803782206, - "grad_norm": 0.5151662230491638, - "learning_rate": 1.7065617464145196e-05, - "loss": 0.092, + "epoch": 0.8802477283110818, + "grad_norm": 0.29208675026893616, + "learning_rate": 1.4131681811259456e-05, + "loss": 0.0572, "step": 17340 }, { - "epoch": 0.4402843000380759, - "grad_norm": 0.6897189617156982, - "learning_rate": 1.7064771333079495e-05, - "loss": 0.1114, + "epoch": 0.8805015483019443, + "grad_norm": 0.5103023648262024, + "learning_rate": 1.412998967798704e-05, + "loss": 0.0623, "step": 17345 }, { - "epoch": 0.44041121969793123, - "grad_norm": 0.6573798060417175, - "learning_rate": 1.7063925202013793e-05, - "loss": 0.1, + "epoch": 0.8807553682928068, + "grad_norm": 0.4015849232673645, + "learning_rate": 1.4128297544714622e-05, + "loss": 0.0709, "step": 17350 }, { - "epoch": 0.4405381393577865, - "grad_norm": 0.5872990489006042, - "learning_rate": 1.706307907094809e-05, - "loss": 0.0773, + "epoch": 0.8810091882836693, + "grad_norm": 0.2928534150123596, + "learning_rate": 1.4126605411442207e-05, + "loss": 0.0688, "step": 17355 }, { - "epoch": 0.44066505901764186, - "grad_norm": 0.4231064021587372, - "learning_rate": 1.706223293988239e-05, - "loss": 0.1086, + "epoch": 0.8812630082745317, + "grad_norm": 0.2520070970058441, + "learning_rate": 1.4124913278169789e-05, + "loss": 0.0692, "step": 17360 }, { - "epoch": 0.44079197867749714, - "grad_norm": 0.7791629433631897, - "learning_rate": 1.7061386808816688e-05, - "loss": 0.085, + "epoch": 0.8815168282653942, + "grad_norm": 0.736379086971283, + "learning_rate": 1.4123221144897372e-05, + "loss": 0.077, "step": 17365 }, { - "epoch": 0.4409188983373525, - "grad_norm": 0.5783230662345886, - "learning_rate": 1.7060540677750987e-05, - "loss": 0.0997, + "epoch": 0.8817706482562566, + "grad_norm": 0.3595697283744812, + "learning_rate": 1.4121529011624958e-05, + "loss": 0.0589, "step": 17370 }, { - "epoch": 0.44104581799720777, - "grad_norm": 0.5464281439781189, - "learning_rate": 1.7059694546685285e-05, - "loss": 0.091, + "epoch": 0.8820244682471191, + "grad_norm": 0.3573293685913086, + "learning_rate": 1.411983687835254e-05, + "loss": 0.071, "step": 17375 }, { - "epoch": 0.44117273765706305, - "grad_norm": 0.3424980938434601, - "learning_rate": 1.705884841561958e-05, - "loss": 0.0795, + "epoch": 0.8822782882379816, + "grad_norm": 0.5311951637268066, + "learning_rate": 1.4118144745080125e-05, + "loss": 0.0707, "step": 17380 }, { - "epoch": 0.4412996573169184, - "grad_norm": 0.6647641062736511, - "learning_rate": 1.705800228455388e-05, - "loss": 0.1019, + "epoch": 0.8825321082288441, + "grad_norm": 0.4468015432357788, + "learning_rate": 1.4116452611807706e-05, + "loss": 0.0693, "step": 17385 }, { - "epoch": 0.4414265769767737, - "grad_norm": 0.6311555504798889, - "learning_rate": 1.7057156153488177e-05, - "loss": 0.0872, + "epoch": 0.8827859282197066, + "grad_norm": 0.5135281085968018, + "learning_rate": 1.411476047853529e-05, + "loss": 0.0648, "step": 17390 }, { - "epoch": 0.441553496636629, - "grad_norm": 0.7964804768562317, - "learning_rate": 1.7056310022422475e-05, - "loss": 0.1081, + "epoch": 0.8830397482105691, + "grad_norm": 0.36561042070388794, + "learning_rate": 1.4113068345262875e-05, + "loss": 0.0621, "step": 17395 }, { - "epoch": 0.4416804162964843, - "grad_norm": 0.4290269613265991, - "learning_rate": 1.705546389135677e-05, - "loss": 0.0848, + "epoch": 0.8832935682014316, + "grad_norm": 0.6639304757118225, + "learning_rate": 1.4111376211990457e-05, + "loss": 0.065, "step": 17400 }, { - "epoch": 0.44180733595633964, - "grad_norm": 0.5706046223640442, - "learning_rate": 1.705461776029107e-05, - "loss": 0.0982, + "epoch": 0.8835473881922941, + "grad_norm": 0.4147944450378418, + "learning_rate": 1.410968407871804e-05, + "loss": 0.0744, "step": 17405 }, { - "epoch": 0.4419342556161949, - "grad_norm": 0.5521743893623352, - "learning_rate": 1.7053771629225367e-05, - "loss": 0.0991, + "epoch": 0.8838012081831566, + "grad_norm": 0.4257824420928955, + "learning_rate": 1.4107991945445624e-05, + "loss": 0.0732, "step": 17410 }, { - "epoch": 0.44206117527605027, - "grad_norm": 2.2720301151275635, - "learning_rate": 1.7052925498159665e-05, - "loss": 0.101, + "epoch": 0.8840550281740189, + "grad_norm": 0.30751651525497437, + "learning_rate": 1.4106299812173208e-05, + "loss": 0.0694, "step": 17415 }, { - "epoch": 0.44218809493590555, - "grad_norm": 0.4159591794013977, - "learning_rate": 1.7052079367093964e-05, - "loss": 0.1011, + "epoch": 0.8843088481648814, + "grad_norm": 0.3239120543003082, + "learning_rate": 1.4104607678900793e-05, + "loss": 0.0672, "step": 17420 }, { - "epoch": 0.4423150145957609, - "grad_norm": 0.5744797587394714, - "learning_rate": 1.7051233236028262e-05, - "loss": 0.1196, + "epoch": 0.8845626681557439, + "grad_norm": 0.4700276553630829, + "learning_rate": 1.4102915545628375e-05, + "loss": 0.0714, "step": 17425 }, { - "epoch": 0.4424419342556162, - "grad_norm": 0.6028972268104553, - "learning_rate": 1.705038710496256e-05, - "loss": 0.1137, + "epoch": 0.8848164881466064, + "grad_norm": 0.3352143466472626, + "learning_rate": 1.4101223412355958e-05, + "loss": 0.0731, "step": 17430 }, { - "epoch": 0.4425688539154715, - "grad_norm": 0.7641376256942749, - "learning_rate": 1.704954097389686e-05, - "loss": 0.1123, + "epoch": 0.8850703081374689, + "grad_norm": 0.34849241375923157, + "learning_rate": 1.4099531279083542e-05, + "loss": 0.067, "step": 17435 }, { - "epoch": 0.4426957735753268, - "grad_norm": 1.0234646797180176, - "learning_rate": 1.7048694842831154e-05, - "loss": 0.0983, + "epoch": 0.8853241281283314, + "grad_norm": 0.48389196395874023, + "learning_rate": 1.4097839145811125e-05, + "loss": 0.0659, "step": 17440 }, { - "epoch": 0.44282269323518214, - "grad_norm": 0.41882407665252686, - "learning_rate": 1.7047848711765453e-05, - "loss": 0.0839, + "epoch": 0.8855779481191939, + "grad_norm": 0.4253050684928894, + "learning_rate": 1.4096147012538707e-05, + "loss": 0.071, "step": 17445 }, { - "epoch": 0.4429496128950374, - "grad_norm": 0.53977370262146, - "learning_rate": 1.704700258069975e-05, - "loss": 0.0904, + "epoch": 0.8858317681100564, + "grad_norm": 0.34934577345848083, + "learning_rate": 1.4094454879266293e-05, + "loss": 0.068, "step": 17450 }, { - "epoch": 0.44307653255489277, - "grad_norm": 0.7233864068984985, - "learning_rate": 1.704615644963405e-05, - "loss": 0.0926, + "epoch": 0.8860855881009189, + "grad_norm": 0.3193303644657135, + "learning_rate": 1.4092762745993876e-05, + "loss": 0.065, "step": 17455 }, { - "epoch": 0.44320345221474805, - "grad_norm": 0.6048493385314941, - "learning_rate": 1.7045310318568348e-05, - "loss": 0.0979, + "epoch": 0.8863394080917814, + "grad_norm": 0.5447885990142822, + "learning_rate": 1.4091070612721458e-05, + "loss": 0.0703, "step": 17460 }, { - "epoch": 0.4433303718746034, - "grad_norm": 0.8083519339561462, - "learning_rate": 1.7044464187502646e-05, - "loss": 0.1042, + "epoch": 0.8865932280826437, + "grad_norm": 0.4106573462486267, + "learning_rate": 1.4089378479449043e-05, + "loss": 0.0764, "step": 17465 }, { - "epoch": 0.4434572915344587, - "grad_norm": 0.7588837742805481, - "learning_rate": 1.7043618056436945e-05, - "loss": 0.0941, + "epoch": 0.8868470480735062, + "grad_norm": 0.35047876834869385, + "learning_rate": 1.4087686346176625e-05, + "loss": 0.0725, "step": 17470 }, { - "epoch": 0.443584211194314, - "grad_norm": 0.5267123579978943, - "learning_rate": 1.7042771925371243e-05, - "loss": 0.0903, + "epoch": 0.8871008680643687, + "grad_norm": 0.5358198881149292, + "learning_rate": 1.408599421290421e-05, + "loss": 0.0748, "step": 17475 }, { - "epoch": 0.4437111308541693, - "grad_norm": 0.529613733291626, - "learning_rate": 1.7041925794305538e-05, - "loss": 0.0994, + "epoch": 0.8873546880552312, + "grad_norm": 0.5286594033241272, + "learning_rate": 1.4084302079631794e-05, + "loss": 0.0695, "step": 17480 }, { - "epoch": 0.44383805051402464, - "grad_norm": 0.5367969274520874, - "learning_rate": 1.7041079663239836e-05, - "loss": 0.1026, + "epoch": 0.8876085080460937, + "grad_norm": 0.7509157657623291, + "learning_rate": 1.4082609946359376e-05, + "loss": 0.0677, "step": 17485 }, { - "epoch": 0.4439649701738799, - "grad_norm": 0.7298158407211304, - "learning_rate": 1.7040233532174135e-05, - "loss": 0.1095, + "epoch": 0.8878623280369562, + "grad_norm": 0.44565531611442566, + "learning_rate": 1.4080917813086961e-05, + "loss": 0.0675, "step": 17490 }, { - "epoch": 0.44409188983373526, - "grad_norm": 0.49832236766815186, - "learning_rate": 1.7039387401108433e-05, - "loss": 0.111, + "epoch": 0.8881161480278187, + "grad_norm": 0.403942734003067, + "learning_rate": 1.4079225679814543e-05, + "loss": 0.0695, "step": 17495 }, { - "epoch": 0.44421880949359055, - "grad_norm": 0.6671977639198303, - "learning_rate": 1.703854127004273e-05, - "loss": 0.1194, + "epoch": 0.8883699680186812, + "grad_norm": 0.653939425945282, + "learning_rate": 1.4077533546542126e-05, + "loss": 0.0751, "step": 17500 }, { - "epoch": 0.4443457291534459, - "grad_norm": 0.4719528257846832, - "learning_rate": 1.703769513897703e-05, - "loss": 0.0975, + "epoch": 0.8886237880095437, + "grad_norm": 0.3768764138221741, + "learning_rate": 1.4075841413269712e-05, + "loss": 0.0654, "step": 17505 }, { - "epoch": 0.4444726488133012, - "grad_norm": 0.6901575922966003, - "learning_rate": 1.703684900791133e-05, - "loss": 0.1037, + "epoch": 0.8888776080004062, + "grad_norm": 0.33701881766319275, + "learning_rate": 1.4074149279997293e-05, + "loss": 0.0663, "step": 17510 }, { - "epoch": 0.4445995684731565, - "grad_norm": 0.47094953060150146, - "learning_rate": 1.7036002876845627e-05, - "loss": 0.097, + "epoch": 0.8891314279912685, + "grad_norm": 0.3610353171825409, + "learning_rate": 1.4072457146724879e-05, + "loss": 0.0632, "step": 17515 }, { - "epoch": 0.4447264881330118, - "grad_norm": 0.5614836812019348, - "learning_rate": 1.7035156745779922e-05, - "loss": 0.0888, + "epoch": 0.889385247982131, + "grad_norm": 0.5949681997299194, + "learning_rate": 1.407076501345246e-05, + "loss": 0.0694, "step": 17520 }, { - "epoch": 0.44485340779286714, - "grad_norm": 0.8260118365287781, - "learning_rate": 1.703431061471422e-05, - "loss": 0.0935, + "epoch": 0.8896390679729935, + "grad_norm": 0.34294983744621277, + "learning_rate": 1.4069072880180044e-05, + "loss": 0.062, "step": 17525 }, { - "epoch": 0.4449803274527224, - "grad_norm": 1.061068058013916, - "learning_rate": 1.703346448364852e-05, - "loss": 0.0773, + "epoch": 0.889892887963856, + "grad_norm": 0.3811400830745697, + "learning_rate": 1.406738074690763e-05, + "loss": 0.061, "step": 17530 }, { - "epoch": 0.44510724711257776, - "grad_norm": 0.8494129776954651, - "learning_rate": 1.7032618352582817e-05, - "loss": 0.1021, + "epoch": 0.8901467079547185, + "grad_norm": 0.3286953270435333, + "learning_rate": 1.4065688613635211e-05, + "loss": 0.0635, "step": 17535 }, { - "epoch": 0.44523416677243305, - "grad_norm": 0.5907729864120483, - "learning_rate": 1.7031772221517112e-05, - "loss": 0.1164, + "epoch": 0.890400527945581, + "grad_norm": 0.4377914071083069, + "learning_rate": 1.4063996480362793e-05, + "loss": 0.0699, "step": 17540 }, { - "epoch": 0.4453610864322884, - "grad_norm": 0.44984832406044006, - "learning_rate": 1.703092609045141e-05, - "loss": 0.0998, + "epoch": 0.8906543479364435, + "grad_norm": 0.4538041949272156, + "learning_rate": 1.4062304347090378e-05, + "loss": 0.0776, "step": 17545 }, { - "epoch": 0.4454880060921437, - "grad_norm": 0.7240840792655945, - "learning_rate": 1.703007995938571e-05, - "loss": 0.0951, + "epoch": 0.890908167927306, + "grad_norm": 0.3821781277656555, + "learning_rate": 1.4060612213817962e-05, + "loss": 0.073, "step": 17550 }, { - "epoch": 0.44561492575199896, - "grad_norm": 0.5075101852416992, - "learning_rate": 1.7029233828320007e-05, - "loss": 0.0877, + "epoch": 0.8911619879181685, + "grad_norm": 0.5155601501464844, + "learning_rate": 1.4058920080545544e-05, + "loss": 0.0629, "step": 17555 }, { - "epoch": 0.4457418454118543, - "grad_norm": 0.7027420997619629, - "learning_rate": 1.7028387697254306e-05, - "loss": 0.1199, + "epoch": 0.891415807909031, + "grad_norm": 0.452945739030838, + "learning_rate": 1.4057227947273129e-05, + "loss": 0.0632, "step": 17560 }, { - "epoch": 0.4458687650717096, - "grad_norm": 0.6703844666481018, - "learning_rate": 1.7027541566188604e-05, - "loss": 0.0958, + "epoch": 0.8916696278998933, + "grad_norm": 0.48677828907966614, + "learning_rate": 1.405553581400071e-05, + "loss": 0.0751, "step": 17565 }, { - "epoch": 0.4459956847315649, - "grad_norm": 0.6154130697250366, - "learning_rate": 1.7026695435122902e-05, - "loss": 0.1058, + "epoch": 0.8919234478907558, + "grad_norm": 0.5773299932479858, + "learning_rate": 1.4053843680728296e-05, + "loss": 0.0657, "step": 17570 }, { - "epoch": 0.4461226043914202, - "grad_norm": 0.7175818085670471, - "learning_rate": 1.70258493040572e-05, - "loss": 0.1032, + "epoch": 0.8921772678816183, + "grad_norm": 0.47708895802497864, + "learning_rate": 1.405215154745588e-05, + "loss": 0.0694, "step": 17575 }, { - "epoch": 0.44624952405127555, - "grad_norm": 0.5333501696586609, - "learning_rate": 1.70250031729915e-05, - "loss": 0.0948, + "epoch": 0.8924310878724808, + "grad_norm": 0.3603571951389313, + "learning_rate": 1.4050459414183461e-05, + "loss": 0.07, "step": 17580 }, { - "epoch": 0.44637644371113083, - "grad_norm": 1.2747620344161987, - "learning_rate": 1.7024157041925794e-05, - "loss": 0.0942, + "epoch": 0.8926849078633433, + "grad_norm": 0.40807899832725525, + "learning_rate": 1.4048767280911047e-05, + "loss": 0.0733, "step": 17585 }, { - "epoch": 0.4465033633709862, - "grad_norm": 0.545042097568512, - "learning_rate": 1.7023310910860093e-05, - "loss": 0.1238, + "epoch": 0.8929387278542058, + "grad_norm": 0.3264356255531311, + "learning_rate": 1.4047075147638628e-05, + "loss": 0.0724, "step": 17590 }, { - "epoch": 0.44663028303084146, - "grad_norm": 1.0066654682159424, - "learning_rate": 1.702246477979439e-05, - "loss": 0.1188, + "epoch": 0.8931925478450683, + "grad_norm": 0.4679695665836334, + "learning_rate": 1.4045383014366212e-05, + "loss": 0.0772, "step": 17595 }, { - "epoch": 0.4467572026906968, - "grad_norm": 0.49726343154907227, - "learning_rate": 1.702161864872869e-05, - "loss": 0.0799, + "epoch": 0.8934463678359308, + "grad_norm": 0.3341139853000641, + "learning_rate": 1.4043690881093797e-05, + "loss": 0.0745, "step": 17600 }, { - "epoch": 0.4468841223505521, - "grad_norm": 0.5029115676879883, - "learning_rate": 1.7020772517662988e-05, - "loss": 0.1055, + "epoch": 0.8937001878267933, + "grad_norm": 0.3257398307323456, + "learning_rate": 1.4041998747821379e-05, + "loss": 0.0736, "step": 17605 }, { - "epoch": 0.4470110420104074, - "grad_norm": 0.7307389378547668, - "learning_rate": 1.7019926386597286e-05, - "loss": 0.1156, + "epoch": 0.8939540078176558, + "grad_norm": 0.34771645069122314, + "learning_rate": 1.4040306614548963e-05, + "loss": 0.0663, "step": 17610 }, { - "epoch": 0.4471379616702627, - "grad_norm": 0.5359386205673218, - "learning_rate": 1.7019080255531585e-05, - "loss": 0.0989, + "epoch": 0.8942078278085182, + "grad_norm": 0.2789120674133301, + "learning_rate": 1.4038614481276546e-05, + "loss": 0.0727, "step": 17615 }, { - "epoch": 0.44726488133011805, - "grad_norm": 0.8410775065422058, - "learning_rate": 1.7018234124465883e-05, - "loss": 0.0998, + "epoch": 0.8944616477993806, + "grad_norm": 0.3300907611846924, + "learning_rate": 1.403692234800413e-05, + "loss": 0.0609, "step": 17620 }, { - "epoch": 0.44739180098997333, - "grad_norm": 0.6761133670806885, - "learning_rate": 1.7017387993400178e-05, - "loss": 0.125, + "epoch": 0.8947154677902431, + "grad_norm": 0.4132726490497589, + "learning_rate": 1.4035230214731715e-05, + "loss": 0.0677, "step": 17625 }, { - "epoch": 0.44751872064982867, - "grad_norm": 0.5837348103523254, - "learning_rate": 1.7016541862334476e-05, - "loss": 0.0956, + "epoch": 0.8949692877811056, + "grad_norm": 0.47992974519729614, + "learning_rate": 1.4033538081459297e-05, + "loss": 0.0826, "step": 17630 }, { - "epoch": 0.44764564030968396, - "grad_norm": 0.5061163902282715, - "learning_rate": 1.7015695731268775e-05, - "loss": 0.0989, + "epoch": 0.8952231077719681, + "grad_norm": 0.3706139028072357, + "learning_rate": 1.403184594818688e-05, + "loss": 0.0766, "step": 17635 }, { - "epoch": 0.4477725599695393, - "grad_norm": 0.6976045370101929, - "learning_rate": 1.7014849600203073e-05, - "loss": 0.1112, + "epoch": 0.8954769277628306, + "grad_norm": 0.4228927791118622, + "learning_rate": 1.4030153814914464e-05, + "loss": 0.0827, "step": 17640 }, { - "epoch": 0.4478994796293946, - "grad_norm": 0.954590916633606, - "learning_rate": 1.701400346913737e-05, - "loss": 0.0926, + "epoch": 0.8957307477536931, + "grad_norm": 0.5431283116340637, + "learning_rate": 1.4028461681642047e-05, + "loss": 0.0683, "step": 17645 }, { - "epoch": 0.4480263992892499, - "grad_norm": 0.40621820092201233, - "learning_rate": 1.701315733807167e-05, - "loss": 0.1027, + "epoch": 0.8959845677445556, + "grad_norm": 0.33838915824890137, + "learning_rate": 1.402676954836963e-05, + "loss": 0.058, "step": 17650 }, { - "epoch": 0.4481533189491052, - "grad_norm": 0.8290960788726807, - "learning_rate": 1.701231120700597e-05, - "loss": 0.1031, + "epoch": 0.8962383877354181, + "grad_norm": 0.3902619481086731, + "learning_rate": 1.4025077415097215e-05, + "loss": 0.078, "step": 17655 }, { - "epoch": 0.44828023860896055, - "grad_norm": 0.7294934391975403, - "learning_rate": 1.7011465075940267e-05, - "loss": 0.0858, + "epoch": 0.8964922077262806, + "grad_norm": 0.38549157977104187, + "learning_rate": 1.4023385281824798e-05, + "loss": 0.0741, "step": 17660 }, { - "epoch": 0.44840715826881583, - "grad_norm": 0.4287014901638031, - "learning_rate": 1.7010618944874562e-05, - "loss": 0.0721, + "epoch": 0.896746027717143, + "grad_norm": 0.4042127728462219, + "learning_rate": 1.4021693148552382e-05, + "loss": 0.0687, "step": 17665 }, { - "epoch": 0.44853407792867117, - "grad_norm": 0.5582754015922546, - "learning_rate": 1.700977281380886e-05, - "loss": 0.0858, + "epoch": 0.8969998477080054, + "grad_norm": 0.452392041683197, + "learning_rate": 1.4020001015279965e-05, + "loss": 0.0739, "step": 17670 }, { - "epoch": 0.44866099758852646, - "grad_norm": 0.6257230639457703, - "learning_rate": 1.700892668274316e-05, - "loss": 0.102, + "epoch": 0.8972536676988679, + "grad_norm": 0.45912966132164, + "learning_rate": 1.4018308882007547e-05, + "loss": 0.0725, "step": 17675 }, { - "epoch": 0.4487879172483818, - "grad_norm": 0.7308250665664673, - "learning_rate": 1.7008080551677457e-05, - "loss": 0.1423, + "epoch": 0.8975074876897304, + "grad_norm": 0.4095827043056488, + "learning_rate": 1.4016616748735132e-05, + "loss": 0.075, "step": 17680 }, { - "epoch": 0.4489148369082371, - "grad_norm": 0.5138436555862427, - "learning_rate": 1.7007234420611752e-05, - "loss": 0.1045, + "epoch": 0.8977613076805929, + "grad_norm": 0.3190585672855377, + "learning_rate": 1.4014924615462716e-05, + "loss": 0.0717, "step": 17685 }, { - "epoch": 0.4490417565680924, - "grad_norm": 0.46744006872177124, - "learning_rate": 1.700638828954605e-05, - "loss": 0.1015, + "epoch": 0.8980151276714554, + "grad_norm": 0.6912304759025574, + "learning_rate": 1.4013232482190298e-05, + "loss": 0.0708, "step": 17690 }, { - "epoch": 0.4491686762279477, - "grad_norm": 0.7824543118476868, - "learning_rate": 1.700554215848035e-05, - "loss": 0.1009, + "epoch": 0.8982689476623179, + "grad_norm": 0.7174093723297119, + "learning_rate": 1.4011540348917883e-05, + "loss": 0.069, "step": 17695 }, { - "epoch": 0.44929559588780305, - "grad_norm": 0.43724361062049866, - "learning_rate": 1.7004696027414647e-05, - "loss": 0.0888, + "epoch": 0.8985227676531804, + "grad_norm": 0.30754783749580383, + "learning_rate": 1.4009848215645465e-05, + "loss": 0.0618, "step": 17700 }, { - "epoch": 0.44942251554765833, - "grad_norm": 0.5417038202285767, - "learning_rate": 1.7003849896348946e-05, - "loss": 0.1034, + "epoch": 0.8987765876440429, + "grad_norm": 0.7452945113182068, + "learning_rate": 1.4008156082373048e-05, + "loss": 0.0766, "step": 17705 }, { - "epoch": 0.44954943520751367, - "grad_norm": 0.42264753580093384, - "learning_rate": 1.7003003765283244e-05, - "loss": 0.1087, + "epoch": 0.8990304076349053, + "grad_norm": 0.3052619695663452, + "learning_rate": 1.4006463949100633e-05, + "loss": 0.0701, "step": 17710 }, { - "epoch": 0.44967635486736895, - "grad_norm": 0.7105047702789307, - "learning_rate": 1.7002157634217542e-05, - "loss": 0.1077, + "epoch": 0.8992842276257678, + "grad_norm": 0.6337426900863647, + "learning_rate": 1.4004771815828215e-05, + "loss": 0.0775, "step": 17715 }, { - "epoch": 0.44980327452722424, - "grad_norm": 0.49369654059410095, - "learning_rate": 1.700131150315184e-05, - "loss": 0.0842, + "epoch": 0.8995380476166303, + "grad_norm": 0.35096266865730286, + "learning_rate": 1.40030796825558e-05, + "loss": 0.0629, "step": 17720 }, { - "epoch": 0.4499301941870796, - "grad_norm": 0.5037845969200134, - "learning_rate": 1.7000465372086136e-05, - "loss": 0.1034, + "epoch": 0.8997918676074927, + "grad_norm": 0.40677130222320557, + "learning_rate": 1.4001387549283382e-05, + "loss": 0.0777, "step": 17725 }, { - "epoch": 0.45005711384693486, - "grad_norm": 1.3118078708648682, - "learning_rate": 1.6999619241020434e-05, - "loss": 0.0851, + "epoch": 0.9000456875983552, + "grad_norm": 0.4063386023044586, + "learning_rate": 1.3999695416010966e-05, + "loss": 0.0565, "step": 17730 }, { - "epoch": 0.4501840335067902, - "grad_norm": 0.4506221413612366, - "learning_rate": 1.6998773109954733e-05, - "loss": 0.0929, + "epoch": 0.9002995075892177, + "grad_norm": 0.30389711260795593, + "learning_rate": 1.399800328273855e-05, + "loss": 0.0671, "step": 17735 }, { - "epoch": 0.4503109531666455, - "grad_norm": 0.9212222695350647, - "learning_rate": 1.699792697888903e-05, - "loss": 0.1037, + "epoch": 0.9005533275800802, + "grad_norm": 0.26976916193962097, + "learning_rate": 1.3996311149466133e-05, + "loss": 0.0717, "step": 17740 }, { - "epoch": 0.45043787282650083, - "grad_norm": 0.43103042244911194, - "learning_rate": 1.699708084782333e-05, - "loss": 0.1013, + "epoch": 0.9008071475709427, + "grad_norm": 0.4910055994987488, + "learning_rate": 1.3994619016193715e-05, + "loss": 0.065, "step": 17745 }, { - "epoch": 0.4505647924863561, - "grad_norm": 0.4984724223613739, - "learning_rate": 1.6996234716757628e-05, - "loss": 0.1194, + "epoch": 0.9010609675618052, + "grad_norm": 0.3902299702167511, + "learning_rate": 1.39929268829213e-05, + "loss": 0.0636, "step": 17750 }, { - "epoch": 0.45069171214621145, - "grad_norm": 0.44575783610343933, - "learning_rate": 1.6995388585691926e-05, - "loss": 0.0743, + "epoch": 0.9013147875526677, + "grad_norm": 0.9779106378555298, + "learning_rate": 1.3991234749648884e-05, + "loss": 0.0642, "step": 17755 }, { - "epoch": 0.45081863180606674, - "grad_norm": 0.641629695892334, - "learning_rate": 1.6994542454626225e-05, - "loss": 0.108, + "epoch": 0.9015686075435301, + "grad_norm": 0.48715925216674805, + "learning_rate": 1.3989542616376467e-05, + "loss": 0.0749, "step": 17760 }, { - "epoch": 0.4509455514659221, - "grad_norm": 1.2461881637573242, - "learning_rate": 1.699369632356052e-05, - "loss": 0.1069, + "epoch": 0.9018224275343926, + "grad_norm": 0.9629639387130737, + "learning_rate": 1.398785048310405e-05, + "loss": 0.0692, "step": 17765 }, { - "epoch": 0.45107247112577736, - "grad_norm": 0.4661979377269745, - "learning_rate": 1.6992850192494818e-05, - "loss": 0.0886, + "epoch": 0.9020762475252551, + "grad_norm": 0.39406028389930725, + "learning_rate": 1.3986158349831633e-05, + "loss": 0.0673, "step": 17770 }, { - "epoch": 0.4511993907856327, - "grad_norm": 0.43469861149787903, - "learning_rate": 1.6992004061429117e-05, - "loss": 0.0883, + "epoch": 0.9023300675161176, + "grad_norm": 0.3680139482021332, + "learning_rate": 1.3984466216559218e-05, + "loss": 0.0706, "step": 17775 }, { - "epoch": 0.451326310445488, - "grad_norm": 0.8349996209144592, - "learning_rate": 1.6991157930363415e-05, - "loss": 0.1131, + "epoch": 0.90258388750698, + "grad_norm": 0.3493126928806305, + "learning_rate": 1.3982774083286801e-05, + "loss": 0.0711, "step": 17780 }, { - "epoch": 0.45145323010534333, - "grad_norm": 0.782729983329773, - "learning_rate": 1.6990311799297713e-05, - "loss": 0.1043, + "epoch": 0.9028377074978425, + "grad_norm": 0.35266679525375366, + "learning_rate": 1.3981081950014383e-05, + "loss": 0.058, "step": 17785 }, { - "epoch": 0.4515801497651986, - "grad_norm": 0.8678102493286133, - "learning_rate": 1.6989465668232012e-05, - "loss": 0.0933, + "epoch": 0.903091527488705, + "grad_norm": 0.5064133405685425, + "learning_rate": 1.3979389816741969e-05, + "loss": 0.09, "step": 17790 }, { - "epoch": 0.45170706942505395, - "grad_norm": 0.6455554962158203, - "learning_rate": 1.698861953716631e-05, - "loss": 0.0981, + "epoch": 0.9033453474795675, + "grad_norm": 0.6205065250396729, + "learning_rate": 1.397769768346955e-05, + "loss": 0.0656, "step": 17795 }, { - "epoch": 0.45183398908490924, - "grad_norm": 0.6780105829238892, - "learning_rate": 1.698777340610061e-05, - "loss": 0.0963, + "epoch": 0.90359916747043, + "grad_norm": 0.36125099658966064, + "learning_rate": 1.3976005550197134e-05, + "loss": 0.0662, "step": 17800 }, { - "epoch": 0.4519609087447646, - "grad_norm": 0.573142945766449, - "learning_rate": 1.6986927275034904e-05, - "loss": 0.1005, + "epoch": 0.9038529874612925, + "grad_norm": 0.369780570268631, + "learning_rate": 1.3974313416924719e-05, + "loss": 0.0625, "step": 17805 }, { - "epoch": 0.45208782840461986, - "grad_norm": 1.0088489055633545, - "learning_rate": 1.6986081143969202e-05, - "loss": 0.0891, + "epoch": 0.9041068074521549, + "grad_norm": 0.3109322786331177, + "learning_rate": 1.3972621283652301e-05, + "loss": 0.0709, "step": 17810 }, { - "epoch": 0.4522147480644752, - "grad_norm": 0.5898669362068176, - "learning_rate": 1.69852350129035e-05, - "loss": 0.1193, + "epoch": 0.9043606274430174, + "grad_norm": 0.44117799401283264, + "learning_rate": 1.3970929150379886e-05, + "loss": 0.0566, "step": 17815 }, { - "epoch": 0.4523416677243305, - "grad_norm": 0.47523847222328186, - "learning_rate": 1.69843888818378e-05, - "loss": 0.0988, + "epoch": 0.9046144474338799, + "grad_norm": 0.41141143441200256, + "learning_rate": 1.3969237017107468e-05, + "loss": 0.066, "step": 17820 }, { - "epoch": 0.4524685873841858, - "grad_norm": 0.821695864200592, - "learning_rate": 1.6983542750772094e-05, - "loss": 0.1076, + "epoch": 0.9048682674247424, + "grad_norm": 0.38892972469329834, + "learning_rate": 1.3967544883835052e-05, + "loss": 0.0785, "step": 17825 }, { - "epoch": 0.4525955070440411, - "grad_norm": 0.6234042644500732, - "learning_rate": 1.6982696619706392e-05, - "loss": 0.1032, + "epoch": 0.9051220874156048, + "grad_norm": 0.42113691568374634, + "learning_rate": 1.3965852750562637e-05, + "loss": 0.0747, "step": 17830 }, { - "epoch": 0.45272242670389645, - "grad_norm": 0.40646445751190186, - "learning_rate": 1.698185048864069e-05, - "loss": 0.0915, + "epoch": 0.9053759074064673, + "grad_norm": 0.33473747968673706, + "learning_rate": 1.3964160617290219e-05, + "loss": 0.0732, "step": 17835 }, { - "epoch": 0.45284934636375174, - "grad_norm": 0.43104541301727295, - "learning_rate": 1.698100435757499e-05, - "loss": 0.0952, + "epoch": 0.9056297273973298, + "grad_norm": 0.4112972915172577, + "learning_rate": 1.3962468484017802e-05, + "loss": 0.0689, "step": 17840 }, { - "epoch": 0.4529762660236071, - "grad_norm": 0.3852890729904175, - "learning_rate": 1.6980158226509287e-05, - "loss": 0.0867, + "epoch": 0.9058835473881923, + "grad_norm": 0.31149786710739136, + "learning_rate": 1.3960776350745386e-05, + "loss": 0.0665, "step": 17845 }, { - "epoch": 0.45310318568346236, - "grad_norm": 0.4943218231201172, - "learning_rate": 1.6979312095443586e-05, - "loss": 0.1055, + "epoch": 0.9061373673790548, + "grad_norm": 0.4009893238544464, + "learning_rate": 1.395908421747297e-05, + "loss": 0.0708, "step": 17850 }, { - "epoch": 0.4532301053433177, - "grad_norm": 0.5827302932739258, - "learning_rate": 1.6978465964377884e-05, - "loss": 0.0961, + "epoch": 0.9063911873699173, + "grad_norm": 0.3744799792766571, + "learning_rate": 1.3957392084200551e-05, + "loss": 0.0738, "step": 17855 }, { - "epoch": 0.453357025003173, - "grad_norm": 0.4383994936943054, - "learning_rate": 1.6977619833312183e-05, - "loss": 0.0849, + "epoch": 0.9066450073607797, + "grad_norm": 0.3893011212348938, + "learning_rate": 1.3955699950928136e-05, + "loss": 0.0616, "step": 17860 }, { - "epoch": 0.4534839446630283, - "grad_norm": 0.8873216509819031, - "learning_rate": 1.6976773702246478e-05, - "loss": 0.1143, + "epoch": 0.9068988273516422, + "grad_norm": 0.35065850615501404, + "learning_rate": 1.395400781765572e-05, + "loss": 0.0651, "step": 17865 }, { - "epoch": 0.4536108643228836, - "grad_norm": 2.2285990715026855, - "learning_rate": 1.6975927571180776e-05, - "loss": 0.1292, + "epoch": 0.9071526473425047, + "grad_norm": 0.5612497925758362, + "learning_rate": 1.3952315684383304e-05, + "loss": 0.0835, "step": 17870 }, { - "epoch": 0.45373778398273895, - "grad_norm": 0.7122294902801514, - "learning_rate": 1.6975081440115074e-05, - "loss": 0.1222, + "epoch": 0.9074064673333672, + "grad_norm": 0.39559072256088257, + "learning_rate": 1.3950623551110887e-05, + "loss": 0.0682, "step": 17875 }, { - "epoch": 0.45386470364259424, - "grad_norm": 0.6010311245918274, - "learning_rate": 1.6974235309049373e-05, - "loss": 0.1017, + "epoch": 0.9076602873242297, + "grad_norm": 0.35949650406837463, + "learning_rate": 1.3948931417838469e-05, + "loss": 0.0785, "step": 17880 }, { - "epoch": 0.4539916233024496, - "grad_norm": 0.5135166645050049, - "learning_rate": 1.697338917798367e-05, - "loss": 0.0796, + "epoch": 0.9079141073150921, + "grad_norm": 0.5224626064300537, + "learning_rate": 1.3947239284566054e-05, + "loss": 0.085, "step": 17885 }, { - "epoch": 0.45411854296230486, - "grad_norm": 1.4698787927627563, - "learning_rate": 1.697254304691797e-05, - "loss": 0.0973, + "epoch": 0.9081679273059546, + "grad_norm": 0.3599477708339691, + "learning_rate": 1.3945547151293638e-05, + "loss": 0.0684, "step": 17890 }, { - "epoch": 0.45424546262216015, - "grad_norm": 0.6247333884239197, - "learning_rate": 1.6971696915852268e-05, - "loss": 0.1036, + "epoch": 0.9084217472968171, + "grad_norm": 0.6261371970176697, + "learning_rate": 1.394385501802122e-05, + "loss": 0.068, "step": 17895 }, { - "epoch": 0.4543723822820155, - "grad_norm": 0.5324369072914124, - "learning_rate": 1.6970850784786566e-05, - "loss": 0.1117, + "epoch": 0.9086755672876796, + "grad_norm": 0.3289092183113098, + "learning_rate": 1.3942162884748805e-05, + "loss": 0.0606, "step": 17900 }, { - "epoch": 0.45449930194187077, - "grad_norm": 0.623579204082489, - "learning_rate": 1.697000465372086e-05, - "loss": 0.1021, + "epoch": 0.9089293872785421, + "grad_norm": 0.39295879006385803, + "learning_rate": 1.3940470751476387e-05, + "loss": 0.0604, "step": 17905 }, { - "epoch": 0.4546262216017261, - "grad_norm": 0.5436346530914307, - "learning_rate": 1.696915852265516e-05, - "loss": 0.1034, + "epoch": 0.9091832072694045, + "grad_norm": 0.40853872895240784, + "learning_rate": 1.3938778618203972e-05, + "loss": 0.0763, "step": 17910 }, { - "epoch": 0.4547531412615814, - "grad_norm": 0.543655514717102, - "learning_rate": 1.6968312391589458e-05, - "loss": 0.0858, + "epoch": 0.909437027260267, + "grad_norm": 0.587341845035553, + "learning_rate": 1.3937086484931554e-05, + "loss": 0.0724, "step": 17915 }, { - "epoch": 0.45488006092143674, - "grad_norm": 0.5254409313201904, - "learning_rate": 1.6967466260523757e-05, - "loss": 0.1069, + "epoch": 0.9096908472511295, + "grad_norm": 0.5507720708847046, + "learning_rate": 1.3935394351659137e-05, + "loss": 0.0695, "step": 17920 }, { - "epoch": 0.455006980581292, - "grad_norm": 0.6548727750778198, - "learning_rate": 1.6966620129458055e-05, - "loss": 0.1027, + "epoch": 0.909944667241992, + "grad_norm": 0.31212714314460754, + "learning_rate": 1.3933702218386723e-05, + "loss": 0.0757, "step": 17925 }, { - "epoch": 0.45513390024114736, - "grad_norm": 0.6791672110557556, - "learning_rate": 1.6965773998392353e-05, - "loss": 0.1094, + "epoch": 0.9101984872328545, + "grad_norm": 0.31327691674232483, + "learning_rate": 1.3932010085114304e-05, + "loss": 0.0721, "step": 17930 }, { - "epoch": 0.45526081990100264, - "grad_norm": 0.6401612758636475, - "learning_rate": 1.6964927867326652e-05, - "loss": 0.1051, + "epoch": 0.910452307223717, + "grad_norm": 0.3152126669883728, + "learning_rate": 1.3930317951841888e-05, + "loss": 0.0568, "step": 17935 }, { - "epoch": 0.455387739560858, - "grad_norm": 0.6563493609428406, - "learning_rate": 1.696408173626095e-05, - "loss": 0.1078, + "epoch": 0.9107061272145794, + "grad_norm": 0.4510830342769623, + "learning_rate": 1.3928625818569471e-05, + "loss": 0.0672, "step": 17940 }, { - "epoch": 0.45551465922071327, - "grad_norm": 0.5431410074234009, - "learning_rate": 1.6963235605195245e-05, - "loss": 0.093, + "epoch": 0.9109599472054419, + "grad_norm": 0.3216569721698761, + "learning_rate": 1.3926933685297055e-05, + "loss": 0.0676, "step": 17945 }, { - "epoch": 0.4556415788805686, - "grad_norm": 0.6583861112594604, - "learning_rate": 1.6962389474129544e-05, - "loss": 0.1362, + "epoch": 0.9112137671963044, + "grad_norm": 0.6665675044059753, + "learning_rate": 1.3925241552024637e-05, + "loss": 0.0646, "step": 17950 }, { - "epoch": 0.4557684985404239, - "grad_norm": 0.6400881409645081, - "learning_rate": 1.6961543343063842e-05, - "loss": 0.1011, + "epoch": 0.9114675871871669, + "grad_norm": 0.37302878499031067, + "learning_rate": 1.3923549418752222e-05, + "loss": 0.0681, "step": 17955 }, { - "epoch": 0.45589541820027923, - "grad_norm": 0.6504119038581848, - "learning_rate": 1.696069721199814e-05, - "loss": 0.1161, + "epoch": 0.9117214071780293, + "grad_norm": 0.30719074606895447, + "learning_rate": 1.3921857285479806e-05, + "loss": 0.0694, "step": 17960 }, { - "epoch": 0.4560223378601345, - "grad_norm": 0.4299499988555908, - "learning_rate": 1.6959851080932435e-05, - "loss": 0.0942, + "epoch": 0.9119752271688918, + "grad_norm": 0.38624605536460876, + "learning_rate": 1.392016515220739e-05, + "loss": 0.0725, "step": 17965 }, { - "epoch": 0.45614925751998986, - "grad_norm": 0.5434197187423706, - "learning_rate": 1.6959004949866734e-05, - "loss": 0.1027, + "epoch": 0.9122290471597543, + "grad_norm": 0.299686461687088, + "learning_rate": 1.3918473018934973e-05, + "loss": 0.0604, "step": 17970 }, { - "epoch": 0.45627617717984514, - "grad_norm": 0.8671542406082153, - "learning_rate": 1.6958158818801032e-05, - "loss": 0.1115, + "epoch": 0.9124828671506168, + "grad_norm": 0.38363954424858093, + "learning_rate": 1.3916780885662555e-05, + "loss": 0.0683, "step": 17975 }, { - "epoch": 0.4564030968397005, - "grad_norm": 0.668302595615387, - "learning_rate": 1.695731268773533e-05, - "loss": 0.095, + "epoch": 0.9127366871414793, + "grad_norm": 0.3559637665748596, + "learning_rate": 1.391508875239014e-05, + "loss": 0.0681, "step": 17980 }, { - "epoch": 0.45653001649955577, - "grad_norm": 0.6672228574752808, - "learning_rate": 1.695646655666963e-05, - "loss": 0.1042, + "epoch": 0.9129905071323418, + "grad_norm": 0.41254445910453796, + "learning_rate": 1.3913396619117723e-05, + "loss": 0.0647, "step": 17985 }, { - "epoch": 0.4566569361594111, - "grad_norm": 2.1054468154907227, - "learning_rate": 1.6955620425603928e-05, - "loss": 0.1088, + "epoch": 0.9132443271232042, + "grad_norm": 0.3159063756465912, + "learning_rate": 1.3911704485845305e-05, + "loss": 0.0687, "step": 17990 }, { - "epoch": 0.4567838558192664, - "grad_norm": 0.8274752497673035, - "learning_rate": 1.6954774294538226e-05, - "loss": 0.1041, + "epoch": 0.9134981471140667, + "grad_norm": 0.5465823411941528, + "learning_rate": 1.391001235257289e-05, + "loss": 0.0702, "step": 17995 }, { - "epoch": 0.45691077547912173, - "grad_norm": 0.490213543176651, - "learning_rate": 1.6953928163472524e-05, - "loss": 0.1061, + "epoch": 0.9137519671049292, + "grad_norm": 0.37820661067962646, + "learning_rate": 1.3908320219300472e-05, + "loss": 0.0766, "step": 18000 }, { - "epoch": 0.457037695138977, - "grad_norm": 0.7491369247436523, - "learning_rate": 1.695308203240682e-05, - "loss": 0.115, + "epoch": 0.9140057870957916, + "grad_norm": 0.35360532999038696, + "learning_rate": 1.3906628086028058e-05, + "loss": 0.0726, "step": 18005 }, { - "epoch": 0.45716461479883236, - "grad_norm": 0.628951907157898, - "learning_rate": 1.6952235901341118e-05, - "loss": 0.1219, + "epoch": 0.9142596070866541, + "grad_norm": 0.3606135845184326, + "learning_rate": 1.3904935952755641e-05, + "loss": 0.0647, "step": 18010 }, { - "epoch": 0.45729153445868764, - "grad_norm": 0.841793954372406, - "learning_rate": 1.6951389770275416e-05, - "loss": 0.109, + "epoch": 0.9145134270775166, + "grad_norm": 0.3642440438270569, + "learning_rate": 1.3903243819483223e-05, + "loss": 0.0769, "step": 18015 }, { - "epoch": 0.457418454118543, - "grad_norm": 0.7292496562004089, - "learning_rate": 1.6950543639209715e-05, - "loss": 0.1134, + "epoch": 0.9147672470683791, + "grad_norm": 0.49041110277175903, + "learning_rate": 1.3901551686210808e-05, + "loss": 0.0738, "step": 18020 }, { - "epoch": 0.45754537377839827, - "grad_norm": 0.4222528338432312, - "learning_rate": 1.6949697508144013e-05, - "loss": 0.0884, + "epoch": 0.9150210670592416, + "grad_norm": 0.38325151801109314, + "learning_rate": 1.389985955293839e-05, + "loss": 0.062, "step": 18025 }, { - "epoch": 0.4576722934382536, - "grad_norm": 1.4051499366760254, - "learning_rate": 1.694885137707831e-05, - "loss": 0.1022, + "epoch": 0.9152748870501041, + "grad_norm": 0.4725360870361328, + "learning_rate": 1.3898167419665974e-05, + "loss": 0.0713, "step": 18030 }, { - "epoch": 0.4577992130981089, - "grad_norm": 0.6308716535568237, - "learning_rate": 1.694800524601261e-05, - "loss": 0.1048, + "epoch": 0.9155287070409666, + "grad_norm": 0.7474817037582397, + "learning_rate": 1.3896475286393559e-05, + "loss": 0.0604, "step": 18035 }, { - "epoch": 0.45792613275796423, - "grad_norm": 0.8664820194244385, - "learning_rate": 1.6947159114946908e-05, - "loss": 0.123, + "epoch": 0.915782527031829, + "grad_norm": 0.3981219530105591, + "learning_rate": 1.389478315312114e-05, + "loss": 0.0713, "step": 18040 }, { - "epoch": 0.4580530524178195, - "grad_norm": 0.5411807894706726, - "learning_rate": 1.6946312983881203e-05, - "loss": 0.1168, + "epoch": 0.9160363470226915, + "grad_norm": 0.32454854249954224, + "learning_rate": 1.3893091019848724e-05, + "loss": 0.0672, "step": 18045 }, { - "epoch": 0.45817997207767486, - "grad_norm": 0.648794949054718, - "learning_rate": 1.69454668528155e-05, - "loss": 0.0959, + "epoch": 0.916290167013554, + "grad_norm": 0.3997170329093933, + "learning_rate": 1.3891398886576308e-05, + "loss": 0.0828, "step": 18050 }, { - "epoch": 0.45830689173753014, - "grad_norm": 0.5803685188293457, - "learning_rate": 1.69446207217498e-05, - "loss": 0.119, + "epoch": 0.9165439870044164, + "grad_norm": 0.28583142161369324, + "learning_rate": 1.3889706753303891e-05, + "loss": 0.0685, "step": 18055 }, { - "epoch": 0.4584338113973855, - "grad_norm": 0.6222162842750549, - "learning_rate": 1.69437745906841e-05, - "loss": 0.0848, + "epoch": 0.9167978069952789, + "grad_norm": 0.48778054118156433, + "learning_rate": 1.3888014620031477e-05, + "loss": 0.0628, "step": 18060 }, { - "epoch": 0.45856073105724077, - "grad_norm": 0.47328075766563416, - "learning_rate": 1.6942928459618397e-05, - "loss": 0.1064, + "epoch": 0.9170516269861414, + "grad_norm": 0.38761603832244873, + "learning_rate": 1.3886322486759058e-05, + "loss": 0.0718, "step": 18065 }, { - "epoch": 0.45868765071709605, - "grad_norm": 0.5010561347007751, - "learning_rate": 1.6942082328552695e-05, - "loss": 0.0853, + "epoch": 0.9173054469770039, + "grad_norm": 0.5895673632621765, + "learning_rate": 1.3884630353486642e-05, + "loss": 0.0774, "step": 18070 }, { - "epoch": 0.4588145703769514, - "grad_norm": 0.7321553826332092, - "learning_rate": 1.6941236197486994e-05, - "loss": 0.0938, + "epoch": 0.9175592669678664, + "grad_norm": 0.30503496527671814, + "learning_rate": 1.3882938220214225e-05, + "loss": 0.0607, "step": 18075 }, { - "epoch": 0.4589414900368067, - "grad_norm": 0.7831577062606812, - "learning_rate": 1.6940390066421292e-05, - "loss": 0.1109, + "epoch": 0.9178130869587289, + "grad_norm": 0.4478660225868225, + "learning_rate": 1.3881246086941809e-05, + "loss": 0.0708, "step": 18080 }, { - "epoch": 0.459068409696662, - "grad_norm": 0.5651807188987732, - "learning_rate": 1.693954393535559e-05, - "loss": 0.096, + "epoch": 0.9180669069495914, + "grad_norm": 0.509867787361145, + "learning_rate": 1.3879553953669391e-05, + "loss": 0.0738, "step": 18085 }, { - "epoch": 0.4591953293565173, - "grad_norm": 0.9590626358985901, - "learning_rate": 1.6938697804289885e-05, - "loss": 0.1151, + "epoch": 0.9183207269404539, + "grad_norm": 1.060240387916565, + "learning_rate": 1.3877861820396976e-05, + "loss": 0.0762, "step": 18090 }, { - "epoch": 0.45932224901637264, - "grad_norm": 0.3489951193332672, - "learning_rate": 1.6937851673224184e-05, - "loss": 0.1005, + "epoch": 0.9185745469313163, + "grad_norm": 0.31515824794769287, + "learning_rate": 1.387616968712456e-05, + "loss": 0.0693, "step": 18095 }, { - "epoch": 0.4594491686762279, - "grad_norm": 0.5368314981460571, - "learning_rate": 1.6937005542158482e-05, - "loss": 0.1083, + "epoch": 0.9188283669221788, + "grad_norm": 0.32389432191848755, + "learning_rate": 1.3874477553852141e-05, + "loss": 0.0649, "step": 18100 }, { - "epoch": 0.45957608833608327, - "grad_norm": 0.6405259966850281, - "learning_rate": 1.693615941109278e-05, - "loss": 0.086, + "epoch": 0.9190821869130412, + "grad_norm": 0.4039955139160156, + "learning_rate": 1.3872785420579727e-05, + "loss": 0.0619, "step": 18105 }, { - "epoch": 0.45970300799593855, - "grad_norm": 1.388687252998352, - "learning_rate": 1.6935313280027076e-05, - "loss": 0.1249, + "epoch": 0.9193360069039037, + "grad_norm": 0.29883208870887756, + "learning_rate": 1.3871093287307309e-05, + "loss": 0.0692, "step": 18110 }, { - "epoch": 0.4598299276557939, - "grad_norm": 0.45437854528427124, - "learning_rate": 1.6934467148961374e-05, - "loss": 0.0818, + "epoch": 0.9195898268947662, + "grad_norm": 0.49272438883781433, + "learning_rate": 1.3869401154034894e-05, + "loss": 0.0791, "step": 18115 }, { - "epoch": 0.4599568473156492, - "grad_norm": 0.5922515988349915, - "learning_rate": 1.6933621017895672e-05, - "loss": 0.0878, + "epoch": 0.9198436468856287, + "grad_norm": 0.6225094795227051, + "learning_rate": 1.3867709020762476e-05, + "loss": 0.0692, "step": 18120 }, { - "epoch": 0.4600837669755045, - "grad_norm": 0.6254956722259521, - "learning_rate": 1.693277488682997e-05, - "loss": 0.0969, + "epoch": 0.9200974668764912, + "grad_norm": 0.3257209062576294, + "learning_rate": 1.386601688749006e-05, + "loss": 0.0773, "step": 18125 }, { - "epoch": 0.4602106866353598, - "grad_norm": 0.952159583568573, - "learning_rate": 1.693192875576427e-05, - "loss": 0.0961, + "epoch": 0.9203512868673537, + "grad_norm": 0.5255199074745178, + "learning_rate": 1.3864324754217644e-05, + "loss": 0.0753, "step": 18130 }, { - "epoch": 0.46033760629521514, - "grad_norm": 0.8591480255126953, - "learning_rate": 1.6931082624698568e-05, - "loss": 0.1033, + "epoch": 0.9206051068582162, + "grad_norm": 0.42838171124458313, + "learning_rate": 1.3862632620945226e-05, + "loss": 0.0654, "step": 18135 }, { - "epoch": 0.4604645259550704, - "grad_norm": 0.602628231048584, - "learning_rate": 1.6930236493632866e-05, - "loss": 0.1001, + "epoch": 0.9208589268490787, + "grad_norm": 0.37420716881752014, + "learning_rate": 1.386094048767281e-05, + "loss": 0.0669, "step": 18140 }, { - "epoch": 0.46059144561492577, - "grad_norm": 0.918921947479248, - "learning_rate": 1.6929390362567164e-05, - "loss": 0.1112, + "epoch": 0.9211127468399412, + "grad_norm": 0.4622999429702759, + "learning_rate": 1.3859248354400393e-05, + "loss": 0.0679, "step": 18145 }, { - "epoch": 0.46071836527478105, - "grad_norm": 0.45466238260269165, - "learning_rate": 1.692854423150146e-05, - "loss": 0.095, + "epoch": 0.9213665668308036, + "grad_norm": 0.4860808849334717, + "learning_rate": 1.3857556221127977e-05, + "loss": 0.0785, "step": 18150 }, { - "epoch": 0.4608452849346364, - "grad_norm": 0.5515931248664856, - "learning_rate": 1.6927698100435758e-05, - "loss": 0.099, + "epoch": 0.921620386821666, + "grad_norm": 0.32169151306152344, + "learning_rate": 1.3855864087855562e-05, + "loss": 0.0637, "step": 18155 }, { - "epoch": 0.4609722045944917, - "grad_norm": 0.5007161498069763, - "learning_rate": 1.6926851969370056e-05, - "loss": 0.0837, + "epoch": 0.9218742068125285, + "grad_norm": 0.37703365087509155, + "learning_rate": 1.3854171954583144e-05, + "loss": 0.065, "step": 18160 }, { - "epoch": 0.461099124254347, - "grad_norm": 0.6181071400642395, - "learning_rate": 1.6926005838304355e-05, - "loss": 0.1175, + "epoch": 0.922128026803391, + "grad_norm": 0.3887845277786255, + "learning_rate": 1.3852479821310728e-05, + "loss": 0.0599, "step": 18165 }, { - "epoch": 0.4612260439142023, - "grad_norm": 0.5658477544784546, - "learning_rate": 1.6925159707238653e-05, - "loss": 0.0953, + "epoch": 0.9223818467942535, + "grad_norm": 0.35805755853652954, + "learning_rate": 1.3850787688038311e-05, + "loss": 0.0732, "step": 18170 }, { - "epoch": 0.46135296357405764, - "grad_norm": 0.7083274722099304, - "learning_rate": 1.692431357617295e-05, - "loss": 0.1022, + "epoch": 0.922635666785116, + "grad_norm": 0.33947518467903137, + "learning_rate": 1.3849095554765895e-05, + "loss": 0.0667, "step": 18175 }, { - "epoch": 0.4614798832339129, - "grad_norm": 0.9264853000640869, - "learning_rate": 1.692346744510725e-05, - "loss": 0.131, + "epoch": 0.9228894867759785, + "grad_norm": 0.4389743208885193, + "learning_rate": 1.3847403421493477e-05, + "loss": 0.0693, "step": 18180 }, { - "epoch": 0.46160680289376826, - "grad_norm": 0.5138913989067078, - "learning_rate": 1.6922621314041548e-05, - "loss": 0.0787, + "epoch": 0.923143306766841, + "grad_norm": 0.44879239797592163, + "learning_rate": 1.3845711288221062e-05, + "loss": 0.0618, "step": 18185 }, { - "epoch": 0.46173372255362355, - "grad_norm": 0.7340983152389526, - "learning_rate": 1.6921775182975843e-05, - "loss": 0.1132, + "epoch": 0.9233971267577035, + "grad_norm": 0.3365269601345062, + "learning_rate": 1.3844019154948645e-05, + "loss": 0.0759, "step": 18190 }, { - "epoch": 0.4618606422134789, - "grad_norm": 1.04453706741333, - "learning_rate": 1.692092905191014e-05, - "loss": 0.0823, + "epoch": 0.923650946748566, + "grad_norm": 0.24301739037036896, + "learning_rate": 1.3842327021676227e-05, + "loss": 0.0658, "step": 18195 }, { - "epoch": 0.4619875618733342, - "grad_norm": 0.5991907119750977, - "learning_rate": 1.692008292084444e-05, - "loss": 0.1056, + "epoch": 0.9239047667394285, + "grad_norm": 0.324535995721817, + "learning_rate": 1.3840634888403812e-05, + "loss": 0.0572, "step": 18200 }, { - "epoch": 0.4621144815331895, - "grad_norm": 0.6745870113372803, - "learning_rate": 1.691923678977874e-05, - "loss": 0.1058, + "epoch": 0.9241585867302908, + "grad_norm": 0.3125174641609192, + "learning_rate": 1.3838942755131394e-05, + "loss": 0.0705, "step": 18205 }, { - "epoch": 0.4622414011930448, - "grad_norm": 0.6864849328994751, - "learning_rate": 1.6918390658713037e-05, - "loss": 0.0979, + "epoch": 0.9244124067211533, + "grad_norm": 0.41187185049057007, + "learning_rate": 1.383725062185898e-05, + "loss": 0.0654, "step": 18210 }, { - "epoch": 0.46236832085290014, - "grad_norm": 0.6246083378791809, - "learning_rate": 1.6917544527647335e-05, - "loss": 0.1096, + "epoch": 0.9246662267120158, + "grad_norm": 0.4254433512687683, + "learning_rate": 1.3835558488586563e-05, + "loss": 0.0832, "step": 18215 }, { - "epoch": 0.4624952405127554, - "grad_norm": 0.6875829696655273, - "learning_rate": 1.6916698396581634e-05, - "loss": 0.1039, + "epoch": 0.9249200467028783, + "grad_norm": 0.37248459458351135, + "learning_rate": 1.3833866355314145e-05, + "loss": 0.0635, "step": 18220 }, { - "epoch": 0.46262216017261076, - "grad_norm": 0.45151928067207336, - "learning_rate": 1.6915852265515932e-05, - "loss": 0.0949, + "epoch": 0.9251738666937408, + "grad_norm": 0.34162911772727966, + "learning_rate": 1.383217422204173e-05, + "loss": 0.0626, "step": 18225 }, { - "epoch": 0.46274907983246605, - "grad_norm": 0.5502700209617615, - "learning_rate": 1.6915006134450227e-05, - "loss": 0.1068, + "epoch": 0.9254276866846033, + "grad_norm": 0.4066483676433563, + "learning_rate": 1.3830482088769312e-05, + "loss": 0.0745, "step": 18230 }, { - "epoch": 0.4628759994923214, - "grad_norm": 0.6354668140411377, - "learning_rate": 1.6914160003384525e-05, - "loss": 0.0978, + "epoch": 0.9256815066754658, + "grad_norm": 0.600093424320221, + "learning_rate": 1.3828789955496896e-05, + "loss": 0.0673, "step": 18235 }, { - "epoch": 0.4630029191521767, - "grad_norm": 0.6441617608070374, - "learning_rate": 1.6913313872318824e-05, - "loss": 0.1245, + "epoch": 0.9259353266663283, + "grad_norm": 0.47302451729774475, + "learning_rate": 1.382709782222448e-05, + "loss": 0.0653, "step": 18240 }, { - "epoch": 0.46312983881203196, - "grad_norm": 0.447865754365921, - "learning_rate": 1.6912467741253122e-05, - "loss": 0.1072, + "epoch": 0.9261891466571908, + "grad_norm": 0.41447651386260986, + "learning_rate": 1.3825405688952063e-05, + "loss": 0.0709, "step": 18245 }, { - "epoch": 0.4632567584718873, - "grad_norm": 0.49521777033805847, - "learning_rate": 1.6911621610187417e-05, - "loss": 0.092, + "epoch": 0.9264429666480531, + "grad_norm": 0.31857216358184814, + "learning_rate": 1.3823713555679648e-05, + "loss": 0.0702, "step": 18250 }, { - "epoch": 0.4633836781317426, - "grad_norm": 0.88885098695755, - "learning_rate": 1.6910775479121716e-05, - "loss": 0.109, + "epoch": 0.9266967866389156, + "grad_norm": 0.39774346351623535, + "learning_rate": 1.382202142240723e-05, + "loss": 0.0757, "step": 18255 }, { - "epoch": 0.4635105977915979, - "grad_norm": 0.8041297197341919, - "learning_rate": 1.6909929348056014e-05, - "loss": 0.0921, + "epoch": 0.9269506066297781, + "grad_norm": 0.3512285053730011, + "learning_rate": 1.3820329289134813e-05, + "loss": 0.0664, "step": 18260 }, { - "epoch": 0.4636375174514532, - "grad_norm": 0.6937339901924133, - "learning_rate": 1.6909083216990313e-05, - "loss": 0.0889, + "epoch": 0.9272044266206406, + "grad_norm": 0.42229223251342773, + "learning_rate": 1.3818637155862398e-05, + "loss": 0.0662, "step": 18265 }, { - "epoch": 0.46376443711130855, - "grad_norm": 0.5631436109542847, - "learning_rate": 1.690823708592461e-05, - "loss": 0.1018, + "epoch": 0.9274582466115031, + "grad_norm": 0.4002987742424011, + "learning_rate": 1.381694502258998e-05, + "loss": 0.0679, "step": 18270 }, { - "epoch": 0.46389135677116383, - "grad_norm": 0.6517699956893921, - "learning_rate": 1.690739095485891e-05, - "loss": 0.0825, + "epoch": 0.9277120666023656, + "grad_norm": 0.4806728661060333, + "learning_rate": 1.3815252889317564e-05, + "loss": 0.0645, "step": 18275 }, { - "epoch": 0.46401827643101917, - "grad_norm": 0.4982329308986664, - "learning_rate": 1.6906544823793208e-05, - "loss": 0.1189, + "epoch": 0.9279658865932281, + "grad_norm": 0.29817402362823486, + "learning_rate": 1.3813560756045147e-05, + "loss": 0.0686, "step": 18280 }, { - "epoch": 0.46414519609087446, - "grad_norm": 0.41290906071662903, - "learning_rate": 1.6905698692727506e-05, - "loss": 0.0851, + "epoch": 0.9282197065840906, + "grad_norm": 0.4930202066898346, + "learning_rate": 1.3811868622772731e-05, + "loss": 0.0744, "step": 18285 }, { - "epoch": 0.4642721157507298, - "grad_norm": 0.5334296226501465, - "learning_rate": 1.69048525616618e-05, - "loss": 0.1103, + "epoch": 0.9284735265749531, + "grad_norm": 0.4029638171195984, + "learning_rate": 1.3810176489500313e-05, + "loss": 0.0637, "step": 18290 }, { - "epoch": 0.4643990354105851, - "grad_norm": 0.4386861026287079, - "learning_rate": 1.69040064305961e-05, - "loss": 0.1134, + "epoch": 0.9287273465658156, + "grad_norm": 0.45114755630493164, + "learning_rate": 1.3808484356227898e-05, + "loss": 0.0668, "step": 18295 }, { - "epoch": 0.4645259550704404, - "grad_norm": 0.7337814569473267, - "learning_rate": 1.6903160299530398e-05, - "loss": 0.1073, + "epoch": 0.928981166556678, + "grad_norm": 0.3982153832912445, + "learning_rate": 1.380679222295548e-05, + "loss": 0.0613, "step": 18300 }, { - "epoch": 0.4646528747302957, - "grad_norm": 0.7278926968574524, - "learning_rate": 1.6902314168464696e-05, - "loss": 0.0799, + "epoch": 0.9292349865475404, + "grad_norm": 0.3455192446708679, + "learning_rate": 1.3805100089683065e-05, + "loss": 0.0707, "step": 18305 }, { - "epoch": 0.46477979439015105, - "grad_norm": 0.4607621133327484, - "learning_rate": 1.6901468037398995e-05, - "loss": 0.1006, + "epoch": 0.9294888065384029, + "grad_norm": 0.3606429398059845, + "learning_rate": 1.3803407956410649e-05, + "loss": 0.0708, "step": 18310 }, { - "epoch": 0.46490671405000633, - "grad_norm": 0.5767455101013184, - "learning_rate": 1.6900621906333293e-05, - "loss": 0.1006, + "epoch": 0.9297426265292654, + "grad_norm": 0.45667099952697754, + "learning_rate": 1.380171582313823e-05, + "loss": 0.0642, "step": 18315 }, { - "epoch": 0.46503363370986167, - "grad_norm": 0.8062119483947754, - "learning_rate": 1.689977577526759e-05, - "loss": 0.086, + "epoch": 0.9299964465201279, + "grad_norm": 0.38719412684440613, + "learning_rate": 1.3800023689865816e-05, + "loss": 0.0648, "step": 18320 }, { - "epoch": 0.46516055336971696, - "grad_norm": 0.5781612396240234, - "learning_rate": 1.689892964420189e-05, - "loss": 0.0881, + "epoch": 0.9302502665109904, + "grad_norm": 0.5505213737487793, + "learning_rate": 1.3798331556593398e-05, + "loss": 0.0653, "step": 18325 }, { - "epoch": 0.4652874730295723, - "grad_norm": 0.37657222151756287, - "learning_rate": 1.6898083513136185e-05, - "loss": 0.0832, + "epoch": 0.9305040865018529, + "grad_norm": 0.5225023627281189, + "learning_rate": 1.3796639423320981e-05, + "loss": 0.0715, "step": 18330 }, { - "epoch": 0.4654143926894276, - "grad_norm": 0.8385258913040161, - "learning_rate": 1.6897237382070483e-05, - "loss": 0.0935, + "epoch": 0.9307579064927154, + "grad_norm": 0.2744523286819458, + "learning_rate": 1.3794947290048566e-05, + "loss": 0.0596, "step": 18335 }, { - "epoch": 0.4655413123492829, - "grad_norm": 0.691718339920044, - "learning_rate": 1.6896391251004782e-05, - "loss": 0.1035, + "epoch": 0.9310117264835779, + "grad_norm": 0.4220918118953705, + "learning_rate": 1.3793255156776148e-05, + "loss": 0.0703, "step": 18340 }, { - "epoch": 0.4656682320091382, - "grad_norm": 0.7118987441062927, - "learning_rate": 1.689554511993908e-05, - "loss": 0.0852, + "epoch": 0.9312655464744404, + "grad_norm": 0.4151606261730194, + "learning_rate": 1.3791563023503732e-05, + "loss": 0.0629, "step": 18345 }, { - "epoch": 0.46579515166899355, - "grad_norm": 0.4598201811313629, - "learning_rate": 1.689469898887338e-05, - "loss": 0.1068, + "epoch": 0.9315193664653028, + "grad_norm": 0.43558046221733093, + "learning_rate": 1.3789870890231315e-05, + "loss": 0.0554, "step": 18350 }, { - "epoch": 0.46592207132884883, - "grad_norm": 0.3606215715408325, - "learning_rate": 1.6893852857807677e-05, - "loss": 0.0797, + "epoch": 0.9317731864561652, + "grad_norm": 0.3388653099536896, + "learning_rate": 1.3788178756958899e-05, + "loss": 0.0702, "step": 18355 }, { - "epoch": 0.46604899098870417, - "grad_norm": 0.47170037031173706, - "learning_rate": 1.6893006726741975e-05, - "loss": 0.077, + "epoch": 0.9320270064470277, + "grad_norm": 0.5925946235656738, + "learning_rate": 1.3786486623686484e-05, + "loss": 0.0682, "step": 18360 }, { - "epoch": 0.46617591064855946, - "grad_norm": 0.5751693844795227, - "learning_rate": 1.6892160595676274e-05, - "loss": 0.1, + "epoch": 0.9322808264378902, + "grad_norm": 1.0079941749572754, + "learning_rate": 1.3784794490414066e-05, + "loss": 0.0663, "step": 18365 }, { - "epoch": 0.4663028303084148, - "grad_norm": 0.8554493188858032, - "learning_rate": 1.689131446461057e-05, - "loss": 0.1206, + "epoch": 0.9325346464287527, + "grad_norm": 0.38128307461738586, + "learning_rate": 1.378310235714165e-05, + "loss": 0.0671, "step": 18370 }, { - "epoch": 0.4664297499682701, - "grad_norm": 0.5989235043525696, - "learning_rate": 1.6890468333544867e-05, - "loss": 0.0914, + "epoch": 0.9327884664196152, + "grad_norm": 0.3320787847042084, + "learning_rate": 1.3781410223869233e-05, + "loss": 0.0654, "step": 18375 }, { - "epoch": 0.4665566696281254, - "grad_norm": 0.35792306065559387, - "learning_rate": 1.6889622202479166e-05, - "loss": 0.0827, + "epoch": 0.9330422864104777, + "grad_norm": 0.2928427755832672, + "learning_rate": 1.3779718090596817e-05, + "loss": 0.0761, "step": 18380 }, { - "epoch": 0.4666835892879807, - "grad_norm": 0.7403062582015991, - "learning_rate": 1.6888776071413464e-05, - "loss": 0.0922, + "epoch": 0.9332961064013402, + "grad_norm": 0.4131205081939697, + "learning_rate": 1.3778025957324398e-05, + "loss": 0.0728, "step": 18385 }, { - "epoch": 0.46681050894783604, - "grad_norm": 0.6503852605819702, - "learning_rate": 1.688792994034776e-05, - "loss": 0.1161, + "epoch": 0.9335499263922027, + "grad_norm": 0.38338300585746765, + "learning_rate": 1.3776333824051984e-05, + "loss": 0.0645, "step": 18390 }, { - "epoch": 0.46693742860769133, - "grad_norm": 0.6267450451850891, - "learning_rate": 1.6887083809282057e-05, - "loss": 0.0922, + "epoch": 0.9338037463830652, + "grad_norm": 0.387997031211853, + "learning_rate": 1.3774641690779567e-05, + "loss": 0.0662, "step": 18395 }, { - "epoch": 0.46706434826754667, - "grad_norm": 0.4117092192173004, - "learning_rate": 1.6886237678216356e-05, - "loss": 0.1004, + "epoch": 0.9340575663739276, + "grad_norm": 0.384552925825119, + "learning_rate": 1.377294955750715e-05, + "loss": 0.0722, "step": 18400 }, { - "epoch": 0.46719126792740195, - "grad_norm": 0.7530947327613831, - "learning_rate": 1.6885391547150654e-05, - "loss": 0.0976, + "epoch": 0.93431138636479, + "grad_norm": 0.36367639899253845, + "learning_rate": 1.3771257424234734e-05, + "loss": 0.0649, "step": 18405 }, { - "epoch": 0.46731818758725724, - "grad_norm": 0.6101336479187012, - "learning_rate": 1.6884545416084953e-05, - "loss": 0.1144, + "epoch": 0.9345652063556525, + "grad_norm": 0.7512829303741455, + "learning_rate": 1.3769565290962316e-05, + "loss": 0.0625, "step": 18410 }, { - "epoch": 0.4674451072471126, - "grad_norm": 0.7076879143714905, - "learning_rate": 1.688369928501925e-05, - "loss": 0.1087, + "epoch": 0.934819026346515, + "grad_norm": 0.38222187757492065, + "learning_rate": 1.3767873157689901e-05, + "loss": 0.072, "step": 18415 }, { - "epoch": 0.46757202690696786, - "grad_norm": 0.6824182271957397, - "learning_rate": 1.688285315395355e-05, - "loss": 0.093, + "epoch": 0.9350728463373775, + "grad_norm": 0.6945295929908752, + "learning_rate": 1.3766181024417485e-05, + "loss": 0.0635, "step": 18420 }, { - "epoch": 0.4676989465668232, - "grad_norm": 0.6147491931915283, - "learning_rate": 1.6882007022887848e-05, - "loss": 0.1093, + "epoch": 0.93532666632824, + "grad_norm": 0.4692384600639343, + "learning_rate": 1.3764488891145067e-05, + "loss": 0.0697, "step": 18425 }, { - "epoch": 0.4678258662266785, - "grad_norm": 0.42010927200317383, - "learning_rate": 1.6881160891822143e-05, - "loss": 0.1046, + "epoch": 0.9355804863191025, + "grad_norm": 0.3983794152736664, + "learning_rate": 1.3762796757872652e-05, + "loss": 0.0651, "step": 18430 }, { - "epoch": 0.46795278588653383, - "grad_norm": 0.5939725637435913, - "learning_rate": 1.688031476075644e-05, - "loss": 0.0974, + "epoch": 0.935834306309965, + "grad_norm": 0.450428307056427, + "learning_rate": 1.3761104624600234e-05, + "loss": 0.0644, "step": 18435 }, { - "epoch": 0.4680797055463891, - "grad_norm": 0.3769335448741913, - "learning_rate": 1.687946862969074e-05, - "loss": 0.1307, + "epoch": 0.9360881263008275, + "grad_norm": 0.26266250014305115, + "learning_rate": 1.3759412491327817e-05, + "loss": 0.0562, "step": 18440 }, { - "epoch": 0.46820662520624445, - "grad_norm": 0.5621718764305115, - "learning_rate": 1.6878622498625038e-05, - "loss": 0.0873, + "epoch": 0.93634194629169, + "grad_norm": 0.4074264168739319, + "learning_rate": 1.3757720358055403e-05, + "loss": 0.0627, "step": 18445 }, { - "epoch": 0.46833354486609974, - "grad_norm": 0.8378008604049683, - "learning_rate": 1.6877776367559336e-05, - "loss": 0.0912, + "epoch": 0.9365957662825524, + "grad_norm": 0.41876572370529175, + "learning_rate": 1.3756028224782985e-05, + "loss": 0.0605, "step": 18450 }, { - "epoch": 0.4684604645259551, - "grad_norm": 0.44689279794692993, - "learning_rate": 1.6876930236493635e-05, - "loss": 0.071, + "epoch": 0.9368495862734149, + "grad_norm": 0.38248318433761597, + "learning_rate": 1.375433609151057e-05, + "loss": 0.0615, "step": 18455 }, { - "epoch": 0.46858738418581036, - "grad_norm": 0.5002709031105042, - "learning_rate": 1.6876084105427933e-05, - "loss": 0.0924, + "epoch": 0.9371034062642773, + "grad_norm": 0.38324958086013794, + "learning_rate": 1.3752643958238152e-05, + "loss": 0.0664, "step": 18460 }, { - "epoch": 0.4687143038456657, - "grad_norm": 0.48269546031951904, - "learning_rate": 1.687523797436223e-05, - "loss": 0.0987, + "epoch": 0.9373572262551398, + "grad_norm": 0.5330802202224731, + "learning_rate": 1.3750951824965735e-05, + "loss": 0.0593, "step": 18465 }, { - "epoch": 0.468841223505521, - "grad_norm": 0.7411462068557739, - "learning_rate": 1.6874391843296527e-05, - "loss": 0.096, + "epoch": 0.9376110462460023, + "grad_norm": 1.0437511205673218, + "learning_rate": 1.374925969169332e-05, + "loss": 0.064, "step": 18470 }, { - "epoch": 0.4689681431653763, - "grad_norm": 0.460835725069046, - "learning_rate": 1.6873545712230825e-05, - "loss": 0.0971, + "epoch": 0.9378648662368648, + "grad_norm": 0.9003655910491943, + "learning_rate": 1.3747567558420902e-05, + "loss": 0.0683, "step": 18475 }, { - "epoch": 0.4690950628252316, - "grad_norm": 0.7411019802093506, - "learning_rate": 1.6872699581165123e-05, - "loss": 0.1068, + "epoch": 0.9381186862277273, + "grad_norm": 0.32290807366371155, + "learning_rate": 1.3745875425148484e-05, + "loss": 0.068, "step": 18480 }, { - "epoch": 0.46922198248508695, - "grad_norm": 0.6132268309593201, - "learning_rate": 1.6871853450099422e-05, - "loss": 0.1052, + "epoch": 0.9383725062185898, + "grad_norm": 0.6567671895027161, + "learning_rate": 1.374418329187607e-05, + "loss": 0.0661, "step": 18485 }, { - "epoch": 0.46934890214494224, - "grad_norm": 0.4440930485725403, - "learning_rate": 1.687100731903372e-05, - "loss": 0.119, + "epoch": 0.9386263262094523, + "grad_norm": 0.35361889004707336, + "learning_rate": 1.3742491158603653e-05, + "loss": 0.0689, "step": 18490 }, { - "epoch": 0.4694758218047976, - "grad_norm": 0.6292732357978821, - "learning_rate": 1.687016118796802e-05, - "loss": 0.0809, + "epoch": 0.9388801462003148, + "grad_norm": 0.3336738049983978, + "learning_rate": 1.3740799025331235e-05, + "loss": 0.0594, "step": 18495 }, { - "epoch": 0.46960274146465286, - "grad_norm": 0.4338931143283844, - "learning_rate": 1.6869315056902317e-05, - "loss": 0.1004, + "epoch": 0.9391339661911772, + "grad_norm": 0.3773046135902405, + "learning_rate": 1.373910689205882e-05, + "loss": 0.0704, "step": 18500 }, { - "epoch": 0.4697296611245082, - "grad_norm": 1.315097689628601, - "learning_rate": 1.6868468925836615e-05, - "loss": 0.1164, + "epoch": 0.9393877861820397, + "grad_norm": 0.3520534038543701, + "learning_rate": 1.3737414758786402e-05, + "loss": 0.0704, "step": 18505 }, { - "epoch": 0.4698565807843635, - "grad_norm": 0.4814586639404297, - "learning_rate": 1.686762279477091e-05, - "loss": 0.1023, + "epoch": 0.9396416061729022, + "grad_norm": 0.3274901807308197, + "learning_rate": 1.3735722625513987e-05, + "loss": 0.071, "step": 18510 }, { - "epoch": 0.4699835004442188, - "grad_norm": 0.9216117858886719, - "learning_rate": 1.686677666370521e-05, - "loss": 0.0922, + "epoch": 0.9398954261637646, + "grad_norm": 0.25946927070617676, + "learning_rate": 1.373403049224157e-05, + "loss": 0.0698, "step": 18515 }, { - "epoch": 0.4701104201040741, - "grad_norm": 0.587454080581665, - "learning_rate": 1.6865930532639507e-05, - "loss": 0.0782, + "epoch": 0.9401492461546271, + "grad_norm": 0.5171555876731873, + "learning_rate": 1.3732338358969152e-05, + "loss": 0.0553, "step": 18520 }, { - "epoch": 0.47023733976392945, - "grad_norm": 0.5020370483398438, - "learning_rate": 1.6865084401573806e-05, - "loss": 0.1212, + "epoch": 0.9404030661454896, + "grad_norm": 0.32266131043434143, + "learning_rate": 1.3730646225696738e-05, + "loss": 0.0538, "step": 18525 }, { - "epoch": 0.47036425942378474, - "grad_norm": 0.6851378083229065, - "learning_rate": 1.68642382705081e-05, - "loss": 0.1039, + "epoch": 0.9406568861363521, + "grad_norm": 0.43915310502052307, + "learning_rate": 1.372895409242432e-05, + "loss": 0.0583, "step": 18530 }, { - "epoch": 0.4704911790836401, - "grad_norm": 0.8034664392471313, - "learning_rate": 1.68633921394424e-05, - "loss": 0.1005, + "epoch": 0.9409107061272146, + "grad_norm": 0.5458141565322876, + "learning_rate": 1.3727261959151903e-05, + "loss": 0.0679, "step": 18535 }, { - "epoch": 0.47061809874349536, - "grad_norm": 0.5177631378173828, - "learning_rate": 1.6862546008376698e-05, - "loss": 0.109, + "epoch": 0.9411645261180771, + "grad_norm": 0.33626362681388855, + "learning_rate": 1.3725569825879488e-05, + "loss": 0.0617, "step": 18540 }, { - "epoch": 0.4707450184033507, - "grad_norm": 0.7105624079704285, - "learning_rate": 1.6861699877310996e-05, - "loss": 0.0984, + "epoch": 0.9414183461089395, + "grad_norm": 0.3278324007987976, + "learning_rate": 1.372387769260707e-05, + "loss": 0.0599, "step": 18545 }, { - "epoch": 0.470871938063206, - "grad_norm": 0.5277933478355408, - "learning_rate": 1.6860853746245294e-05, - "loss": 0.1044, + "epoch": 0.941672166099802, + "grad_norm": 0.3287067115306854, + "learning_rate": 1.3722185559334655e-05, + "loss": 0.0635, "step": 18550 }, { - "epoch": 0.4709988577230613, - "grad_norm": 0.532352089881897, - "learning_rate": 1.6860007615179593e-05, - "loss": 0.0969, + "epoch": 0.9419259860906645, + "grad_norm": 0.3660200834274292, + "learning_rate": 1.3720493426062237e-05, + "loss": 0.055, "step": 18555 }, { - "epoch": 0.4711257773829166, - "grad_norm": 1.3218448162078857, - "learning_rate": 1.685916148411389e-05, - "loss": 0.0803, + "epoch": 0.942179806081527, + "grad_norm": 0.41835132241249084, + "learning_rate": 1.371880129278982e-05, + "loss": 0.0736, "step": 18560 }, { - "epoch": 0.47125269704277195, - "grad_norm": 0.7195568084716797, - "learning_rate": 1.685831535304819e-05, - "loss": 0.097, + "epoch": 0.9424336260723895, + "grad_norm": 0.44712701439857483, + "learning_rate": 1.3717109159517406e-05, + "loss": 0.0607, "step": 18565 }, { - "epoch": 0.47137961670262724, - "grad_norm": 0.8135544657707214, - "learning_rate": 1.6857469221982485e-05, - "loss": 0.0932, + "epoch": 0.9426874460632519, + "grad_norm": 0.33872178196907043, + "learning_rate": 1.3715417026244988e-05, + "loss": 0.0619, "step": 18570 }, { - "epoch": 0.4715065363624826, - "grad_norm": 0.547103226184845, - "learning_rate": 1.6856623090916783e-05, - "loss": 0.0939, + "epoch": 0.9429412660541144, + "grad_norm": 0.6011540293693542, + "learning_rate": 1.3713724892972571e-05, + "loss": 0.0646, "step": 18575 }, { - "epoch": 0.47163345602233786, - "grad_norm": 0.5264065861701965, - "learning_rate": 1.685577695985108e-05, - "loss": 0.1032, + "epoch": 0.9431950860449769, + "grad_norm": 0.5028777122497559, + "learning_rate": 1.3712032759700155e-05, + "loss": 0.0685, "step": 18580 }, { - "epoch": 0.47176037568219314, - "grad_norm": 0.36396318674087524, - "learning_rate": 1.685493082878538e-05, - "loss": 0.1028, + "epoch": 0.9434489060358394, + "grad_norm": 0.595824122428894, + "learning_rate": 1.3710340626427739e-05, + "loss": 0.0637, "step": 18585 }, { - "epoch": 0.4718872953420485, - "grad_norm": 0.661853015422821, - "learning_rate": 1.6854084697719678e-05, - "loss": 0.1117, + "epoch": 0.9437027260267019, + "grad_norm": 0.33471375703811646, + "learning_rate": 1.370864849315532e-05, + "loss": 0.0629, "step": 18590 }, { - "epoch": 0.47201421500190377, - "grad_norm": 1.0029263496398926, - "learning_rate": 1.6853238566653977e-05, - "loss": 0.1052, + "epoch": 0.9439565460175643, + "grad_norm": 0.3110968768596649, + "learning_rate": 1.3706956359882906e-05, + "loss": 0.0587, "step": 18595 }, { - "epoch": 0.4721411346617591, - "grad_norm": 0.6259196400642395, - "learning_rate": 1.6852392435588275e-05, - "loss": 0.0876, + "epoch": 0.9442103660084268, + "grad_norm": 0.303251713514328, + "learning_rate": 1.370526422661049e-05, + "loss": 0.0684, "step": 18600 }, { - "epoch": 0.4722680543216144, - "grad_norm": 1.0778429508209229, - "learning_rate": 1.6851546304522573e-05, - "loss": 0.104, + "epoch": 0.9444641859992893, + "grad_norm": 0.5813922882080078, + "learning_rate": 1.3703572093338073e-05, + "loss": 0.0623, "step": 18605 }, { - "epoch": 0.47239497398146973, - "grad_norm": 0.5674114227294922, - "learning_rate": 1.6850700173456872e-05, - "loss": 0.0935, + "epoch": 0.9447180059901518, + "grad_norm": 0.4020741283893585, + "learning_rate": 1.3701879960065656e-05, + "loss": 0.065, "step": 18610 }, { - "epoch": 0.472521893641325, - "grad_norm": 0.5380696654319763, - "learning_rate": 1.6849854042391167e-05, - "loss": 0.0873, + "epoch": 0.9449718259810143, + "grad_norm": 0.504642903804779, + "learning_rate": 1.3700187826793238e-05, + "loss": 0.0552, "step": 18615 }, { - "epoch": 0.47264881330118036, - "grad_norm": 0.5506348609924316, - "learning_rate": 1.6849007911325465e-05, - "loss": 0.104, + "epoch": 0.9452256459718767, + "grad_norm": 0.4510534703731537, + "learning_rate": 1.3698495693520823e-05, + "loss": 0.0653, "step": 18620 }, { - "epoch": 0.47277573296103564, - "grad_norm": 0.9700071811676025, - "learning_rate": 1.6848161780259764e-05, - "loss": 0.0904, + "epoch": 0.9454794659627392, + "grad_norm": 0.40231838822364807, + "learning_rate": 1.3696803560248407e-05, + "loss": 0.0604, "step": 18625 }, { - "epoch": 0.472902652620891, - "grad_norm": 0.513697624206543, - "learning_rate": 1.6847315649194062e-05, - "loss": 0.0891, + "epoch": 0.9457332859536017, + "grad_norm": 0.303325891494751, + "learning_rate": 1.3695111426975989e-05, + "loss": 0.0552, "step": 18630 }, { - "epoch": 0.47302957228074627, - "grad_norm": 0.7445078492164612, - "learning_rate": 1.684646951812836e-05, - "loss": 0.0936, + "epoch": 0.9459871059444642, + "grad_norm": 0.2996688187122345, + "learning_rate": 1.3693419293703574e-05, + "loss": 0.0658, "step": 18635 }, { - "epoch": 0.4731564919406016, - "grad_norm": 0.6322034597396851, - "learning_rate": 1.684562338706266e-05, - "loss": 0.0946, + "epoch": 0.9462409259353267, + "grad_norm": 0.57343989610672, + "learning_rate": 1.3691727160431156e-05, + "loss": 0.0678, "step": 18640 }, { - "epoch": 0.4732834116004569, - "grad_norm": 0.6050710678100586, - "learning_rate": 1.6844777255996957e-05, - "loss": 0.0857, + "epoch": 0.9464947459261891, + "grad_norm": 0.36376577615737915, + "learning_rate": 1.3690035027158741e-05, + "loss": 0.072, "step": 18645 }, { - "epoch": 0.47341033126031223, - "grad_norm": 0.7068480253219604, - "learning_rate": 1.6843931124931256e-05, - "loss": 0.0937, + "epoch": 0.9467485659170516, + "grad_norm": 0.4058065414428711, + "learning_rate": 1.3688342893886325e-05, + "loss": 0.0642, "step": 18650 }, { - "epoch": 0.4735372509201675, - "grad_norm": 0.640339732170105, - "learning_rate": 1.684308499386555e-05, - "loss": 0.1008, + "epoch": 0.9470023859079141, + "grad_norm": 0.5225498676300049, + "learning_rate": 1.3686650760613906e-05, + "loss": 0.0687, "step": 18655 }, { - "epoch": 0.47366417058002286, - "grad_norm": 0.4708218276500702, - "learning_rate": 1.684223886279985e-05, - "loss": 0.1039, + "epoch": 0.9472562058987766, + "grad_norm": 0.3555619716644287, + "learning_rate": 1.3684958627341492e-05, + "loss": 0.0575, "step": 18660 }, { - "epoch": 0.47379109023987814, - "grad_norm": 0.5731831789016724, - "learning_rate": 1.6841392731734147e-05, - "loss": 0.1143, + "epoch": 0.9475100258896391, + "grad_norm": 0.32786718010902405, + "learning_rate": 1.3683266494069074e-05, + "loss": 0.0603, "step": 18665 }, { - "epoch": 0.4739180098997335, - "grad_norm": 0.4256850779056549, - "learning_rate": 1.6840546600668446e-05, - "loss": 0.0985, + "epoch": 0.9477638458805016, + "grad_norm": 0.497963547706604, + "learning_rate": 1.3681574360796657e-05, + "loss": 0.0707, "step": 18670 }, { - "epoch": 0.47404492955958877, - "grad_norm": 0.547997236251831, - "learning_rate": 1.683970046960274e-05, - "loss": 0.0875, + "epoch": 0.948017665871364, + "grad_norm": 0.3656896650791168, + "learning_rate": 1.3679882227524242e-05, + "loss": 0.0636, "step": 18675 }, { - "epoch": 0.4741718492194441, - "grad_norm": 0.5757945775985718, - "learning_rate": 1.683885433853704e-05, - "loss": 0.0873, + "epoch": 0.9482714858622265, + "grad_norm": 0.38868141174316406, + "learning_rate": 1.3678190094251824e-05, + "loss": 0.0619, "step": 18680 }, { - "epoch": 0.4742987688792994, - "grad_norm": 0.5529053807258606, - "learning_rate": 1.6838008207471338e-05, - "loss": 0.1079, + "epoch": 0.948525305853089, + "grad_norm": 0.4414658844470978, + "learning_rate": 1.3676497960979406e-05, + "loss": 0.0726, "step": 18685 }, { - "epoch": 0.47442568853915473, - "grad_norm": 0.5818985104560852, - "learning_rate": 1.6837162076405636e-05, - "loss": 0.1047, + "epoch": 0.9487791258439515, + "grad_norm": 0.38483738899230957, + "learning_rate": 1.3674805827706991e-05, + "loss": 0.068, "step": 18690 }, { - "epoch": 0.47455260819901, - "grad_norm": 0.4426269829273224, - "learning_rate": 1.6836315945339934e-05, - "loss": 0.102, + "epoch": 0.9490329458348139, + "grad_norm": 0.33117255568504333, + "learning_rate": 1.3673113694434575e-05, + "loss": 0.0637, "step": 18695 }, { - "epoch": 0.47467952785886536, - "grad_norm": 0.9694790840148926, - "learning_rate": 1.6835469814274233e-05, - "loss": 0.1034, + "epoch": 0.9492867658256764, + "grad_norm": 0.633201539516449, + "learning_rate": 1.367142156116216e-05, + "loss": 0.0634, "step": 18700 }, { - "epoch": 0.47480644751872064, - "grad_norm": 0.7920471429824829, - "learning_rate": 1.683462368320853e-05, - "loss": 0.1069, + "epoch": 0.9495405858165389, + "grad_norm": 0.4020320177078247, + "learning_rate": 1.3669729427889742e-05, + "loss": 0.0544, "step": 18705 }, { - "epoch": 0.474933367178576, - "grad_norm": 1.0693694353103638, - "learning_rate": 1.683377755214283e-05, - "loss": 0.096, + "epoch": 0.9497944058074014, + "grad_norm": 0.42026662826538086, + "learning_rate": 1.3668037294617324e-05, + "loss": 0.0612, "step": 18710 }, { - "epoch": 0.47506028683843127, - "grad_norm": 0.4640830457210541, - "learning_rate": 1.6832931421077125e-05, - "loss": 0.0884, + "epoch": 0.9500482257982639, + "grad_norm": 2.030712842941284, + "learning_rate": 1.3666345161344909e-05, + "loss": 0.0656, "step": 18715 }, { - "epoch": 0.4751872064982866, - "grad_norm": 2.663846254348755, - "learning_rate": 1.6832085290011423e-05, - "loss": 0.0762, + "epoch": 0.9503020457891264, + "grad_norm": 0.44346386194229126, + "learning_rate": 1.3664653028072493e-05, + "loss": 0.0668, "step": 18720 }, { - "epoch": 0.4753141261581419, - "grad_norm": 0.6790435910224915, - "learning_rate": 1.683123915894572e-05, - "loss": 0.0924, + "epoch": 0.9505558657799889, + "grad_norm": 0.41186749935150146, + "learning_rate": 1.3662960894800074e-05, + "loss": 0.0753, "step": 18725 }, { - "epoch": 0.47544104581799723, - "grad_norm": 0.6927458643913269, - "learning_rate": 1.683039302788002e-05, - "loss": 0.0909, + "epoch": 0.9508096857708513, + "grad_norm": 0.3317037522792816, + "learning_rate": 1.366126876152766e-05, + "loss": 0.0677, "step": 18730 }, { - "epoch": 0.4755679654778525, - "grad_norm": 0.4892197847366333, - "learning_rate": 1.6829546896814318e-05, - "loss": 0.0944, + "epoch": 0.9510635057617138, + "grad_norm": 0.3373609781265259, + "learning_rate": 1.3659576628255241e-05, + "loss": 0.0704, "step": 18735 }, { - "epoch": 0.47569488513770786, - "grad_norm": 0.8895225524902344, - "learning_rate": 1.6828700765748617e-05, - "loss": 0.1143, + "epoch": 0.9513173257525763, + "grad_norm": 0.3054006099700928, + "learning_rate": 1.3657884494982825e-05, + "loss": 0.0564, "step": 18740 }, { - "epoch": 0.47582180479756314, - "grad_norm": 1.2972551584243774, - "learning_rate": 1.6827854634682915e-05, - "loss": 0.1088, + "epoch": 0.9515711457434387, + "grad_norm": 0.3507018983364105, + "learning_rate": 1.365619236171041e-05, + "loss": 0.0682, "step": 18745 }, { - "epoch": 0.4759487244574185, - "grad_norm": 0.5416139364242554, - "learning_rate": 1.6827008503617213e-05, - "loss": 0.1051, + "epoch": 0.9518249657343012, + "grad_norm": 0.501518189907074, + "learning_rate": 1.3654500228437992e-05, + "loss": 0.0721, "step": 18750 }, { - "epoch": 0.47607564411727377, - "grad_norm": 0.9011152982711792, - "learning_rate": 1.682616237255151e-05, - "loss": 0.1002, + "epoch": 0.9520787857251637, + "grad_norm": 0.3085997998714447, + "learning_rate": 1.3652808095165577e-05, + "loss": 0.06, "step": 18755 }, { - "epoch": 0.47620256377712905, - "grad_norm": 0.8603365421295166, - "learning_rate": 1.6825316241485807e-05, - "loss": 0.1156, + "epoch": 0.9523326057160262, + "grad_norm": 0.700425386428833, + "learning_rate": 1.365111596189316e-05, + "loss": 0.0705, "step": 18760 }, { - "epoch": 0.4763294834369844, - "grad_norm": 0.5183418393135071, - "learning_rate": 1.6824470110420105e-05, - "loss": 0.0994, + "epoch": 0.9525864257068887, + "grad_norm": 0.3953579068183899, + "learning_rate": 1.3649423828620743e-05, + "loss": 0.0609, "step": 18765 }, { - "epoch": 0.4764564030968397, - "grad_norm": 0.44797518849372864, - "learning_rate": 1.6823623979354404e-05, - "loss": 0.095, + "epoch": 0.9528402456977512, + "grad_norm": 0.48132267594337463, + "learning_rate": 1.3647731695348328e-05, + "loss": 0.0641, "step": 18770 }, { - "epoch": 0.476583322756695, - "grad_norm": 0.7543697953224182, - "learning_rate": 1.6822777848288702e-05, - "loss": 0.0934, + "epoch": 0.9530940656886137, + "grad_norm": 0.38885220885276794, + "learning_rate": 1.364603956207591e-05, + "loss": 0.0662, "step": 18775 }, { - "epoch": 0.4767102424165503, - "grad_norm": 0.362051784992218, - "learning_rate": 1.6821931717223e-05, - "loss": 0.074, + "epoch": 0.9533478856794761, + "grad_norm": 0.30875876545906067, + "learning_rate": 1.3644347428803493e-05, + "loss": 0.0597, "step": 18780 }, { - "epoch": 0.47683716207640564, - "grad_norm": 0.45601779222488403, - "learning_rate": 1.68210855861573e-05, - "loss": 0.1244, + "epoch": 0.9536017056703386, + "grad_norm": 0.324533611536026, + "learning_rate": 1.3642655295531077e-05, + "loss": 0.0548, "step": 18785 }, { - "epoch": 0.4769640817362609, - "grad_norm": 0.49506595730781555, - "learning_rate": 1.6820239455091597e-05, - "loss": 0.0921, + "epoch": 0.9538555256612011, + "grad_norm": 0.33613696694374084, + "learning_rate": 1.364096316225866e-05, + "loss": 0.0601, "step": 18790 }, { - "epoch": 0.47709100139611627, - "grad_norm": 0.5831204652786255, - "learning_rate": 1.6819393324025892e-05, - "loss": 0.0943, + "epoch": 0.9541093456520635, + "grad_norm": 0.5050367712974548, + "learning_rate": 1.3639271028986246e-05, + "loss": 0.0637, "step": 18795 }, { - "epoch": 0.47721792105597155, - "grad_norm": 0.9161967039108276, - "learning_rate": 1.681854719296019e-05, - "loss": 0.086, + "epoch": 0.954363165642926, + "grad_norm": 0.2775934338569641, + "learning_rate": 1.3637578895713828e-05, + "loss": 0.072, "step": 18800 }, { - "epoch": 0.4773448407158269, - "grad_norm": 0.43863236904144287, - "learning_rate": 1.681770106189449e-05, - "loss": 0.1065, + "epoch": 0.9546169856337885, + "grad_norm": 0.33112195134162903, + "learning_rate": 1.3635886762441411e-05, + "loss": 0.0624, "step": 18805 }, { - "epoch": 0.4774717603756822, - "grad_norm": 0.521542489528656, - "learning_rate": 1.6816854930828788e-05, - "loss": 0.0808, + "epoch": 0.954870805624651, + "grad_norm": 0.4370911121368408, + "learning_rate": 1.3634194629168995e-05, + "loss": 0.0684, "step": 18810 }, { - "epoch": 0.4775986800355375, - "grad_norm": 0.6336131691932678, - "learning_rate": 1.6816008799763083e-05, - "loss": 0.1087, + "epoch": 0.9551246256155135, + "grad_norm": 0.2732505798339844, + "learning_rate": 1.3632502495896578e-05, + "loss": 0.0636, "step": 18815 }, { - "epoch": 0.4777255996953928, - "grad_norm": 0.48803991079330444, - "learning_rate": 1.681516266869738e-05, - "loss": 0.1135, + "epoch": 0.955378445606376, + "grad_norm": 0.45320791006088257, + "learning_rate": 1.363081036262416e-05, + "loss": 0.0745, "step": 18820 }, { - "epoch": 0.47785251935524814, - "grad_norm": 0.3910767138004303, - "learning_rate": 1.681431653763168e-05, - "loss": 0.0843, + "epoch": 0.9556322655972385, + "grad_norm": 0.44382214546203613, + "learning_rate": 1.3629118229351745e-05, + "loss": 0.0564, "step": 18825 }, { - "epoch": 0.4779794390151034, - "grad_norm": 1.2619826793670654, - "learning_rate": 1.6813470406565978e-05, - "loss": 0.102, + "epoch": 0.955886085588101, + "grad_norm": 0.33199283480644226, + "learning_rate": 1.3627426096079329e-05, + "loss": 0.069, "step": 18830 }, { - "epoch": 0.47810635867495876, - "grad_norm": 0.5977516174316406, - "learning_rate": 1.6812624275500276e-05, - "loss": 0.1012, + "epoch": 0.9561399055789634, + "grad_norm": 0.48582470417022705, + "learning_rate": 1.362573396280691e-05, + "loss": 0.0689, "step": 18835 }, { - "epoch": 0.47823327833481405, - "grad_norm": 0.4186924993991852, - "learning_rate": 1.6811778144434575e-05, - "loss": 0.0955, + "epoch": 0.9563937255698258, + "grad_norm": 0.4178053140640259, + "learning_rate": 1.3624041829534496e-05, + "loss": 0.0692, "step": 18840 }, { - "epoch": 0.4783601979946694, - "grad_norm": 0.6258871555328369, - "learning_rate": 1.6810932013368873e-05, - "loss": 0.1065, + "epoch": 0.9566475455606883, + "grad_norm": 0.39388084411621094, + "learning_rate": 1.3622349696262078e-05, + "loss": 0.0658, "step": 18845 }, { - "epoch": 0.4784871176545247, - "grad_norm": 0.5300717353820801, - "learning_rate": 1.681008588230317e-05, - "loss": 0.0832, + "epoch": 0.9569013655515508, + "grad_norm": 0.3513961136341095, + "learning_rate": 1.3620657562989663e-05, + "loss": 0.0729, "step": 18850 }, { - "epoch": 0.47861403731438, - "grad_norm": 0.6829471588134766, - "learning_rate": 1.6809239751237466e-05, - "loss": 0.0864, + "epoch": 0.9571551855424133, + "grad_norm": 0.31259411573410034, + "learning_rate": 1.3618965429717247e-05, + "loss": 0.0679, "step": 18855 }, { - "epoch": 0.4787409569742353, - "grad_norm": 1.1057380437850952, - "learning_rate": 1.6808393620171765e-05, - "loss": 0.1032, + "epoch": 0.9574090055332758, + "grad_norm": 0.5756545066833496, + "learning_rate": 1.3617273296444828e-05, + "loss": 0.0665, "step": 18860 }, { - "epoch": 0.47886787663409064, - "grad_norm": 0.4304085671901703, - "learning_rate": 1.6807547489106063e-05, - "loss": 0.0845, + "epoch": 0.9576628255241383, + "grad_norm": 0.5503848791122437, + "learning_rate": 1.3615581163172414e-05, + "loss": 0.0535, "step": 18865 }, { - "epoch": 0.4789947962939459, - "grad_norm": 0.536396324634552, - "learning_rate": 1.680670135804036e-05, - "loss": 0.1123, + "epoch": 0.9579166455150008, + "grad_norm": 0.3748461604118347, + "learning_rate": 1.3613889029899996e-05, + "loss": 0.07, "step": 18870 }, { - "epoch": 0.47912171595380126, - "grad_norm": 0.5210887789726257, - "learning_rate": 1.680585522697466e-05, - "loss": 0.0948, + "epoch": 0.9581704655058633, + "grad_norm": 0.25519129633903503, + "learning_rate": 1.3612196896627579e-05, + "loss": 0.0668, "step": 18875 }, { - "epoch": 0.47924863561365655, - "grad_norm": 1.0347521305084229, - "learning_rate": 1.680500909590896e-05, - "loss": 0.1095, + "epoch": 0.9584242854967258, + "grad_norm": 0.46383601427078247, + "learning_rate": 1.3610504763355164e-05, + "loss": 0.0691, "step": 18880 }, { - "epoch": 0.4793755552735119, - "grad_norm": 0.4582555294036865, - "learning_rate": 1.6804162964843257e-05, - "loss": 0.0927, + "epoch": 0.9586781054875883, + "grad_norm": 0.4589533805847168, + "learning_rate": 1.3608812630082746e-05, + "loss": 0.0734, "step": 18885 }, { - "epoch": 0.4795024749333672, - "grad_norm": 0.47157132625579834, - "learning_rate": 1.6803316833777555e-05, - "loss": 0.116, + "epoch": 0.9589319254784506, + "grad_norm": 0.9147213697433472, + "learning_rate": 1.3607120496810331e-05, + "loss": 0.0678, "step": 18890 }, { - "epoch": 0.4796293945932225, - "grad_norm": 0.620322048664093, - "learning_rate": 1.680247070271185e-05, - "loss": 0.0952, + "epoch": 0.9591857454693131, + "grad_norm": 0.37536412477493286, + "learning_rate": 1.3605428363537913e-05, + "loss": 0.0653, "step": 18895 }, { - "epoch": 0.4797563142530778, - "grad_norm": 0.9901988506317139, - "learning_rate": 1.680162457164615e-05, - "loss": 0.1197, + "epoch": 0.9594395654601756, + "grad_norm": 0.7372077703475952, + "learning_rate": 1.3603736230265497e-05, + "loss": 0.0673, "step": 18900 }, { - "epoch": 0.47988323391293314, - "grad_norm": 0.5637772679328918, - "learning_rate": 1.6800778440580447e-05, - "loss": 0.1183, + "epoch": 0.9596933854510381, + "grad_norm": 0.4121319651603699, + "learning_rate": 1.360204409699308e-05, + "loss": 0.0643, "step": 18905 }, { - "epoch": 0.4800101535727884, - "grad_norm": 0.5264401435852051, - "learning_rate": 1.6799932309514745e-05, - "loss": 0.0742, + "epoch": 0.9599472054419006, + "grad_norm": 0.2821780741214752, + "learning_rate": 1.3600351963720664e-05, + "loss": 0.0634, "step": 18910 }, { - "epoch": 0.48013707323264376, - "grad_norm": 0.469461590051651, - "learning_rate": 1.6799086178449044e-05, - "loss": 0.1028, + "epoch": 0.9602010254327631, + "grad_norm": 0.2763786017894745, + "learning_rate": 1.3598659830448246e-05, + "loss": 0.0655, "step": 18915 }, { - "epoch": 0.48026399289249905, - "grad_norm": 0.5002307891845703, - "learning_rate": 1.6798240047383342e-05, - "loss": 0.081, + "epoch": 0.9604548454236256, + "grad_norm": 0.3872879445552826, + "learning_rate": 1.3596967697175831e-05, + "loss": 0.0695, "step": 18920 }, { - "epoch": 0.48039091255235433, - "grad_norm": 0.5193182826042175, - "learning_rate": 1.679739391631764e-05, - "loss": 0.0895, + "epoch": 0.9607086654144881, + "grad_norm": 0.3432711660861969, + "learning_rate": 1.3595275563903414e-05, + "loss": 0.0618, "step": 18925 }, { - "epoch": 0.4805178322122097, - "grad_norm": 0.4430599510669708, - "learning_rate": 1.679654778525194e-05, - "loss": 0.1002, + "epoch": 0.9609624854053506, + "grad_norm": 0.40918609499931335, + "learning_rate": 1.3593583430630996e-05, + "loss": 0.0663, "step": 18930 }, { - "epoch": 0.48064475187206496, - "grad_norm": 0.9129483103752136, - "learning_rate": 1.6795701654186234e-05, - "loss": 0.0942, + "epoch": 0.961216305396213, + "grad_norm": 0.2839658558368683, + "learning_rate": 1.3591891297358582e-05, + "loss": 0.0609, "step": 18935 }, { - "epoch": 0.4807716715319203, - "grad_norm": 2.0529327392578125, - "learning_rate": 1.6794855523120532e-05, - "loss": 0.1017, + "epoch": 0.9614701253870754, + "grad_norm": 0.45697519183158875, + "learning_rate": 1.3590199164086163e-05, + "loss": 0.0682, "step": 18940 }, { - "epoch": 0.4808985911917756, - "grad_norm": 0.5005518794059753, - "learning_rate": 1.679400939205483e-05, - "loss": 0.0921, + "epoch": 0.9617239453779379, + "grad_norm": 0.4115654528141022, + "learning_rate": 1.3588507030813749e-05, + "loss": 0.0755, "step": 18945 }, { - "epoch": 0.4810255108516309, - "grad_norm": 0.42734241485595703, - "learning_rate": 1.679316326098913e-05, - "loss": 0.0913, + "epoch": 0.9619777653688004, + "grad_norm": 0.3328510820865631, + "learning_rate": 1.3586814897541332e-05, + "loss": 0.0726, "step": 18950 }, { - "epoch": 0.4811524305114862, - "grad_norm": 0.48257318139076233, - "learning_rate": 1.6792317129923424e-05, - "loss": 0.0819, + "epoch": 0.9622315853596629, + "grad_norm": 0.392888605594635, + "learning_rate": 1.3585122764268914e-05, + "loss": 0.0692, "step": 18955 }, { - "epoch": 0.48127935017134155, - "grad_norm": 0.40320974588394165, - "learning_rate": 1.6791470998857723e-05, - "loss": 0.1033, + "epoch": 0.9624854053505254, + "grad_norm": 0.7154536843299866, + "learning_rate": 1.35834306309965e-05, + "loss": 0.0709, "step": 18960 }, { - "epoch": 0.48140626983119683, - "grad_norm": 1.2381645441055298, - "learning_rate": 1.679062486779202e-05, - "loss": 0.1052, + "epoch": 0.9627392253413879, + "grad_norm": 0.38823920488357544, + "learning_rate": 1.3581738497724081e-05, + "loss": 0.0581, "step": 18965 }, { - "epoch": 0.48153318949105217, - "grad_norm": 0.4621211886405945, - "learning_rate": 1.678977873672632e-05, - "loss": 0.0799, + "epoch": 0.9629930453322504, + "grad_norm": 1.6257457733154297, + "learning_rate": 1.3580046364451665e-05, + "loss": 0.0627, "step": 18970 }, { - "epoch": 0.48166010915090746, - "grad_norm": 0.40159136056900024, - "learning_rate": 1.6788932605660618e-05, - "loss": 0.0971, + "epoch": 0.9632468653231129, + "grad_norm": 0.3897399604320526, + "learning_rate": 1.357835423117925e-05, + "loss": 0.0612, "step": 18975 }, { - "epoch": 0.4817870288107628, - "grad_norm": 0.4948984384536743, - "learning_rate": 1.6788086474594916e-05, - "loss": 0.0813, + "epoch": 0.9635006853139754, + "grad_norm": 0.33613255620002747, + "learning_rate": 1.3576662097906832e-05, + "loss": 0.0535, "step": 18980 }, { - "epoch": 0.4819139484706181, - "grad_norm": 0.6141747832298279, - "learning_rate": 1.6787240343529215e-05, - "loss": 0.1014, + "epoch": 0.9637545053048379, + "grad_norm": 0.3078048527240753, + "learning_rate": 1.3574969964634415e-05, + "loss": 0.0664, "step": 18985 }, { - "epoch": 0.4820408681304734, - "grad_norm": 0.5378977060317993, - "learning_rate": 1.6786394212463513e-05, - "loss": 0.0973, + "epoch": 0.9640083252957002, + "grad_norm": 0.8129367828369141, + "learning_rate": 1.3573277831361999e-05, + "loss": 0.0686, "step": 18990 }, { - "epoch": 0.4821677877903287, - "grad_norm": 0.4309171140193939, - "learning_rate": 1.6785548081397808e-05, - "loss": 0.1093, + "epoch": 0.9642621452865627, + "grad_norm": 0.28624656796455383, + "learning_rate": 1.3571585698089582e-05, + "loss": 0.0629, "step": 18995 }, { - "epoch": 0.48229470745018405, - "grad_norm": 0.5356213450431824, - "learning_rate": 1.6784701950332106e-05, - "loss": 0.0865, + "epoch": 0.9645159652774252, + "grad_norm": 0.40177884697914124, + "learning_rate": 1.3569893564817168e-05, + "loss": 0.0631, "step": 19000 }, { - "epoch": 0.48242162711003933, - "grad_norm": 1.5632023811340332, - "learning_rate": 1.6783855819266405e-05, - "loss": 0.0997, + "epoch": 0.9647697852682877, + "grad_norm": 0.8333194255828857, + "learning_rate": 1.356820143154475e-05, + "loss": 0.0636, "step": 19005 }, { - "epoch": 0.48254854676989467, - "grad_norm": 0.638945996761322, - "learning_rate": 1.6783009688200703e-05, - "loss": 0.1169, + "epoch": 0.9650236052591502, + "grad_norm": 0.6080228090286255, + "learning_rate": 1.3566509298272333e-05, + "loss": 0.0579, "step": 19010 }, { - "epoch": 0.48267546642974996, - "grad_norm": 0.6454835534095764, - "learning_rate": 1.6782163557135e-05, - "loss": 0.1037, + "epoch": 0.9652774252500127, + "grad_norm": 0.3721805512905121, + "learning_rate": 1.3564817164999917e-05, + "loss": 0.0657, "step": 19015 }, { - "epoch": 0.4828023860896053, - "grad_norm": 0.491497665643692, - "learning_rate": 1.67813174260693e-05, - "loss": 0.0888, + "epoch": 0.9655312452408752, + "grad_norm": 0.36625728011131287, + "learning_rate": 1.35631250317275e-05, + "loss": 0.065, "step": 19020 }, { - "epoch": 0.4829293057494606, - "grad_norm": 0.4631255865097046, - "learning_rate": 1.67804712950036e-05, - "loss": 0.096, + "epoch": 0.9657850652317377, + "grad_norm": 0.4012068510055542, + "learning_rate": 1.3561432898455082e-05, + "loss": 0.0647, "step": 19025 }, { - "epoch": 0.4830562254093159, - "grad_norm": 0.4877651035785675, - "learning_rate": 1.6779625163937897e-05, - "loss": 0.0923, + "epoch": 0.9660388852226002, + "grad_norm": 0.32912421226501465, + "learning_rate": 1.3559740765182667e-05, + "loss": 0.0632, "step": 19030 }, { - "epoch": 0.4831831450691712, - "grad_norm": 0.6451601386070251, - "learning_rate": 1.6778779032872192e-05, - "loss": 0.0998, + "epoch": 0.9662927052134627, + "grad_norm": 0.6822962164878845, + "learning_rate": 1.355804863191025e-05, + "loss": 0.0625, "step": 19035 }, { - "epoch": 0.48331006472902655, - "grad_norm": 0.5482479333877563, - "learning_rate": 1.677793290180649e-05, - "loss": 0.0928, + "epoch": 0.966546525204325, + "grad_norm": 0.3175319731235504, + "learning_rate": 1.3556356498637834e-05, + "loss": 0.068, "step": 19040 }, { - "epoch": 0.48343698438888183, - "grad_norm": 0.6643748879432678, - "learning_rate": 1.677708677074079e-05, - "loss": 0.1267, + "epoch": 0.9668003451951875, + "grad_norm": 0.4144521653652191, + "learning_rate": 1.3554664365365418e-05, + "loss": 0.0693, "step": 19045 }, { - "epoch": 0.48356390404873717, - "grad_norm": 0.6641902923583984, - "learning_rate": 1.6776240639675087e-05, - "loss": 0.0896, + "epoch": 0.96705416518605, + "grad_norm": 0.42917636036872864, + "learning_rate": 1.3552972232093e-05, + "loss": 0.0584, "step": 19050 }, { - "epoch": 0.48369082370859245, - "grad_norm": 0.6425026655197144, - "learning_rate": 1.6775394508609385e-05, - "loss": 0.0896, + "epoch": 0.9673079851769125, + "grad_norm": 0.3734082877635956, + "learning_rate": 1.3551280098820585e-05, + "loss": 0.0753, "step": 19055 }, { - "epoch": 0.4838177433684478, - "grad_norm": 0.6086316108703613, - "learning_rate": 1.6774548377543684e-05, - "loss": 0.0839, + "epoch": 0.967561805167775, + "grad_norm": 0.6813828945159912, + "learning_rate": 1.3549587965548168e-05, + "loss": 0.0681, "step": 19060 }, { - "epoch": 0.4839446630283031, - "grad_norm": 0.5209307670593262, - "learning_rate": 1.6773702246477982e-05, - "loss": 0.1035, + "epoch": 0.9678156251586375, + "grad_norm": 0.5187677145004272, + "learning_rate": 1.354789583227575e-05, + "loss": 0.063, "step": 19065 }, { - "epoch": 0.4840715826881584, - "grad_norm": 0.4861399233341217, - "learning_rate": 1.677285611541228e-05, - "loss": 0.0785, + "epoch": 0.9680694451495, + "grad_norm": 0.37740230560302734, + "learning_rate": 1.3546203699003336e-05, + "loss": 0.0598, "step": 19070 }, { - "epoch": 0.4841985023480137, - "grad_norm": 0.5182640552520752, - "learning_rate": 1.6772009984346576e-05, - "loss": 0.0884, + "epoch": 0.9683232651403625, + "grad_norm": 0.35848960280418396, + "learning_rate": 1.3544511565730917e-05, + "loss": 0.0598, "step": 19075 }, { - "epoch": 0.48432542200786904, - "grad_norm": 1.0315122604370117, - "learning_rate": 1.6771163853280874e-05, - "loss": 0.1034, + "epoch": 0.968577085131225, + "grad_norm": 0.36278432607650757, + "learning_rate": 1.3542819432458501e-05, + "loss": 0.0645, "step": 19080 }, { - "epoch": 0.48445234166772433, - "grad_norm": 0.510769248008728, - "learning_rate": 1.6770317722215173e-05, - "loss": 0.0901, + "epoch": 0.9688309051220874, + "grad_norm": 0.37709158658981323, + "learning_rate": 1.3541127299186085e-05, + "loss": 0.0654, "step": 19085 }, { - "epoch": 0.48457926132757967, - "grad_norm": 0.5307183861732483, - "learning_rate": 1.676947159114947e-05, - "loss": 0.0934, + "epoch": 0.9690847251129499, + "grad_norm": 0.41565874218940735, + "learning_rate": 1.3539435165913668e-05, + "loss": 0.067, "step": 19090 }, { - "epoch": 0.48470618098743495, - "grad_norm": 0.8895853757858276, - "learning_rate": 1.6768625460083766e-05, - "loss": 0.103, + "epoch": 0.9693385451038123, + "grad_norm": 0.405154287815094, + "learning_rate": 1.3537743032641253e-05, + "loss": 0.06, "step": 19095 }, { - "epoch": 0.48483310064729024, - "grad_norm": 0.4139460027217865, - "learning_rate": 1.6767779329018064e-05, - "loss": 0.0884, + "epoch": 0.9695923650946748, + "grad_norm": 0.4619700610637665, + "learning_rate": 1.3536050899368835e-05, + "loss": 0.0692, "step": 19100 }, { - "epoch": 0.4849600203071456, - "grad_norm": 1.3043227195739746, - "learning_rate": 1.6766933197952363e-05, - "loss": 0.1106, + "epoch": 0.9698461850855373, + "grad_norm": 0.43478238582611084, + "learning_rate": 1.3534358766096419e-05, + "loss": 0.0603, "step": 19105 }, { - "epoch": 0.48508693996700086, - "grad_norm": 0.6213691234588623, - "learning_rate": 1.676608706688666e-05, - "loss": 0.1038, + "epoch": 0.9701000050763998, + "grad_norm": 0.36731022596359253, + "learning_rate": 1.3532666632824002e-05, + "loss": 0.06, "step": 19110 }, { - "epoch": 0.4852138596268562, - "grad_norm": 1.5482627153396606, - "learning_rate": 1.676524093582096e-05, - "loss": 0.1128, + "epoch": 0.9703538250672623, + "grad_norm": 1.0390390157699585, + "learning_rate": 1.3530974499551586e-05, + "loss": 0.0686, "step": 19115 }, { - "epoch": 0.4853407792867115, - "grad_norm": 0.5718422532081604, - "learning_rate": 1.6764394804755258e-05, - "loss": 0.1017, + "epoch": 0.9706076450581248, + "grad_norm": 0.3847677707672119, + "learning_rate": 1.3529282366279168e-05, + "loss": 0.0622, "step": 19120 }, { - "epoch": 0.48546769894656683, - "grad_norm": 1.0407518148422241, - "learning_rate": 1.6763548673689556e-05, - "loss": 0.1079, + "epoch": 0.9708614650489873, + "grad_norm": 0.302962064743042, + "learning_rate": 1.3527590233006753e-05, + "loss": 0.0576, "step": 19125 }, { - "epoch": 0.4855946186064221, - "grad_norm": 0.5971589088439941, - "learning_rate": 1.6762702542623855e-05, - "loss": 0.0957, + "epoch": 0.9711152850398498, + "grad_norm": 0.48355162143707275, + "learning_rate": 1.3525898099734336e-05, + "loss": 0.0691, "step": 19130 }, { - "epoch": 0.48572153826627745, - "grad_norm": 0.6507247686386108, - "learning_rate": 1.6761856411558153e-05, - "loss": 0.0947, + "epoch": 0.9713691050307122, + "grad_norm": 0.40184906125068665, + "learning_rate": 1.352420596646192e-05, + "loss": 0.064, "step": 19135 }, { - "epoch": 0.48584845792613274, - "grad_norm": 0.6178021430969238, - "learning_rate": 1.6761010280492448e-05, - "loss": 0.0766, + "epoch": 0.9716229250215747, + "grad_norm": 0.3403814136981964, + "learning_rate": 1.3522513833189504e-05, + "loss": 0.0654, "step": 19140 }, { - "epoch": 0.4859753775859881, - "grad_norm": 0.6514716744422913, - "learning_rate": 1.6760164149426747e-05, - "loss": 0.0927, + "epoch": 0.9718767450124371, + "grad_norm": 0.37238359451293945, + "learning_rate": 1.3520821699917085e-05, + "loss": 0.068, "step": 19145 }, { - "epoch": 0.48610229724584336, - "grad_norm": 0.5126349925994873, - "learning_rate": 1.6759318018361045e-05, - "loss": 0.1077, + "epoch": 0.9721305650032996, + "grad_norm": 0.30097347497940063, + "learning_rate": 1.351912956664467e-05, + "loss": 0.0694, "step": 19150 }, { - "epoch": 0.4862292169056987, - "grad_norm": 0.519413411617279, - "learning_rate": 1.6758471887295343e-05, - "loss": 0.0829, + "epoch": 0.9723843849941621, + "grad_norm": 0.25011131167411804, + "learning_rate": 1.3517437433372254e-05, + "loss": 0.0593, "step": 19155 }, { - "epoch": 0.486356136565554, - "grad_norm": 0.42318686842918396, - "learning_rate": 1.6757625756229642e-05, - "loss": 0.0866, + "epoch": 0.9726382049850246, + "grad_norm": 0.3264051377773285, + "learning_rate": 1.3515745300099836e-05, + "loss": 0.0596, "step": 19160 }, { - "epoch": 0.4864830562254093, - "grad_norm": 1.1935911178588867, - "learning_rate": 1.675677962516394e-05, - "loss": 0.0826, + "epoch": 0.9728920249758871, + "grad_norm": 0.4142516553401947, + "learning_rate": 1.3514053166827421e-05, + "loss": 0.0748, "step": 19165 }, { - "epoch": 0.4866099758852646, - "grad_norm": 0.5282414555549622, - "learning_rate": 1.675593349409824e-05, - "loss": 0.0952, + "epoch": 0.9731458449667496, + "grad_norm": 0.4116646349430084, + "learning_rate": 1.3512361033555003e-05, + "loss": 0.0582, "step": 19170 }, { - "epoch": 0.48673689554511995, - "grad_norm": 0.44397616386413574, - "learning_rate": 1.6755087363032537e-05, - "loss": 0.0841, + "epoch": 0.9733996649576121, + "grad_norm": 0.33693939447402954, + "learning_rate": 1.3510668900282587e-05, + "loss": 0.0742, "step": 19175 }, { - "epoch": 0.48686381520497524, - "grad_norm": 0.7662266492843628, - "learning_rate": 1.6754241231966832e-05, - "loss": 0.107, + "epoch": 0.9736534849484746, + "grad_norm": 0.3353005051612854, + "learning_rate": 1.3508976767010172e-05, + "loss": 0.0641, "step": 19180 }, { - "epoch": 0.4869907348648306, - "grad_norm": 0.43245717883110046, - "learning_rate": 1.675339510090113e-05, - "loss": 0.1036, + "epoch": 0.973907304939337, + "grad_norm": 0.4305810332298279, + "learning_rate": 1.3507284633737754e-05, + "loss": 0.0645, "step": 19185 }, { - "epoch": 0.48711765452468586, - "grad_norm": 0.5105351209640503, - "learning_rate": 1.675254896983543e-05, - "loss": 0.086, + "epoch": 0.9741611249301995, + "grad_norm": 0.3571843206882477, + "learning_rate": 1.3505592500465339e-05, + "loss": 0.066, "step": 19190 }, { - "epoch": 0.4872445741845412, - "grad_norm": 0.5778452754020691, - "learning_rate": 1.6751702838769727e-05, - "loss": 0.0904, + "epoch": 0.974414944921062, + "grad_norm": 0.34298276901245117, + "learning_rate": 1.350390036719292e-05, + "loss": 0.0736, "step": 19195 }, { - "epoch": 0.4873714938443965, - "grad_norm": 0.6332741975784302, - "learning_rate": 1.6750856707704026e-05, - "loss": 0.0813, + "epoch": 0.9746687649119244, + "grad_norm": 0.4371688961982727, + "learning_rate": 1.3502208233920504e-05, + "loss": 0.0638, "step": 19200 }, { - "epoch": 0.4874984135042518, - "grad_norm": 0.36652714014053345, - "learning_rate": 1.6750010576638324e-05, - "loss": 0.0988, + "epoch": 0.9749225849027869, + "grad_norm": 0.43059366941452026, + "learning_rate": 1.350051610064809e-05, + "loss": 0.0683, "step": 19205 }, { - "epoch": 0.4876253331641071, - "grad_norm": 0.7193161845207214, - "learning_rate": 1.6749164445572622e-05, - "loss": 0.0888, + "epoch": 0.9751764048936494, + "grad_norm": 0.33127647638320923, + "learning_rate": 1.3498823967375671e-05, + "loss": 0.066, "step": 19210 }, { - "epoch": 0.48775225282396245, - "grad_norm": 0.6816926598548889, - "learning_rate": 1.674831831450692e-05, - "loss": 0.0819, + "epoch": 0.9754302248845119, + "grad_norm": 0.4141124188899994, + "learning_rate": 1.3497131834103255e-05, + "loss": 0.0569, "step": 19215 }, { - "epoch": 0.48787917248381774, - "grad_norm": 0.6912873983383179, - "learning_rate": 1.6747472183441216e-05, - "loss": 0.0927, + "epoch": 0.9756840448753744, + "grad_norm": 0.45645686984062195, + "learning_rate": 1.3495439700830839e-05, + "loss": 0.0676, "step": 19220 }, { - "epoch": 0.4880060921436731, - "grad_norm": 0.7208134531974792, - "learning_rate": 1.6746626052375514e-05, - "loss": 0.108, + "epoch": 0.9759378648662369, + "grad_norm": 0.44985485076904297, + "learning_rate": 1.3493747567558422e-05, + "loss": 0.0654, "step": 19225 }, { - "epoch": 0.48813301180352836, - "grad_norm": 0.7013769745826721, - "learning_rate": 1.6745779921309813e-05, - "loss": 0.0865, + "epoch": 0.9761916848570994, + "grad_norm": 0.3672463893890381, + "learning_rate": 1.3492055434286004e-05, + "loss": 0.0711, "step": 19230 }, { - "epoch": 0.4882599314633837, - "grad_norm": 0.6994516253471375, - "learning_rate": 1.674493379024411e-05, - "loss": 0.098, + "epoch": 0.9764455048479618, + "grad_norm": 1.25873601436615, + "learning_rate": 1.349036330101359e-05, + "loss": 0.0723, "step": 19235 }, { - "epoch": 0.488386851123239, - "grad_norm": 0.7433639168739319, - "learning_rate": 1.6744087659178406e-05, - "loss": 0.0878, + "epoch": 0.9766993248388243, + "grad_norm": 0.5634270310401917, + "learning_rate": 1.3488671167741173e-05, + "loss": 0.0616, "step": 19240 }, { - "epoch": 0.4885137707830943, - "grad_norm": 0.7197049260139465, - "learning_rate": 1.6743241528112704e-05, - "loss": 0.07, + "epoch": 0.9769531448296868, + "grad_norm": 0.2947288155555725, + "learning_rate": 1.3486979034468756e-05, + "loss": 0.0618, "step": 19245 }, { - "epoch": 0.4886406904429496, - "grad_norm": 0.5190019607543945, - "learning_rate": 1.6742395397047003e-05, - "loss": 0.1119, + "epoch": 0.9772069648205493, + "grad_norm": 0.5857254266738892, + "learning_rate": 1.348528690119634e-05, + "loss": 0.0682, "step": 19250 }, { - "epoch": 0.48876761010280495, - "grad_norm": 0.5559335350990295, - "learning_rate": 1.67415492659813e-05, - "loss": 0.0753, + "epoch": 0.9774607848114117, + "grad_norm": 0.300155907869339, + "learning_rate": 1.3483594767923922e-05, + "loss": 0.0563, "step": 19255 }, { - "epoch": 0.48889452976266023, - "grad_norm": 0.654876172542572, - "learning_rate": 1.67407031349156e-05, - "loss": 0.066, + "epoch": 0.9777146048022742, + "grad_norm": 0.521580159664154, + "learning_rate": 1.3481902634651507e-05, + "loss": 0.0677, "step": 19260 }, { - "epoch": 0.4890214494225156, - "grad_norm": 0.4983915090560913, - "learning_rate": 1.6739857003849898e-05, - "loss": 0.1131, + "epoch": 0.9779684247931367, + "grad_norm": 0.3018982708454132, + "learning_rate": 1.348021050137909e-05, + "loss": 0.0695, "step": 19265 }, { - "epoch": 0.48914836908237086, - "grad_norm": 0.39488014578819275, - "learning_rate": 1.6739010872784196e-05, - "loss": 0.1009, + "epoch": 0.9782222447839992, + "grad_norm": 0.4247705638408661, + "learning_rate": 1.3478518368106672e-05, + "loss": 0.0648, "step": 19270 }, { - "epoch": 0.48927528874222614, - "grad_norm": 0.5197723507881165, - "learning_rate": 1.6738164741718495e-05, - "loss": 0.1298, + "epoch": 0.9784760647748617, + "grad_norm": 0.3755514621734619, + "learning_rate": 1.3476826234834258e-05, + "loss": 0.0713, "step": 19275 }, { - "epoch": 0.4894022084020815, - "grad_norm": 0.4345821440219879, - "learning_rate": 1.673731861065279e-05, - "loss": 0.0945, + "epoch": 0.9787298847657242, + "grad_norm": 0.650913417339325, + "learning_rate": 1.347513410156184e-05, + "loss": 0.0591, "step": 19280 }, { - "epoch": 0.48952912806193677, - "grad_norm": 0.6727240681648254, - "learning_rate": 1.6736472479587088e-05, - "loss": 0.115, + "epoch": 0.9789837047565866, + "grad_norm": 0.32536032795906067, + "learning_rate": 1.3473441968289425e-05, + "loss": 0.0794, "step": 19285 }, { - "epoch": 0.4896560477217921, - "grad_norm": 0.6930618286132812, - "learning_rate": 1.6735626348521387e-05, - "loss": 0.106, + "epoch": 0.9792375247474491, + "grad_norm": 0.5901187658309937, + "learning_rate": 1.3471749835017006e-05, + "loss": 0.0784, "step": 19290 }, { - "epoch": 0.4897829673816474, - "grad_norm": 0.5312812328338623, - "learning_rate": 1.6734780217455685e-05, - "loss": 0.0903, + "epoch": 0.9794913447383116, + "grad_norm": 0.3043603003025055, + "learning_rate": 1.347005770174459e-05, + "loss": 0.0748, "step": 19295 }, { - "epoch": 0.48990988704150273, - "grad_norm": 1.3166269063949585, - "learning_rate": 1.6733934086389983e-05, - "loss": 0.0885, + "epoch": 0.9797451647291741, + "grad_norm": 0.36308273673057556, + "learning_rate": 1.3468365568472175e-05, + "loss": 0.0697, "step": 19300 }, { - "epoch": 0.490036806701358, - "grad_norm": 0.5533337593078613, - "learning_rate": 1.6733087955324282e-05, - "loss": 0.11, + "epoch": 0.9799989847200365, + "grad_norm": 0.34357786178588867, + "learning_rate": 1.3466673435199757e-05, + "loss": 0.0694, "step": 19305 }, { - "epoch": 0.49016372636121336, - "grad_norm": 0.8613161444664001, - "learning_rate": 1.673224182425858e-05, - "loss": 0.1155, + "epoch": 0.980252804710899, + "grad_norm": 0.9105756878852844, + "learning_rate": 1.346498130192734e-05, + "loss": 0.0545, "step": 19310 }, { - "epoch": 0.49029064602106864, - "grad_norm": 0.5992020964622498, - "learning_rate": 1.673139569319288e-05, - "loss": 0.0833, + "epoch": 0.9805066247017615, + "grad_norm": 0.3355824947357178, + "learning_rate": 1.3463289168654924e-05, + "loss": 0.0552, "step": 19315 }, { - "epoch": 0.490417565680924, - "grad_norm": 0.7299078106880188, - "learning_rate": 1.6730549562127174e-05, - "loss": 0.1053, + "epoch": 0.980760444692624, + "grad_norm": 0.3532446026802063, + "learning_rate": 1.3461597035382508e-05, + "loss": 0.0681, "step": 19320 }, { - "epoch": 0.49054448534077927, - "grad_norm": 0.7103135585784912, - "learning_rate": 1.6729703431061472e-05, - "loss": 0.1095, + "epoch": 0.9810142646834865, + "grad_norm": 0.3839724659919739, + "learning_rate": 1.345990490211009e-05, + "loss": 0.0596, "step": 19325 }, { - "epoch": 0.4906714050006346, - "grad_norm": 0.6288467645645142, - "learning_rate": 1.672885729999577e-05, - "loss": 0.108, + "epoch": 0.981268084674349, + "grad_norm": 0.4662843346595764, + "learning_rate": 1.3458212768837675e-05, + "loss": 0.0633, "step": 19330 }, { - "epoch": 0.4907983246604899, - "grad_norm": 0.5651088356971741, - "learning_rate": 1.672801116893007e-05, - "loss": 0.0849, + "epoch": 0.9815219046652114, + "grad_norm": 0.4850928485393524, + "learning_rate": 1.3456520635565258e-05, + "loss": 0.06, "step": 19335 }, { - "epoch": 0.49092524432034523, - "grad_norm": 0.7312604784965515, - "learning_rate": 1.6727165037864367e-05, - "loss": 0.101, + "epoch": 0.9817757246560739, + "grad_norm": 0.5426207184791565, + "learning_rate": 1.3454828502292842e-05, + "loss": 0.0711, "step": 19340 }, { - "epoch": 0.4910521639802005, - "grad_norm": 0.46652328968048096, - "learning_rate": 1.6726318906798666e-05, - "loss": 0.0789, + "epoch": 0.9820295446469364, + "grad_norm": 0.24476711452007294, + "learning_rate": 1.3453136369020425e-05, + "loss": 0.0687, "step": 19345 }, { - "epoch": 0.49117908364005586, - "grad_norm": 0.37486153841018677, - "learning_rate": 1.6725472775732964e-05, - "loss": 0.0814, + "epoch": 0.9822833646377989, + "grad_norm": 0.39669865369796753, + "learning_rate": 1.3451444235748007e-05, + "loss": 0.0648, "step": 19350 }, { - "epoch": 0.49130600329991114, - "grad_norm": 0.6446362137794495, - "learning_rate": 1.6724626644667263e-05, - "loss": 0.1059, + "epoch": 0.9825371846286614, + "grad_norm": 0.329333633184433, + "learning_rate": 1.3449752102475593e-05, + "loss": 0.0623, "step": 19355 }, { - "epoch": 0.4914329229597665, - "grad_norm": 0.4844749867916107, - "learning_rate": 1.6723780513601558e-05, - "loss": 0.0774, + "epoch": 0.9827910046195238, + "grad_norm": 0.5927011966705322, + "learning_rate": 1.3448059969203176e-05, + "loss": 0.0579, "step": 19360 }, { - "epoch": 0.49155984261962177, - "grad_norm": 1.3831578493118286, - "learning_rate": 1.6722934382535856e-05, - "loss": 0.0742, + "epoch": 0.9830448246103863, + "grad_norm": 0.4235691726207733, + "learning_rate": 1.3446367835930758e-05, + "loss": 0.0576, "step": 19365 }, { - "epoch": 0.4916867622794771, - "grad_norm": 0.4069730043411255, - "learning_rate": 1.6722088251470154e-05, - "loss": 0.0887, + "epoch": 0.9832986446012488, + "grad_norm": 0.3612593412399292, + "learning_rate": 1.3444675702658343e-05, + "loss": 0.0659, "step": 19370 }, { - "epoch": 0.4918136819393324, - "grad_norm": 0.6603564620018005, - "learning_rate": 1.6721242120404453e-05, - "loss": 0.0733, + "epoch": 0.9835524645921113, + "grad_norm": 0.4044787287712097, + "learning_rate": 1.3442983569385925e-05, + "loss": 0.0665, "step": 19375 }, { - "epoch": 0.49194060159918773, - "grad_norm": 0.35680925846099854, - "learning_rate": 1.6720395989338748e-05, - "loss": 0.104, + "epoch": 0.9838062845829737, + "grad_norm": 0.32001495361328125, + "learning_rate": 1.344129143611351e-05, + "loss": 0.058, "step": 19380 }, { - "epoch": 0.492067521259043, - "grad_norm": 0.5794273614883423, - "learning_rate": 1.6719549858273046e-05, - "loss": 0.1064, + "epoch": 0.9840601045738362, + "grad_norm": 0.2624494731426239, + "learning_rate": 1.3439599302841094e-05, + "loss": 0.0622, "step": 19385 }, { - "epoch": 0.49219444091889836, - "grad_norm": 0.9445390105247498, - "learning_rate": 1.6718703727207345e-05, - "loss": 0.0827, + "epoch": 0.9843139245646987, + "grad_norm": 0.3039698004722595, + "learning_rate": 1.3437907169568676e-05, + "loss": 0.0539, "step": 19390 }, { - "epoch": 0.49232136057875364, - "grad_norm": 0.6755049228668213, - "learning_rate": 1.6717857596141643e-05, - "loss": 0.082, + "epoch": 0.9845677445555612, + "grad_norm": 1.1381529569625854, + "learning_rate": 1.3436215036296261e-05, + "loss": 0.0671, "step": 19395 }, { - "epoch": 0.492448280238609, - "grad_norm": 0.7115680575370789, - "learning_rate": 1.671701146507594e-05, - "loss": 0.0973, + "epoch": 0.9848215645464237, + "grad_norm": 0.3349166810512543, + "learning_rate": 1.3434522903023843e-05, + "loss": 0.0663, "step": 19400 }, { - "epoch": 0.49257519989846427, - "grad_norm": 0.7086094617843628, - "learning_rate": 1.671616533401024e-05, - "loss": 0.1086, + "epoch": 0.9850753845372862, + "grad_norm": 0.31104400753974915, + "learning_rate": 1.3432830769751426e-05, + "loss": 0.053, "step": 19405 }, { - "epoch": 0.4927021195583196, - "grad_norm": 0.958219587802887, - "learning_rate": 1.6715319202944538e-05, - "loss": 0.0965, + "epoch": 0.9853292045281486, + "grad_norm": 0.3512427806854248, + "learning_rate": 1.3431138636479012e-05, + "loss": 0.0741, "step": 19410 }, { - "epoch": 0.4928290392181749, - "grad_norm": 0.544370174407959, - "learning_rate": 1.6714473071878837e-05, - "loss": 0.1207, + "epoch": 0.9855830245190111, + "grad_norm": 0.5050990581512451, + "learning_rate": 1.3429446503206593e-05, + "loss": 0.0678, "step": 19415 }, { - "epoch": 0.49295595887803023, - "grad_norm": 0.47960904240608215, - "learning_rate": 1.671362694081313e-05, - "loss": 0.0884, + "epoch": 0.9858368445098736, + "grad_norm": 0.4575447142124176, + "learning_rate": 1.3427754369934177e-05, + "loss": 0.0707, "step": 19420 }, { - "epoch": 0.4930828785378855, - "grad_norm": 0.7839269042015076, - "learning_rate": 1.671278080974743e-05, - "loss": 0.0817, + "epoch": 0.9860906645007361, + "grad_norm": 0.464824378490448, + "learning_rate": 1.342606223666176e-05, + "loss": 0.0573, "step": 19425 }, { - "epoch": 0.49320979819774086, - "grad_norm": 0.9556987881660461, - "learning_rate": 1.671193467868173e-05, - "loss": 0.1083, + "epoch": 0.9863444844915985, + "grad_norm": 0.2967434823513031, + "learning_rate": 1.3424370103389344e-05, + "loss": 0.0627, "step": 19430 }, { - "epoch": 0.49333671785759614, - "grad_norm": 0.5445716977119446, - "learning_rate": 1.6711088547616027e-05, - "loss": 0.0954, + "epoch": 0.986598304482461, + "grad_norm": 0.425357848405838, + "learning_rate": 1.342267797011693e-05, + "loss": 0.0534, "step": 19435 }, { - "epoch": 0.4934636375174514, - "grad_norm": 0.6531522870063782, - "learning_rate": 1.6710242416550325e-05, - "loss": 0.1025, + "epoch": 0.9868521244733235, + "grad_norm": 0.437012255191803, + "learning_rate": 1.3420985836844511e-05, + "loss": 0.0685, "step": 19440 }, { - "epoch": 0.49359055717730677, - "grad_norm": 0.5426739454269409, - "learning_rate": 1.6709396285484624e-05, - "loss": 0.1056, + "epoch": 0.987105944464186, + "grad_norm": 0.37560611963272095, + "learning_rate": 1.3419293703572095e-05, + "loss": 0.0605, "step": 19445 }, { - "epoch": 0.49371747683716205, - "grad_norm": 0.5461218953132629, - "learning_rate": 1.6708550154418922e-05, - "loss": 0.0748, + "epoch": 0.9873597644550485, + "grad_norm": 0.35586923360824585, + "learning_rate": 1.3417601570299678e-05, + "loss": 0.0641, "step": 19450 }, { - "epoch": 0.4938443964970174, - "grad_norm": 1.4487708806991577, - "learning_rate": 1.670770402335322e-05, - "loss": 0.0943, + "epoch": 0.987613584445911, + "grad_norm": 0.3776257038116455, + "learning_rate": 1.3415909437027262e-05, + "loss": 0.0571, "step": 19455 }, { - "epoch": 0.4939713161568727, - "grad_norm": 0.6062582731246948, - "learning_rate": 1.6706857892287515e-05, - "loss": 0.088, + "epoch": 0.9878674044367735, + "grad_norm": 1.247651219367981, + "learning_rate": 1.3414217303754844e-05, + "loss": 0.0635, "step": 19460 }, { - "epoch": 0.494098235816728, - "grad_norm": 0.3478766083717346, - "learning_rate": 1.6706011761221814e-05, - "loss": 0.106, + "epoch": 0.988121224427636, + "grad_norm": 0.5398990511894226, + "learning_rate": 1.3412525170482429e-05, + "loss": 0.0712, "step": 19465 }, { - "epoch": 0.4942251554765833, - "grad_norm": 0.7675686478614807, - "learning_rate": 1.6705165630156112e-05, - "loss": 0.1305, + "epoch": 0.9883750444184984, + "grad_norm": 0.49520477652549744, + "learning_rate": 1.341083303721001e-05, + "loss": 0.0661, "step": 19470 }, { - "epoch": 0.49435207513643864, - "grad_norm": 0.7561877965927124, - "learning_rate": 1.670431949909041e-05, - "loss": 0.1096, + "epoch": 0.9886288644093609, + "grad_norm": 0.38991716504096985, + "learning_rate": 1.3409140903937594e-05, + "loss": 0.0766, "step": 19475 }, { - "epoch": 0.4944789947962939, - "grad_norm": 0.7236150503158569, - "learning_rate": 1.670347336802471e-05, - "loss": 0.0936, + "epoch": 0.9888826844002233, + "grad_norm": 0.2874346971511841, + "learning_rate": 1.340744877066518e-05, + "loss": 0.0654, "step": 19480 }, { - "epoch": 0.49460591445614926, - "grad_norm": 0.5845525860786438, - "learning_rate": 1.6702627236959007e-05, - "loss": 0.1003, + "epoch": 0.9891365043910858, + "grad_norm": 0.5209989547729492, + "learning_rate": 1.3405756637392761e-05, + "loss": 0.0577, "step": 19485 }, { - "epoch": 0.49473283411600455, - "grad_norm": 0.6901052594184875, - "learning_rate": 1.6701781105893306e-05, - "loss": 0.1038, + "epoch": 0.9893903243819483, + "grad_norm": 0.7382110953330994, + "learning_rate": 1.3404064504120347e-05, + "loss": 0.0635, "step": 19490 }, { - "epoch": 0.4948597537758599, - "grad_norm": 0.4955129325389862, - "learning_rate": 1.6700934974827604e-05, - "loss": 0.095, + "epoch": 0.9896441443728108, + "grad_norm": 0.41847217082977295, + "learning_rate": 1.3402372370847928e-05, + "loss": 0.0525, "step": 19495 }, { - "epoch": 0.4949866734357152, - "grad_norm": 0.5225616693496704, - "learning_rate": 1.67000888437619e-05, - "loss": 0.1303, + "epoch": 0.9898979643636733, + "grad_norm": 0.3458189070224762, + "learning_rate": 1.3400680237575512e-05, + "loss": 0.1819, "step": 19500 }, { - "epoch": 0.4951135930955705, - "grad_norm": 0.4329621195793152, - "learning_rate": 1.6699242712696198e-05, - "loss": 0.1081, + "epoch": 0.9901517843545358, + "grad_norm": 0.3702060580253601, + "learning_rate": 1.3398988104303097e-05, + "loss": 0.0635, "step": 19505 }, { - "epoch": 0.4952405127554258, - "grad_norm": 1.0323795080184937, - "learning_rate": 1.6698396581630496e-05, - "loss": 0.0788, + "epoch": 0.9904056043453983, + "grad_norm": 0.3774377405643463, + "learning_rate": 1.3397295971030679e-05, + "loss": 0.0685, "step": 19510 }, { - "epoch": 0.49536743241528114, - "grad_norm": 0.4719780683517456, - "learning_rate": 1.6697550450564794e-05, - "loss": 0.0804, + "epoch": 0.9906594243362608, + "grad_norm": 0.296116441488266, + "learning_rate": 1.3395603837758263e-05, + "loss": 0.0704, "step": 19515 }, { - "epoch": 0.4954943520751364, - "grad_norm": 0.7488115429878235, - "learning_rate": 1.669670431949909e-05, - "loss": 0.1162, + "epoch": 0.9909132443271232, + "grad_norm": 0.5571689605712891, + "learning_rate": 1.3393911704485846e-05, + "loss": 0.0602, "step": 19520 }, { - "epoch": 0.49562127173499176, - "grad_norm": 0.5859333276748657, - "learning_rate": 1.6695858188433388e-05, - "loss": 0.1211, + "epoch": 0.9911670643179857, + "grad_norm": 0.36025819182395935, + "learning_rate": 1.339221957121343e-05, + "loss": 0.0623, "step": 19525 }, { - "epoch": 0.49574819139484705, - "grad_norm": 0.5923458933830261, - "learning_rate": 1.6695012057367686e-05, - "loss": 0.078, + "epoch": 0.9914208843088481, + "grad_norm": 0.39932265877723694, + "learning_rate": 1.3390527437941015e-05, + "loss": 0.0564, "step": 19530 }, { - "epoch": 0.4958751110547024, - "grad_norm": 0.6318618655204773, - "learning_rate": 1.6694165926301985e-05, - "loss": 0.0988, + "epoch": 0.9916747042997106, + "grad_norm": 0.35256311297416687, + "learning_rate": 1.3388835304668597e-05, + "loss": 0.0751, "step": 19535 }, { - "epoch": 0.4960020307145577, - "grad_norm": 0.6077578067779541, - "learning_rate": 1.6693319795236283e-05, - "loss": 0.0944, + "epoch": 0.9919285242905731, + "grad_norm": 0.4102122187614441, + "learning_rate": 1.338714317139618e-05, + "loss": 0.0593, "step": 19540 }, { - "epoch": 0.496128950374413, - "grad_norm": 0.6861861944198608, - "learning_rate": 1.669247366417058e-05, - "loss": 0.1081, + "epoch": 0.9921823442814356, + "grad_norm": 0.3455532193183899, + "learning_rate": 1.3385451038123764e-05, + "loss": 0.0598, "step": 19545 }, { - "epoch": 0.4962558700342683, - "grad_norm": 0.6280723214149475, - "learning_rate": 1.669162753310488e-05, - "loss": 0.1003, + "epoch": 0.9924361642722981, + "grad_norm": 0.3834126889705658, + "learning_rate": 1.3383758904851347e-05, + "loss": 0.0608, "step": 19550 }, { - "epoch": 0.49638278969412364, - "grad_norm": 0.6779731512069702, - "learning_rate": 1.6690781402039178e-05, - "loss": 0.0832, + "epoch": 0.9926899842631606, + "grad_norm": 0.4701562821865082, + "learning_rate": 1.338206677157893e-05, + "loss": 0.057, "step": 19555 }, { - "epoch": 0.4965097093539789, - "grad_norm": 0.6034691333770752, - "learning_rate": 1.6689935270973473e-05, - "loss": 0.113, + "epoch": 0.9929438042540231, + "grad_norm": 0.5309749245643616, + "learning_rate": 1.3380374638306514e-05, + "loss": 0.0592, "step": 19560 }, { - "epoch": 0.49663662901383426, - "grad_norm": 0.4523245096206665, - "learning_rate": 1.668908913990777e-05, - "loss": 0.0955, + "epoch": 0.9931976242448856, + "grad_norm": 0.4886894226074219, + "learning_rate": 1.3378682505034098e-05, + "loss": 0.0625, "step": 19565 }, { - "epoch": 0.49676354867368955, - "grad_norm": 0.5686243176460266, - "learning_rate": 1.668824300884207e-05, - "loss": 0.1034, + "epoch": 0.993451444235748, + "grad_norm": 0.5212421417236328, + "learning_rate": 1.337699037176168e-05, + "loss": 0.0668, "step": 19570 }, { - "epoch": 0.4968904683335449, - "grad_norm": 0.49595338106155396, - "learning_rate": 1.668739687777637e-05, - "loss": 0.1055, + "epoch": 0.9937052642266105, + "grad_norm": 0.3621155917644501, + "learning_rate": 1.3375298238489265e-05, + "loss": 0.0623, "step": 19575 }, { - "epoch": 0.4970173879934002, - "grad_norm": 0.5879131555557251, - "learning_rate": 1.6686550746710667e-05, - "loss": 0.1141, + "epoch": 0.9939590842174729, + "grad_norm": 0.47136250138282776, + "learning_rate": 1.3373606105216847e-05, + "loss": 0.0728, "step": 19580 }, { - "epoch": 0.4971443076532555, - "grad_norm": 0.8022995591163635, - "learning_rate": 1.6685704615644965e-05, - "loss": 0.0874, + "epoch": 0.9942129042083354, + "grad_norm": 0.31008508801460266, + "learning_rate": 1.3371913971944432e-05, + "loss": 0.0583, "step": 19585 }, { - "epoch": 0.4972712273131108, - "grad_norm": 0.5565257668495178, - "learning_rate": 1.6684858484579264e-05, - "loss": 0.1092, + "epoch": 0.9944667241991979, + "grad_norm": 0.47124168276786804, + "learning_rate": 1.3370221838672016e-05, + "loss": 0.0659, "step": 19590 }, { - "epoch": 0.49739814697296614, - "grad_norm": 0.4459535777568817, - "learning_rate": 1.6684012353513562e-05, - "loss": 0.0705, + "epoch": 0.9947205441900604, + "grad_norm": 0.40367239713668823, + "learning_rate": 1.3368529705399598e-05, + "loss": 0.059, "step": 19595 }, { - "epoch": 0.4975250666328214, - "grad_norm": 0.491912305355072, - "learning_rate": 1.6683166222447857e-05, - "loss": 0.108, + "epoch": 0.9949743641809229, + "grad_norm": 0.37501493096351624, + "learning_rate": 1.3366837572127183e-05, + "loss": 0.0666, "step": 19600 }, { - "epoch": 0.49765198629267676, - "grad_norm": 0.2980550527572632, - "learning_rate": 1.6682320091382155e-05, - "loss": 0.0687, + "epoch": 0.9952281841717854, + "grad_norm": 0.6273606419563293, + "learning_rate": 1.3365145438854765e-05, + "loss": 0.0662, "step": 19605 }, { - "epoch": 0.49777890595253205, - "grad_norm": 0.6104912757873535, - "learning_rate": 1.6681473960316454e-05, - "loss": 0.1017, + "epoch": 0.9954820041626479, + "grad_norm": 0.3741655945777893, + "learning_rate": 1.3363453305582348e-05, + "loss": 0.0577, "step": 19610 }, { - "epoch": 0.49790582561238733, - "grad_norm": 0.4643993079662323, - "learning_rate": 1.6680627829250752e-05, - "loss": 0.0937, + "epoch": 0.9957358241535104, + "grad_norm": 0.33114105463027954, + "learning_rate": 1.3361761172309933e-05, + "loss": 0.0658, "step": 19615 }, { - "epoch": 0.49803274527224267, - "grad_norm": 0.5789825320243835, - "learning_rate": 1.667978169818505e-05, - "loss": 0.0895, + "epoch": 0.9959896441443729, + "grad_norm": 0.35361751914024353, + "learning_rate": 1.3360069039037515e-05, + "loss": 0.063, "step": 19620 }, { - "epoch": 0.49815966493209796, - "grad_norm": 0.6257911324501038, - "learning_rate": 1.667893556711935e-05, - "loss": 0.1253, + "epoch": 0.9962434641352353, + "grad_norm": 0.41206908226013184, + "learning_rate": 1.3358376905765099e-05, + "loss": 0.0629, "step": 19625 }, { - "epoch": 0.4982865845919533, - "grad_norm": 0.7247307896614075, - "learning_rate": 1.6678089436053648e-05, - "loss": 0.0943, + "epoch": 0.9964972841260977, + "grad_norm": 0.4646819233894348, + "learning_rate": 1.3356684772492682e-05, + "loss": 0.0647, "step": 19630 }, { - "epoch": 0.4984135042518086, - "grad_norm": 0.739146888256073, - "learning_rate": 1.6677243304987946e-05, - "loss": 0.0912, + "epoch": 0.9967511041169602, + "grad_norm": 0.3486287295818329, + "learning_rate": 1.3354992639220266e-05, + "loss": 0.0571, "step": 19635 }, { - "epoch": 0.4985404239116639, - "grad_norm": 0.5403836965560913, - "learning_rate": 1.6676397173922244e-05, - "loss": 0.1051, + "epoch": 0.9970049241078227, + "grad_norm": 0.3921375274658203, + "learning_rate": 1.3353300505947851e-05, + "loss": 0.057, "step": 19640 }, { - "epoch": 0.4986673435715192, - "grad_norm": 0.6873509883880615, - "learning_rate": 1.667555104285654e-05, - "loss": 0.0973, + "epoch": 0.9972587440986852, + "grad_norm": 0.3869142532348633, + "learning_rate": 1.3351608372675433e-05, + "loss": 0.0695, "step": 19645 }, { - "epoch": 0.49879426323137455, - "grad_norm": 0.5005928874015808, - "learning_rate": 1.6674704911790838e-05, - "loss": 0.0793, + "epoch": 0.9975125640895477, + "grad_norm": 0.652873694896698, + "learning_rate": 1.3349916239403015e-05, + "loss": 0.0608, "step": 19650 }, { - "epoch": 0.49892118289122983, - "grad_norm": 0.5070963501930237, - "learning_rate": 1.6673858780725136e-05, - "loss": 0.0843, + "epoch": 0.9977663840804102, + "grad_norm": 0.35025376081466675, + "learning_rate": 1.33482241061306e-05, + "loss": 0.0675, "step": 19655 }, { - "epoch": 0.49904810255108517, - "grad_norm": 0.5442276000976562, - "learning_rate": 1.6673012649659435e-05, - "loss": 0.0984, + "epoch": 0.9980202040712727, + "grad_norm": 0.45848169922828674, + "learning_rate": 1.3346531972858184e-05, + "loss": 0.0656, "step": 19660 }, { - "epoch": 0.49917502221094046, - "grad_norm": 0.6352691650390625, - "learning_rate": 1.667216651859373e-05, - "loss": 0.096, + "epoch": 0.9982740240621352, + "grad_norm": 0.3314003050327301, + "learning_rate": 1.3344839839585766e-05, + "loss": 0.0591, "step": 19665 }, { - "epoch": 0.4993019418707958, - "grad_norm": 0.6160368919372559, - "learning_rate": 1.6671320387528028e-05, - "loss": 0.0993, + "epoch": 0.9985278440529977, + "grad_norm": 0.3548816740512848, + "learning_rate": 1.334314770631335e-05, + "loss": 0.0668, "step": 19670 }, { - "epoch": 0.4994288615306511, - "grad_norm": 0.4976855516433716, - "learning_rate": 1.6670474256462326e-05, - "loss": 0.0992, + "epoch": 0.99878166404386, + "grad_norm": 0.46631526947021484, + "learning_rate": 1.3341455573040933e-05, + "loss": 0.0563, "step": 19675 }, { - "epoch": 0.4995557811905064, - "grad_norm": 0.4441879987716675, - "learning_rate": 1.6669628125396625e-05, - "loss": 0.0683, + "epoch": 0.9990354840347225, + "grad_norm": 0.5142710208892822, + "learning_rate": 1.3339763439768518e-05, + "loss": 0.0609, "step": 19680 }, { - "epoch": 0.4996827008503617, - "grad_norm": 0.3724319636821747, - "learning_rate": 1.6668781994330923e-05, - "loss": 0.0927, + "epoch": 0.999289304025585, + "grad_norm": 0.965015172958374, + "learning_rate": 1.3338071306496101e-05, + "loss": 0.0591, "step": 19685 }, { - "epoch": 0.49980962051021705, - "grad_norm": 0.538765549659729, - "learning_rate": 1.666793586326522e-05, - "loss": 0.1067, + "epoch": 0.9995431240164475, + "grad_norm": 0.568295955657959, + "learning_rate": 1.3336379173223683e-05, + "loss": 0.0636, "step": 19690 }, { - "epoch": 0.49993654017007233, - "grad_norm": 0.45005956292152405, - "learning_rate": 1.666708973219952e-05, - "loss": 0.0947, + "epoch": 0.99979694400731, + "grad_norm": 0.3382091820240021, + "learning_rate": 1.3334687039951268e-05, + "loss": 0.0609, "step": 19695 }, { - "epoch": 0.5000634598299276, - "grad_norm": 0.4414019286632538, - "learning_rate": 1.666624360113382e-05, - "loss": 0.0831, - "step": 19700 - }, - { - "epoch": 0.500190379489783, - "grad_norm": 0.5497666597366333, - "learning_rate": 1.6665397470068113e-05, - "loss": 0.0819, - "step": 19705 - }, - { - "epoch": 0.5003172991496383, - "grad_norm": 0.9812287092208862, - "learning_rate": 1.6664551339002412e-05, - "loss": 0.0933, - "step": 19710 - }, - { - "epoch": 0.5004442188094936, - "grad_norm": 1.4678304195404053, - "learning_rate": 1.666370520793671e-05, - "loss": 0.0872, - "step": 19715 - }, - { - "epoch": 0.5005711384693489, - "grad_norm": 0.5792670845985413, - "learning_rate": 1.666285907687101e-05, - "loss": 0.0903, - "step": 19720 - }, - { - "epoch": 0.5006980581292042, - "grad_norm": 0.36847779154777527, - "learning_rate": 1.6662012945805307e-05, - "loss": 0.1178, - "step": 19725 - }, - { - "epoch": 0.5008249777890595, - "grad_norm": 0.5593390464782715, - "learning_rate": 1.6661166814739605e-05, - "loss": 0.0791, - "step": 19730 - }, - { - "epoch": 0.5009518974489149, - "grad_norm": 0.4409730136394501, - "learning_rate": 1.6660320683673904e-05, - "loss": 0.0859, - "step": 19735 - }, - { - "epoch": 0.5010788171087701, - "grad_norm": 0.8377828001976013, - "learning_rate": 1.6659474552608202e-05, - "loss": 0.1105, - "step": 19740 - }, - { - "epoch": 0.5012057367686255, - "grad_norm": 0.7388654351234436, - "learning_rate": 1.6658628421542497e-05, - "loss": 0.1268, - "step": 19745 - }, - { - "epoch": 0.5013326564284808, - "grad_norm": 0.7784233689308167, - "learning_rate": 1.6657782290476796e-05, - "loss": 0.0961, - "step": 19750 - }, - { - "epoch": 0.5014595760883361, - "grad_norm": 0.5578014850616455, - "learning_rate": 1.6656936159411094e-05, - "loss": 0.0881, - "step": 19755 - }, - { - "epoch": 0.5015864957481914, - "grad_norm": 0.44881269335746765, - "learning_rate": 1.6656090028345392e-05, - "loss": 0.0797, - "step": 19760 - }, - { - "epoch": 0.5017134154080467, - "grad_norm": 0.5749704241752625, - "learning_rate": 1.665524389727969e-05, - "loss": 0.0954, - "step": 19765 - }, - { - "epoch": 0.501840335067902, - "grad_norm": 0.6196232438087463, - "learning_rate": 1.665439776621399e-05, - "loss": 0.1056, - "step": 19770 - }, - { - "epoch": 0.5019672547277574, - "grad_norm": 0.9598363637924194, - "learning_rate": 1.6653551635148288e-05, - "loss": 0.1118, - "step": 19775 - }, - { - "epoch": 0.5020941743876126, - "grad_norm": 0.5604069232940674, - "learning_rate": 1.6652705504082586e-05, - "loss": 0.0897, - "step": 19780 - }, - { - "epoch": 0.502221094047468, - "grad_norm": 1.0647603273391724, - "learning_rate": 1.665185937301688e-05, - "loss": 0.1052, - "step": 19785 - }, - { - "epoch": 0.5023480137073233, - "grad_norm": 0.5377844572067261, - "learning_rate": 1.665101324195118e-05, - "loss": 0.0956, - "step": 19790 - }, - { - "epoch": 0.5024749333671785, - "grad_norm": 0.7939673662185669, - "learning_rate": 1.6650167110885478e-05, - "loss": 0.0835, - "step": 19795 - }, - { - "epoch": 0.5026018530270339, - "grad_norm": 0.42631039023399353, - "learning_rate": 1.6649320979819776e-05, - "loss": 0.0747, - "step": 19800 - }, - { - "epoch": 0.5027287726868892, - "grad_norm": 0.6767135262489319, - "learning_rate": 1.664847484875407e-05, - "loss": 0.0878, - "step": 19805 - }, - { - "epoch": 0.5028556923467445, - "grad_norm": 0.5626912713050842, - "learning_rate": 1.664762871768837e-05, - "loss": 0.0948, - "step": 19810 - }, - { - "epoch": 0.5029826120065998, - "grad_norm": 0.4751000702381134, - "learning_rate": 1.6646782586622668e-05, - "loss": 0.0993, - "step": 19815 - }, - { - "epoch": 0.5031095316664551, - "grad_norm": 0.5756491422653198, - "learning_rate": 1.6645936455556966e-05, - "loss": 0.1021, - "step": 19820 - }, - { - "epoch": 0.5032364513263105, - "grad_norm": 0.5661914348602295, - "learning_rate": 1.6645090324491265e-05, - "loss": 0.1062, - "step": 19825 - }, - { - "epoch": 0.5033633709861658, - "grad_norm": 0.9604399800300598, - "learning_rate": 1.6644244193425563e-05, - "loss": 0.1114, - "step": 19830 - }, - { - "epoch": 0.503490290646021, - "grad_norm": 2.3052818775177, - "learning_rate": 1.664339806235986e-05, - "loss": 0.079, - "step": 19835 - }, - { - "epoch": 0.5036172103058764, - "grad_norm": 0.5581837892532349, - "learning_rate": 1.664255193129416e-05, - "loss": 0.0962, - "step": 19840 - }, - { - "epoch": 0.5037441299657317, - "grad_norm": 0.7398168444633484, - "learning_rate": 1.6641705800228455e-05, - "loss": 0.119, - "step": 19845 - }, - { - "epoch": 0.503871049625587, - "grad_norm": 0.46287450194358826, - "learning_rate": 1.6640859669162753e-05, - "loss": 0.0974, - "step": 19850 - }, - { - "epoch": 0.5039979692854423, - "grad_norm": 0.6258445978164673, - "learning_rate": 1.6640013538097052e-05, - "loss": 0.101, - "step": 19855 - }, - { - "epoch": 0.5041248889452976, - "grad_norm": 0.5074719786643982, - "learning_rate": 1.663916740703135e-05, - "loss": 0.09, - "step": 19860 - }, - { - "epoch": 0.504251808605153, - "grad_norm": 0.6754934787750244, - "learning_rate": 1.663832127596565e-05, - "loss": 0.0946, - "step": 19865 - }, - { - "epoch": 0.5043787282650083, - "grad_norm": 0.5471130013465881, - "learning_rate": 1.6637475144899947e-05, - "loss": 0.1141, - "step": 19870 - }, - { - "epoch": 0.5045056479248635, - "grad_norm": 0.7180278897285461, - "learning_rate": 1.6636629013834245e-05, - "loss": 0.0814, - "step": 19875 - }, - { - "epoch": 0.5046325675847189, - "grad_norm": 0.48811355233192444, - "learning_rate": 1.6635782882768544e-05, - "loss": 0.099, - "step": 19880 - }, - { - "epoch": 0.5047594872445742, - "grad_norm": 0.6394610404968262, - "learning_rate": 1.663493675170284e-05, - "loss": 0.1036, - "step": 19885 - }, - { - "epoch": 0.5048864069044295, - "grad_norm": 0.6242973208427429, - "learning_rate": 1.6634090620637137e-05, - "loss": 0.11, - "step": 19890 - }, - { - "epoch": 0.5050133265642848, - "grad_norm": 2.14322829246521, - "learning_rate": 1.6633244489571436e-05, - "loss": 0.1176, - "step": 19895 - }, - { - "epoch": 0.5051402462241401, - "grad_norm": 0.38455262780189514, - "learning_rate": 1.6632398358505734e-05, - "loss": 0.1037, - "step": 19900 - }, - { - "epoch": 0.5052671658839955, - "grad_norm": 0.6989079117774963, - "learning_rate": 1.6631552227440033e-05, - "loss": 0.1157, - "step": 19905 - }, - { - "epoch": 0.5053940855438508, - "grad_norm": 0.5305747985839844, - "learning_rate": 1.663070609637433e-05, - "loss": 0.0927, - "step": 19910 - }, - { - "epoch": 0.505521005203706, - "grad_norm": 0.5454434156417847, - "learning_rate": 1.662985996530863e-05, - "loss": 0.1123, - "step": 19915 - }, - { - "epoch": 0.5056479248635614, - "grad_norm": 0.5282379984855652, - "learning_rate": 1.6629013834242928e-05, - "loss": 0.0786, - "step": 19920 - }, - { - "epoch": 0.5057748445234167, - "grad_norm": 1.351515769958496, - "learning_rate": 1.6628167703177223e-05, - "loss": 0.0986, - "step": 19925 - }, - { - "epoch": 0.505901764183272, - "grad_norm": 0.32827499508857727, - "learning_rate": 1.662732157211152e-05, - "loss": 0.0849, - "step": 19930 - }, - { - "epoch": 0.5060286838431273, - "grad_norm": 0.6324721574783325, - "learning_rate": 1.662647544104582e-05, - "loss": 0.0817, - "step": 19935 - }, - { - "epoch": 0.5061556035029826, - "grad_norm": 0.4307498335838318, - "learning_rate": 1.6625629309980118e-05, - "loss": 0.0892, - "step": 19940 - }, - { - "epoch": 0.506282523162838, - "grad_norm": 0.6163227558135986, - "learning_rate": 1.6624783178914413e-05, - "loss": 0.1029, - "step": 19945 - }, - { - "epoch": 0.5064094428226933, - "grad_norm": 2.131944417953491, - "learning_rate": 1.662393704784871e-05, - "loss": 0.0964, - "step": 19950 - }, - { - "epoch": 0.5065363624825485, - "grad_norm": 0.5297308564186096, - "learning_rate": 1.662309091678301e-05, - "loss": 0.0681, - "step": 19955 - }, - { - "epoch": 0.5066632821424039, - "grad_norm": 0.6266825199127197, - "learning_rate": 1.6622244785717308e-05, - "loss": 0.0838, - "step": 19960 - }, - { - "epoch": 0.5067902018022592, - "grad_norm": 0.7263214588165283, - "learning_rate": 1.6621398654651607e-05, - "loss": 0.1129, - "step": 19965 - }, - { - "epoch": 0.5069171214621144, - "grad_norm": 0.45671477913856506, - "learning_rate": 1.6620552523585905e-05, - "loss": 0.0815, - "step": 19970 - }, - { - "epoch": 0.5070440411219698, - "grad_norm": 0.7688432335853577, - "learning_rate": 1.6619706392520203e-05, - "loss": 0.0876, - "step": 19975 - }, - { - "epoch": 0.5071709607818251, - "grad_norm": 0.8926118612289429, - "learning_rate": 1.6618860261454502e-05, - "loss": 0.0882, - "step": 19980 - }, - { - "epoch": 0.5072978804416804, - "grad_norm": 1.7958054542541504, - "learning_rate": 1.6618014130388797e-05, - "loss": 0.0978, - "step": 19985 - }, - { - "epoch": 0.5074248001015357, - "grad_norm": 0.6260294318199158, - "learning_rate": 1.6617167999323095e-05, - "loss": 0.1047, - "step": 19990 - }, - { - "epoch": 0.507551719761391, - "grad_norm": 0.5420095920562744, - "learning_rate": 1.6616321868257394e-05, - "loss": 0.0884, - "step": 19995 - }, - { - "epoch": 0.5076786394212464, - "grad_norm": 0.637332022190094, - "learning_rate": 1.6615475737191692e-05, - "loss": 0.0948, - "step": 20000 - }, - { - "epoch": 0.5078055590811017, - "grad_norm": 0.5119022130966187, - "learning_rate": 1.661462960612599e-05, - "loss": 0.0993, - "step": 20005 - }, - { - "epoch": 0.5079324787409569, - "grad_norm": 0.4489385783672333, - "learning_rate": 1.661378347506029e-05, - "loss": 0.0759, - "step": 20010 - }, - { - "epoch": 0.5080593984008123, - "grad_norm": 0.9239776730537415, - "learning_rate": 1.6612937343994587e-05, - "loss": 0.105, - "step": 20015 - }, - { - "epoch": 0.5081863180606676, - "grad_norm": 0.5969302654266357, - "learning_rate": 1.6612091212928886e-05, - "loss": 0.0914, - "step": 20020 - }, - { - "epoch": 0.508313237720523, - "grad_norm": 0.8410769701004028, - "learning_rate": 1.661124508186318e-05, - "loss": 0.0731, - "step": 20025 - }, - { - "epoch": 0.5084401573803782, - "grad_norm": 0.46746039390563965, - "learning_rate": 1.661039895079748e-05, - "loss": 0.0912, - "step": 20030 - }, - { - "epoch": 0.5085670770402335, - "grad_norm": 0.532863438129425, - "learning_rate": 1.6609552819731777e-05, - "loss": 0.1054, - "step": 20035 - }, - { - "epoch": 0.5086939967000889, - "grad_norm": 0.560094952583313, - "learning_rate": 1.6608706688666076e-05, - "loss": 0.1137, - "step": 20040 - }, - { - "epoch": 0.5088209163599442, - "grad_norm": 0.652804434299469, - "learning_rate": 1.6607860557600374e-05, - "loss": 0.124, - "step": 20045 - }, - { - "epoch": 0.5089478360197994, - "grad_norm": 0.8804638385772705, - "learning_rate": 1.6607014426534673e-05, - "loss": 0.097, - "step": 20050 - }, - { - "epoch": 0.5090747556796548, - "grad_norm": 0.5823671817779541, - "learning_rate": 1.660616829546897e-05, - "loss": 0.0943, - "step": 20055 - }, - { - "epoch": 0.5092016753395101, - "grad_norm": 0.6465955376625061, - "learning_rate": 1.660532216440327e-05, - "loss": 0.121, - "step": 20060 - }, - { - "epoch": 0.5093285949993654, - "grad_norm": 0.6377924084663391, - "learning_rate": 1.6604476033337564e-05, - "loss": 0.0961, - "step": 20065 - }, - { - "epoch": 0.5094555146592207, - "grad_norm": 0.6566052436828613, - "learning_rate": 1.6603629902271863e-05, - "loss": 0.1093, - "step": 20070 - }, - { - "epoch": 0.509582434319076, - "grad_norm": 0.6621702313423157, - "learning_rate": 1.660278377120616e-05, - "loss": 0.0856, - "step": 20075 - }, - { - "epoch": 0.5097093539789314, - "grad_norm": 0.6070704460144043, - "learning_rate": 1.660193764014046e-05, - "loss": 0.1, - "step": 20080 - }, - { - "epoch": 0.5098362736387867, - "grad_norm": 0.6874863505363464, - "learning_rate": 1.6601091509074755e-05, - "loss": 0.085, - "step": 20085 - }, - { - "epoch": 0.5099631932986419, - "grad_norm": 0.6172812581062317, - "learning_rate": 1.6600245378009053e-05, - "loss": 0.103, - "step": 20090 - }, - { - "epoch": 0.5100901129584973, - "grad_norm": 1.7730872631072998, - "learning_rate": 1.659939924694335e-05, - "loss": 0.1064, - "step": 20095 - }, - { - "epoch": 0.5102170326183526, - "grad_norm": 0.45988649129867554, - "learning_rate": 1.659855311587765e-05, - "loss": 0.0894, - "step": 20100 - }, - { - "epoch": 0.510343952278208, - "grad_norm": 0.5985777378082275, - "learning_rate": 1.6597706984811948e-05, - "loss": 0.1085, - "step": 20105 - }, - { - "epoch": 0.5104708719380632, - "grad_norm": 0.45311930775642395, - "learning_rate": 1.6596860853746247e-05, - "loss": 0.0945, - "step": 20110 - }, - { - "epoch": 0.5105977915979185, - "grad_norm": 0.37515467405319214, - "learning_rate": 1.6596014722680545e-05, - "loss": 0.075, - "step": 20115 - }, - { - "epoch": 0.5107247112577739, - "grad_norm": 0.37705013155937195, - "learning_rate": 1.6595168591614843e-05, - "loss": 0.0726, - "step": 20120 - }, - { - "epoch": 0.5108516309176292, - "grad_norm": 0.8580511212348938, - "learning_rate": 1.659432246054914e-05, - "loss": 0.0855, - "step": 20125 - }, - { - "epoch": 0.5109785505774844, - "grad_norm": 0.6302055716514587, - "learning_rate": 1.6593476329483437e-05, - "loss": 0.1296, - "step": 20130 - }, - { - "epoch": 0.5111054702373398, - "grad_norm": 0.5887080430984497, - "learning_rate": 1.6592630198417735e-05, - "loss": 0.1075, - "step": 20135 - }, - { - "epoch": 0.5112323898971951, - "grad_norm": 0.3792460560798645, - "learning_rate": 1.6591784067352034e-05, - "loss": 0.079, - "step": 20140 - }, - { - "epoch": 0.5113593095570503, - "grad_norm": 0.7752928733825684, - "learning_rate": 1.6590937936286332e-05, - "loss": 0.0999, - "step": 20145 - }, - { - "epoch": 0.5114862292169057, - "grad_norm": 0.5385003685951233, - "learning_rate": 1.659009180522063e-05, - "loss": 0.106, - "step": 20150 - }, - { - "epoch": 0.511613148876761, - "grad_norm": 0.45954132080078125, - "learning_rate": 1.658924567415493e-05, - "loss": 0.0768, - "step": 20155 - }, - { - "epoch": 0.5117400685366164, - "grad_norm": 0.6186748147010803, - "learning_rate": 1.6588399543089227e-05, - "loss": 0.0924, - "step": 20160 - }, - { - "epoch": 0.5118669881964716, - "grad_norm": 0.5684992074966431, - "learning_rate": 1.6587553412023526e-05, - "loss": 0.1071, - "step": 20165 - }, - { - "epoch": 0.5119939078563269, - "grad_norm": 0.5828484892845154, - "learning_rate": 1.658670728095782e-05, - "loss": 0.1076, - "step": 20170 - }, - { - "epoch": 0.5121208275161823, - "grad_norm": 0.4090133011341095, - "learning_rate": 1.658586114989212e-05, - "loss": 0.0767, - "step": 20175 - }, - { - "epoch": 0.5122477471760376, - "grad_norm": 0.39908725023269653, - "learning_rate": 1.6585015018826418e-05, - "loss": 0.0781, - "step": 20180 - }, - { - "epoch": 0.5123746668358928, - "grad_norm": 0.7353113293647766, - "learning_rate": 1.6584168887760716e-05, - "loss": 0.0791, - "step": 20185 - }, - { - "epoch": 0.5125015864957482, - "grad_norm": 0.6577494740486145, - "learning_rate": 1.6583322756695014e-05, - "loss": 0.1037, - "step": 20190 - }, - { - "epoch": 0.5126285061556035, - "grad_norm": 1.2765203714370728, - "learning_rate": 1.6582476625629313e-05, - "loss": 0.1045, - "step": 20195 - }, - { - "epoch": 0.5127554258154589, - "grad_norm": 0.7477211356163025, - "learning_rate": 1.658163049456361e-05, - "loss": 0.1206, - "step": 20200 - }, - { - "epoch": 0.5128823454753141, - "grad_norm": 0.6470760107040405, - "learning_rate": 1.658078436349791e-05, - "loss": 0.1044, - "step": 20205 - }, - { - "epoch": 0.5130092651351694, - "grad_norm": 0.8798675537109375, - "learning_rate": 1.6579938232432205e-05, - "loss": 0.0935, - "step": 20210 - }, - { - "epoch": 0.5131361847950248, - "grad_norm": 0.7529031038284302, - "learning_rate": 1.6579092101366503e-05, - "loss": 0.0839, - "step": 20215 - }, - { - "epoch": 0.5132631044548801, - "grad_norm": 0.7313467860221863, - "learning_rate": 1.65782459703008e-05, - "loss": 0.087, - "step": 20220 - }, - { - "epoch": 0.5133900241147353, - "grad_norm": 0.6592637896537781, - "learning_rate": 1.65773998392351e-05, - "loss": 0.0845, - "step": 20225 - }, - { - "epoch": 0.5135169437745907, - "grad_norm": 0.4067671000957489, - "learning_rate": 1.6576553708169395e-05, - "loss": 0.1067, - "step": 20230 - }, - { - "epoch": 0.513643863434446, - "grad_norm": 1.3131901025772095, - "learning_rate": 1.6575707577103693e-05, - "loss": 0.0835, - "step": 20235 - }, - { - "epoch": 0.5137707830943014, - "grad_norm": 0.441041499376297, - "learning_rate": 1.657486144603799e-05, - "loss": 0.1061, - "step": 20240 - }, - { - "epoch": 0.5138977027541566, - "grad_norm": 1.004672884941101, - "learning_rate": 1.657401531497229e-05, - "loss": 0.0911, - "step": 20245 - }, - { - "epoch": 0.5140246224140119, - "grad_norm": 0.7555381059646606, - "learning_rate": 1.657316918390659e-05, - "loss": 0.0961, - "step": 20250 - }, - { - "epoch": 0.5141515420738673, - "grad_norm": 0.7465673685073853, - "learning_rate": 1.6572323052840887e-05, - "loss": 0.0892, - "step": 20255 - }, - { - "epoch": 0.5142784617337226, - "grad_norm": 0.8479891419410706, - "learning_rate": 1.6571476921775185e-05, - "loss": 0.0953, - "step": 20260 - }, - { - "epoch": 0.5144053813935778, - "grad_norm": 0.4479544460773468, - "learning_rate": 1.6570630790709484e-05, - "loss": 0.0861, - "step": 20265 - }, - { - "epoch": 0.5145323010534332, - "grad_norm": 0.6327027678489685, - "learning_rate": 1.656978465964378e-05, - "loss": 0.1084, - "step": 20270 - }, - { - "epoch": 0.5146592207132885, - "grad_norm": 0.48268890380859375, - "learning_rate": 1.6568938528578077e-05, - "loss": 0.0872, - "step": 20275 - }, - { - "epoch": 0.5147861403731439, - "grad_norm": 0.5535205602645874, - "learning_rate": 1.6568092397512375e-05, - "loss": 0.076, - "step": 20280 - }, - { - "epoch": 0.5149130600329991, - "grad_norm": 0.5264055132865906, - "learning_rate": 1.6567246266446674e-05, - "loss": 0.0855, - "step": 20285 - }, - { - "epoch": 0.5150399796928544, - "grad_norm": 0.5739513039588928, - "learning_rate": 1.6566400135380972e-05, - "loss": 0.1, - "step": 20290 - }, - { - "epoch": 0.5151668993527098, - "grad_norm": 0.5062876343727112, - "learning_rate": 1.656555400431527e-05, - "loss": 0.1231, - "step": 20295 - }, - { - "epoch": 0.515293819012565, - "grad_norm": 1.2796211242675781, - "learning_rate": 1.656470787324957e-05, - "loss": 0.0849, - "step": 20300 - }, - { - "epoch": 0.5154207386724203, - "grad_norm": 0.6284975409507751, - "learning_rate": 1.6563861742183867e-05, - "loss": 0.0874, - "step": 20305 - }, - { - "epoch": 0.5155476583322757, - "grad_norm": 0.5426923632621765, - "learning_rate": 1.6563015611118162e-05, - "loss": 0.0802, - "step": 20310 - }, - { - "epoch": 0.515674577992131, - "grad_norm": 0.5967511534690857, - "learning_rate": 1.656216948005246e-05, - "loss": 0.1097, - "step": 20315 - }, - { - "epoch": 0.5158014976519862, - "grad_norm": 0.4320564866065979, - "learning_rate": 1.656132334898676e-05, - "loss": 0.0799, - "step": 20320 - }, - { - "epoch": 0.5159284173118416, - "grad_norm": 0.6252341270446777, - "learning_rate": 1.6560477217921058e-05, - "loss": 0.0794, - "step": 20325 - }, - { - "epoch": 0.5160553369716969, - "grad_norm": 0.9294304847717285, - "learning_rate": 1.6559631086855356e-05, - "loss": 0.0846, - "step": 20330 - }, - { - "epoch": 0.5161822566315523, - "grad_norm": 0.4284333884716034, - "learning_rate": 1.6558784955789654e-05, - "loss": 0.0794, - "step": 20335 - }, - { - "epoch": 0.5163091762914075, - "grad_norm": 0.7344610095024109, - "learning_rate": 1.6557938824723953e-05, - "loss": 0.11, - "step": 20340 - }, - { - "epoch": 0.5164360959512628, - "grad_norm": 0.4932243824005127, - "learning_rate": 1.655709269365825e-05, - "loss": 0.097, - "step": 20345 - }, - { - "epoch": 0.5165630156111182, - "grad_norm": 0.5826136469841003, - "learning_rate": 1.6556246562592546e-05, - "loss": 0.0843, - "step": 20350 - }, - { - "epoch": 0.5166899352709735, - "grad_norm": 0.5660582780838013, - "learning_rate": 1.6555400431526845e-05, - "loss": 0.0931, - "step": 20355 - }, - { - "epoch": 0.5168168549308287, - "grad_norm": 0.42824915051460266, - "learning_rate": 1.6554554300461143e-05, - "loss": 0.1151, - "step": 20360 - }, - { - "epoch": 0.5169437745906841, - "grad_norm": 0.689601480960846, - "learning_rate": 1.655370816939544e-05, - "loss": 0.0854, - "step": 20365 - }, - { - "epoch": 0.5170706942505394, - "grad_norm": 1.1358097791671753, - "learning_rate": 1.6552862038329736e-05, - "loss": 0.1022, - "step": 20370 - }, - { - "epoch": 0.5171976139103948, - "grad_norm": 0.3622950613498688, - "learning_rate": 1.6552015907264035e-05, - "loss": 0.0761, - "step": 20375 - }, - { - "epoch": 0.51732453357025, - "grad_norm": 0.5106274485588074, - "learning_rate": 1.6551169776198333e-05, - "loss": 0.0843, - "step": 20380 - }, - { - "epoch": 0.5174514532301053, - "grad_norm": 0.5436364412307739, - "learning_rate": 1.655032364513263e-05, - "loss": 0.1041, - "step": 20385 - }, - { - "epoch": 0.5175783728899607, - "grad_norm": 0.7267700433731079, - "learning_rate": 1.654947751406693e-05, - "loss": 0.0818, - "step": 20390 - }, - { - "epoch": 0.517705292549816, - "grad_norm": 0.6781760454177856, - "learning_rate": 1.654863138300123e-05, - "loss": 0.0986, - "step": 20395 - }, - { - "epoch": 0.5178322122096712, - "grad_norm": 0.9434118270874023, - "learning_rate": 1.6547785251935527e-05, - "loss": 0.0935, - "step": 20400 - }, - { - "epoch": 0.5179591318695266, - "grad_norm": 1.3519322872161865, - "learning_rate": 1.6546939120869825e-05, - "loss": 0.0822, - "step": 20405 - }, - { - "epoch": 0.5180860515293819, - "grad_norm": 0.647814929485321, - "learning_rate": 1.654609298980412e-05, - "loss": 0.0844, - "step": 20410 - }, - { - "epoch": 0.5182129711892373, - "grad_norm": 0.5351223349571228, - "learning_rate": 1.654524685873842e-05, - "loss": 0.0989, - "step": 20415 - }, - { - "epoch": 0.5183398908490925, - "grad_norm": 2.2951409816741943, - "learning_rate": 1.6544400727672717e-05, - "loss": 0.0929, - "step": 20420 - }, - { - "epoch": 0.5184668105089478, - "grad_norm": 2.1591110229492188, - "learning_rate": 1.6543554596607016e-05, - "loss": 0.0845, - "step": 20425 - }, - { - "epoch": 0.5185937301688032, - "grad_norm": 0.7073385119438171, - "learning_rate": 1.6542708465541314e-05, - "loss": 0.0907, - "step": 20430 - }, - { - "epoch": 0.5187206498286585, - "grad_norm": 0.5487199425697327, - "learning_rate": 1.6541862334475612e-05, - "loss": 0.098, - "step": 20435 - }, - { - "epoch": 0.5188475694885137, - "grad_norm": 0.5285652279853821, - "learning_rate": 1.654101620340991e-05, - "loss": 0.0935, - "step": 20440 - }, - { - "epoch": 0.5189744891483691, - "grad_norm": 1.1740927696228027, - "learning_rate": 1.654017007234421e-05, - "loss": 0.1191, - "step": 20445 - }, - { - "epoch": 0.5191014088082244, - "grad_norm": 0.6672606468200684, - "learning_rate": 1.6539323941278504e-05, - "loss": 0.0991, - "step": 20450 - }, - { - "epoch": 0.5192283284680798, - "grad_norm": 0.4592505991458893, - "learning_rate": 1.6538477810212803e-05, - "loss": 0.1038, - "step": 20455 - }, - { - "epoch": 0.519355248127935, - "grad_norm": 0.655148983001709, - "learning_rate": 1.65376316791471e-05, - "loss": 0.1361, - "step": 20460 - }, - { - "epoch": 0.5194821677877903, - "grad_norm": 0.7637618184089661, - "learning_rate": 1.65367855480814e-05, - "loss": 0.0804, - "step": 20465 - }, - { - "epoch": 0.5196090874476457, - "grad_norm": 0.34097957611083984, - "learning_rate": 1.6535939417015698e-05, - "loss": 0.0921, - "step": 20470 - }, - { - "epoch": 0.5197360071075009, - "grad_norm": 0.4685797095298767, - "learning_rate": 1.6535093285949996e-05, - "loss": 0.0944, - "step": 20475 - }, - { - "epoch": 0.5198629267673562, - "grad_norm": 0.4168073236942291, - "learning_rate": 1.6534247154884295e-05, - "loss": 0.0984, - "step": 20480 - }, - { - "epoch": 0.5199898464272116, - "grad_norm": 0.3822668194770813, - "learning_rate": 1.6533401023818593e-05, - "loss": 0.0791, - "step": 20485 - }, - { - "epoch": 0.5201167660870669, - "grad_norm": 0.5969893932342529, - "learning_rate": 1.6532554892752888e-05, - "loss": 0.0899, - "step": 20490 - }, - { - "epoch": 0.5202436857469221, - "grad_norm": 1.1511178016662598, - "learning_rate": 1.6531708761687186e-05, - "loss": 0.0757, - "step": 20495 - }, - { - "epoch": 0.5203706054067775, - "grad_norm": 1.3968865871429443, - "learning_rate": 1.6530862630621485e-05, - "loss": 0.106, - "step": 20500 - }, - { - "epoch": 0.5204975250666328, - "grad_norm": 0.3670293986797333, - "learning_rate": 1.6530016499555783e-05, - "loss": 0.0855, - "step": 20505 - }, - { - "epoch": 0.5206244447264882, - "grad_norm": 0.3750878572463989, - "learning_rate": 1.6529170368490078e-05, - "loss": 0.1151, - "step": 20510 - }, - { - "epoch": 0.5207513643863434, - "grad_norm": 0.4474570155143738, - "learning_rate": 1.6528324237424377e-05, - "loss": 0.0698, - "step": 20515 - }, - { - "epoch": 0.5208782840461987, - "grad_norm": 0.4369509518146515, - "learning_rate": 1.6527478106358675e-05, - "loss": 0.1009, - "step": 20520 - }, - { - "epoch": 0.5210052037060541, - "grad_norm": 0.5507971048355103, - "learning_rate": 1.6526631975292973e-05, - "loss": 0.0696, - "step": 20525 - }, - { - "epoch": 0.5211321233659094, - "grad_norm": 0.40282124280929565, - "learning_rate": 1.6525785844227272e-05, - "loss": 0.0858, - "step": 20530 - }, - { - "epoch": 0.5212590430257646, - "grad_norm": 0.48903146386146545, - "learning_rate": 1.652493971316157e-05, - "loss": 0.1188, - "step": 20535 - }, - { - "epoch": 0.52138596268562, - "grad_norm": 0.7884875535964966, - "learning_rate": 1.652409358209587e-05, - "loss": 0.1095, - "step": 20540 - }, - { - "epoch": 0.5215128823454753, - "grad_norm": 0.73907870054245, - "learning_rate": 1.6523247451030167e-05, - "loss": 0.0934, - "step": 20545 - }, - { - "epoch": 0.5216398020053307, - "grad_norm": 0.7124572396278381, - "learning_rate": 1.6522401319964462e-05, - "loss": 0.0886, - "step": 20550 - }, - { - "epoch": 0.5217667216651859, - "grad_norm": 0.6584247946739197, - "learning_rate": 1.652155518889876e-05, - "loss": 0.0859, - "step": 20555 - }, - { - "epoch": 0.5218936413250412, - "grad_norm": 0.5141717791557312, - "learning_rate": 1.652070905783306e-05, - "loss": 0.0908, - "step": 20560 - }, - { - "epoch": 0.5220205609848966, - "grad_norm": 0.8257293701171875, - "learning_rate": 1.6519862926767357e-05, - "loss": 0.0974, - "step": 20565 - }, - { - "epoch": 0.5221474806447519, - "grad_norm": 0.7016509175300598, - "learning_rate": 1.6519016795701656e-05, - "loss": 0.1049, - "step": 20570 - }, - { - "epoch": 0.5222744003046071, - "grad_norm": 0.6066516041755676, - "learning_rate": 1.6518170664635954e-05, - "loss": 0.1004, - "step": 20575 - }, - { - "epoch": 0.5224013199644625, - "grad_norm": 0.5293445587158203, - "learning_rate": 1.6517324533570252e-05, - "loss": 0.0865, - "step": 20580 - }, - { - "epoch": 0.5225282396243178, - "grad_norm": 0.7130727171897888, - "learning_rate": 1.651647840250455e-05, - "loss": 0.0866, - "step": 20585 - }, - { - "epoch": 0.5226551592841732, - "grad_norm": 0.4824776351451874, - "learning_rate": 1.6515632271438846e-05, - "loss": 0.0793, - "step": 20590 - }, - { - "epoch": 0.5227820789440284, - "grad_norm": 0.6968736052513123, - "learning_rate": 1.6514786140373144e-05, - "loss": 0.0994, - "step": 20595 - }, - { - "epoch": 0.5229089986038837, - "grad_norm": 0.44160792231559753, - "learning_rate": 1.6513940009307443e-05, - "loss": 0.0984, - "step": 20600 - }, - { - "epoch": 0.5230359182637391, - "grad_norm": 0.37844327092170715, - "learning_rate": 1.651309387824174e-05, - "loss": 0.0833, - "step": 20605 - }, - { - "epoch": 0.5231628379235944, - "grad_norm": 0.489725261926651, - "learning_rate": 1.651224774717604e-05, - "loss": 0.0928, - "step": 20610 - }, - { - "epoch": 0.5232897575834496, - "grad_norm": 0.5349194407463074, - "learning_rate": 1.6511401616110338e-05, - "loss": 0.1044, - "step": 20615 - }, - { - "epoch": 0.523416677243305, - "grad_norm": 0.6086074709892273, - "learning_rate": 1.6510555485044636e-05, - "loss": 0.0651, - "step": 20620 - }, - { - "epoch": 0.5235435969031603, - "grad_norm": 0.663662314414978, - "learning_rate": 1.6509709353978935e-05, - "loss": 0.09, - "step": 20625 - }, - { - "epoch": 0.5236705165630157, - "grad_norm": 0.43929603695869446, - "learning_rate": 1.650886322291323e-05, - "loss": 0.0918, - "step": 20630 - }, - { - "epoch": 0.5237974362228709, - "grad_norm": 0.42469340562820435, - "learning_rate": 1.6508017091847528e-05, - "loss": 0.0882, - "step": 20635 - }, - { - "epoch": 0.5239243558827262, - "grad_norm": 0.44438326358795166, - "learning_rate": 1.6507170960781826e-05, - "loss": 0.0815, - "step": 20640 - }, - { - "epoch": 0.5240512755425816, - "grad_norm": 0.7700662612915039, - "learning_rate": 1.6506324829716125e-05, - "loss": 0.0828, - "step": 20645 - }, - { - "epoch": 0.5241781952024368, - "grad_norm": 0.6394724249839783, - "learning_rate": 1.6505478698650423e-05, - "loss": 0.0983, - "step": 20650 - }, - { - "epoch": 0.5243051148622921, - "grad_norm": 0.6059297919273376, - "learning_rate": 1.6504632567584718e-05, - "loss": 0.1084, - "step": 20655 - }, - { - "epoch": 0.5244320345221475, - "grad_norm": 1.1678580045700073, - "learning_rate": 1.6503786436519017e-05, - "loss": 0.1011, - "step": 20660 - }, - { - "epoch": 0.5245589541820028, - "grad_norm": 0.9446230530738831, - "learning_rate": 1.6502940305453315e-05, - "loss": 0.1003, - "step": 20665 - }, - { - "epoch": 0.524685873841858, - "grad_norm": 0.5949775576591492, - "learning_rate": 1.6502094174387613e-05, - "loss": 0.0964, - "step": 20670 - }, - { - "epoch": 0.5248127935017134, - "grad_norm": 0.6916714310646057, - "learning_rate": 1.6501248043321912e-05, - "loss": 0.0867, - "step": 20675 - }, - { - "epoch": 0.5249397131615687, - "grad_norm": 0.5396777391433716, - "learning_rate": 1.650040191225621e-05, - "loss": 0.0975, - "step": 20680 - }, - { - "epoch": 0.5250666328214241, - "grad_norm": 0.45845314860343933, - "learning_rate": 1.649955578119051e-05, - "loss": 0.0772, - "step": 20685 - }, - { - "epoch": 0.5251935524812793, - "grad_norm": 0.6664096713066101, - "learning_rate": 1.6498709650124807e-05, - "loss": 0.0824, - "step": 20690 - }, - { - "epoch": 0.5253204721411346, - "grad_norm": 1.1835532188415527, - "learning_rate": 1.6497863519059102e-05, - "loss": 0.0977, - "step": 20695 - }, - { - "epoch": 0.52544739180099, - "grad_norm": 0.32027754187583923, - "learning_rate": 1.64970173879934e-05, - "loss": 0.0889, - "step": 20700 - }, - { - "epoch": 0.5255743114608453, - "grad_norm": 0.7077451944351196, - "learning_rate": 1.64961712569277e-05, - "loss": 0.0981, - "step": 20705 - }, - { - "epoch": 0.5257012311207006, - "grad_norm": 0.6798565983772278, - "learning_rate": 1.6495325125861997e-05, - "loss": 0.08, - "step": 20710 - }, - { - "epoch": 0.5258281507805559, - "grad_norm": 0.7080118656158447, - "learning_rate": 1.6494478994796296e-05, - "loss": 0.089, - "step": 20715 - }, - { - "epoch": 0.5259550704404112, - "grad_norm": 0.6248927116394043, - "learning_rate": 1.6493632863730594e-05, - "loss": 0.0866, - "step": 20720 - }, - { - "epoch": 0.5260819901002666, - "grad_norm": 0.9081325531005859, - "learning_rate": 1.6492786732664893e-05, - "loss": 0.0975, - "step": 20725 - }, - { - "epoch": 0.5262089097601218, - "grad_norm": 0.584141194820404, - "learning_rate": 1.649194060159919e-05, - "loss": 0.0996, - "step": 20730 - }, - { - "epoch": 0.5263358294199771, - "grad_norm": 0.6735355854034424, - "learning_rate": 1.6491094470533486e-05, - "loss": 0.0993, - "step": 20735 - }, - { - "epoch": 0.5264627490798325, - "grad_norm": 0.5204603672027588, - "learning_rate": 1.6490248339467784e-05, - "loss": 0.0949, - "step": 20740 - }, - { - "epoch": 0.5265896687396878, - "grad_norm": 0.6178103089332581, - "learning_rate": 1.6489402208402083e-05, - "loss": 0.1132, - "step": 20745 - }, - { - "epoch": 0.526716588399543, - "grad_norm": 0.43411627411842346, - "learning_rate": 1.648855607733638e-05, - "loss": 0.087, - "step": 20750 - }, - { - "epoch": 0.5268435080593984, - "grad_norm": 0.5160171985626221, - "learning_rate": 1.648770994627068e-05, - "loss": 0.0962, - "step": 20755 - }, - { - "epoch": 0.5269704277192537, - "grad_norm": 0.852611780166626, - "learning_rate": 1.6486863815204978e-05, - "loss": 0.0884, - "step": 20760 - }, - { - "epoch": 0.5270973473791091, - "grad_norm": 0.5411545634269714, - "learning_rate": 1.6486017684139276e-05, - "loss": 0.0931, - "step": 20765 - }, - { - "epoch": 0.5272242670389643, - "grad_norm": 0.7467847466468811, - "learning_rate": 1.6485171553073575e-05, - "loss": 0.1056, - "step": 20770 - }, - { - "epoch": 0.5273511866988196, - "grad_norm": 0.48939597606658936, - "learning_rate": 1.648432542200787e-05, - "loss": 0.0643, - "step": 20775 - }, - { - "epoch": 0.527478106358675, - "grad_norm": 0.8183727860450745, - "learning_rate": 1.6483479290942168e-05, - "loss": 0.0963, - "step": 20780 - }, - { - "epoch": 0.5276050260185303, - "grad_norm": 0.4735550582408905, - "learning_rate": 1.6482633159876467e-05, - "loss": 0.0995, - "step": 20785 - }, - { - "epoch": 0.5277319456783856, - "grad_norm": 0.5313462018966675, - "learning_rate": 1.6481787028810765e-05, - "loss": 0.1082, - "step": 20790 - }, - { - "epoch": 0.5278588653382409, - "grad_norm": 0.6248774528503418, - "learning_rate": 1.648094089774506e-05, - "loss": 0.0925, - "step": 20795 - }, - { - "epoch": 0.5279857849980962, - "grad_norm": 0.5131957530975342, - "learning_rate": 1.648009476667936e-05, - "loss": 0.0943, - "step": 20800 - }, - { - "epoch": 0.5281127046579516, - "grad_norm": 0.5045276880264282, - "learning_rate": 1.6479248635613657e-05, - "loss": 0.0897, - "step": 20805 - }, - { - "epoch": 0.5282396243178068, - "grad_norm": 0.5670545101165771, - "learning_rate": 1.6478402504547955e-05, - "loss": 0.1005, - "step": 20810 - }, - { - "epoch": 0.5283665439776621, - "grad_norm": 0.5490085482597351, - "learning_rate": 1.6477556373482254e-05, - "loss": 0.1016, - "step": 20815 - }, - { - "epoch": 0.5284934636375175, - "grad_norm": 0.5523887872695923, - "learning_rate": 1.6476710242416552e-05, - "loss": 0.0932, - "step": 20820 - }, - { - "epoch": 0.5286203832973727, - "grad_norm": 0.6080774068832397, - "learning_rate": 1.647586411135085e-05, - "loss": 0.0842, - "step": 20825 - }, - { - "epoch": 0.528747302957228, - "grad_norm": 0.49810484051704407, - "learning_rate": 1.647501798028515e-05, - "loss": 0.066, - "step": 20830 - }, - { - "epoch": 0.5288742226170834, - "grad_norm": 0.7334206700325012, - "learning_rate": 1.6474171849219444e-05, - "loss": 0.0894, - "step": 20835 - }, - { - "epoch": 0.5290011422769387, - "grad_norm": 0.453229695558548, - "learning_rate": 1.6473325718153742e-05, - "loss": 0.0845, - "step": 20840 - }, - { - "epoch": 0.529128061936794, - "grad_norm": 0.4836370348930359, - "learning_rate": 1.647247958708804e-05, - "loss": 0.0739, - "step": 20845 - }, - { - "epoch": 0.5292549815966493, - "grad_norm": 0.4656808078289032, - "learning_rate": 1.647163345602234e-05, - "loss": 0.089, - "step": 20850 - }, - { - "epoch": 0.5293819012565046, - "grad_norm": 0.5807144045829773, - "learning_rate": 1.6470787324956637e-05, - "loss": 0.0835, - "step": 20855 - }, - { - "epoch": 0.52950882091636, - "grad_norm": 0.926331639289856, - "learning_rate": 1.6469941193890936e-05, - "loss": 0.0836, - "step": 20860 - }, - { - "epoch": 0.5296357405762152, - "grad_norm": 0.736509382724762, - "learning_rate": 1.6469095062825234e-05, - "loss": 0.092, - "step": 20865 - }, - { - "epoch": 0.5297626602360705, - "grad_norm": 0.513617753982544, - "learning_rate": 1.6468248931759533e-05, - "loss": 0.0919, - "step": 20870 - }, - { - "epoch": 0.5298895798959259, - "grad_norm": 0.45135053992271423, - "learning_rate": 1.6467402800693828e-05, - "loss": 0.0755, - "step": 20875 - }, - { - "epoch": 0.5300164995557812, - "grad_norm": 0.5973974466323853, - "learning_rate": 1.6466556669628126e-05, - "loss": 0.0854, - "step": 20880 - }, - { - "epoch": 0.5301434192156365, - "grad_norm": 0.7075784802436829, - "learning_rate": 1.6465710538562424e-05, - "loss": 0.0937, - "step": 20885 - }, - { - "epoch": 0.5302703388754918, - "grad_norm": 0.8178196549415588, - "learning_rate": 1.6464864407496723e-05, - "loss": 0.1071, - "step": 20890 - }, - { - "epoch": 0.5303972585353471, - "grad_norm": 0.43611299991607666, - "learning_rate": 1.646401827643102e-05, - "loss": 0.0952, - "step": 20895 - }, - { - "epoch": 0.5305241781952025, - "grad_norm": 0.7396659255027771, - "learning_rate": 1.646317214536532e-05, - "loss": 0.0901, - "step": 20900 - }, - { - "epoch": 0.5306510978550577, - "grad_norm": 0.4596092104911804, - "learning_rate": 1.6462326014299618e-05, - "loss": 0.0701, - "step": 20905 - }, - { - "epoch": 0.530778017514913, - "grad_norm": 0.4946272671222687, - "learning_rate": 1.6461479883233916e-05, - "loss": 0.1056, - "step": 20910 - }, - { - "epoch": 0.5309049371747684, - "grad_norm": 0.47143301367759705, - "learning_rate": 1.646063375216821e-05, - "loss": 0.0945, - "step": 20915 - }, - { - "epoch": 0.5310318568346237, - "grad_norm": 0.647006630897522, - "learning_rate": 1.645978762110251e-05, - "loss": 0.1053, - "step": 20920 - }, - { - "epoch": 0.531158776494479, - "grad_norm": 0.6273084878921509, - "learning_rate": 1.6458941490036808e-05, - "loss": 0.0928, - "step": 20925 - }, - { - "epoch": 0.5312856961543343, - "grad_norm": 0.6034598350524902, - "learning_rate": 1.6458095358971107e-05, - "loss": 0.1205, - "step": 20930 - }, - { - "epoch": 0.5314126158141896, - "grad_norm": 0.5233375430107117, - "learning_rate": 1.64572492279054e-05, - "loss": 0.0805, - "step": 20935 - }, - { - "epoch": 0.531539535474045, - "grad_norm": 0.8237043023109436, - "learning_rate": 1.64564030968397e-05, - "loss": 0.0795, - "step": 20940 - }, - { - "epoch": 0.5316664551339002, - "grad_norm": 0.31523752212524414, - "learning_rate": 1.6455556965774e-05, - "loss": 0.0719, - "step": 20945 - }, - { - "epoch": 0.5317933747937555, - "grad_norm": 0.5870101451873779, - "learning_rate": 1.6454710834708297e-05, - "loss": 0.0895, - "step": 20950 - }, - { - "epoch": 0.5319202944536109, - "grad_norm": 0.4993893802165985, - "learning_rate": 1.6453864703642595e-05, - "loss": 0.0842, - "step": 20955 - }, - { - "epoch": 0.5320472141134662, - "grad_norm": 0.944998025894165, - "learning_rate": 1.6453018572576894e-05, - "loss": 0.0955, - "step": 20960 - }, - { - "epoch": 0.5321741337733215, - "grad_norm": 0.6228821873664856, - "learning_rate": 1.6452172441511192e-05, - "loss": 0.0904, - "step": 20965 - }, - { - "epoch": 0.5323010534331768, - "grad_norm": 0.5925621390342712, - "learning_rate": 1.645132631044549e-05, - "loss": 0.0875, - "step": 20970 - }, - { - "epoch": 0.5324279730930321, - "grad_norm": 0.3984404504299164, - "learning_rate": 1.6450480179379786e-05, - "loss": 0.1032, - "step": 20975 - }, - { - "epoch": 0.5325548927528875, - "grad_norm": 0.7396990656852722, - "learning_rate": 1.6449634048314084e-05, - "loss": 0.0892, - "step": 20980 - }, - { - "epoch": 0.5326818124127427, - "grad_norm": 0.6437301635742188, - "learning_rate": 1.6448787917248382e-05, - "loss": 0.079, - "step": 20985 - }, - { - "epoch": 0.532808732072598, - "grad_norm": 0.48075398802757263, - "learning_rate": 1.644794178618268e-05, - "loss": 0.0783, - "step": 20990 - }, - { - "epoch": 0.5329356517324534, - "grad_norm": 0.8263614773750305, - "learning_rate": 1.644709565511698e-05, - "loss": 0.0931, - "step": 20995 - }, - { - "epoch": 0.5330625713923086, - "grad_norm": 0.36074769496917725, - "learning_rate": 1.6446249524051278e-05, - "loss": 0.0726, - "step": 21000 - }, - { - "epoch": 0.533189491052164, - "grad_norm": 0.3912588059902191, - "learning_rate": 1.6445403392985576e-05, - "loss": 0.0693, - "step": 21005 - }, - { - "epoch": 0.5333164107120193, - "grad_norm": 0.587096095085144, - "learning_rate": 1.6444557261919874e-05, - "loss": 0.0714, - "step": 21010 - }, - { - "epoch": 0.5334433303718746, - "grad_norm": 0.5990480184555054, - "learning_rate": 1.644371113085417e-05, - "loss": 0.0907, - "step": 21015 - }, - { - "epoch": 0.5335702500317299, - "grad_norm": 0.7133907079696655, - "learning_rate": 1.6442864999788468e-05, - "loss": 0.065, - "step": 21020 - }, - { - "epoch": 0.5336971696915852, - "grad_norm": 0.547145426273346, - "learning_rate": 1.6442018868722766e-05, - "loss": 0.0731, - "step": 21025 - }, - { - "epoch": 0.5338240893514405, - "grad_norm": 0.6503618359565735, - "learning_rate": 1.6441172737657065e-05, - "loss": 0.0833, - "step": 21030 - }, - { - "epoch": 0.5339510090112959, - "grad_norm": 0.5873314738273621, - "learning_rate": 1.6440326606591363e-05, - "loss": 0.0685, - "step": 21035 - }, - { - "epoch": 0.5340779286711511, - "grad_norm": 0.9043633341789246, - "learning_rate": 1.643948047552566e-05, - "loss": 0.1019, - "step": 21040 - }, - { - "epoch": 0.5342048483310065, - "grad_norm": 0.5060474276542664, - "learning_rate": 1.643863434445996e-05, - "loss": 0.0868, - "step": 21045 - }, - { - "epoch": 0.5343317679908618, - "grad_norm": 0.7916196584701538, - "learning_rate": 1.6437788213394258e-05, - "loss": 0.0935, - "step": 21050 - }, - { - "epoch": 0.5344586876507171, - "grad_norm": 0.5299701690673828, - "learning_rate": 1.6436942082328553e-05, - "loss": 0.0828, - "step": 21055 - }, - { - "epoch": 0.5345856073105724, - "grad_norm": 0.506935179233551, - "learning_rate": 1.643609595126285e-05, - "loss": 0.098, - "step": 21060 - }, - { - "epoch": 0.5347125269704277, - "grad_norm": 0.6693544387817383, - "learning_rate": 1.643524982019715e-05, - "loss": 0.1092, - "step": 21065 - }, - { - "epoch": 0.534839446630283, - "grad_norm": 0.842107355594635, - "learning_rate": 1.643440368913145e-05, - "loss": 0.0976, - "step": 21070 - }, - { - "epoch": 0.5349663662901384, - "grad_norm": 0.5581983327865601, - "learning_rate": 1.6433557558065743e-05, - "loss": 0.1077, - "step": 21075 - }, - { - "epoch": 0.5350932859499936, - "grad_norm": 0.4444951117038727, - "learning_rate": 1.6432711427000042e-05, - "loss": 0.0942, - "step": 21080 - }, - { - "epoch": 0.535220205609849, - "grad_norm": 0.3694077730178833, - "learning_rate": 1.643186529593434e-05, - "loss": 0.0748, - "step": 21085 - }, - { - "epoch": 0.5353471252697043, - "grad_norm": 0.3555794358253479, - "learning_rate": 1.643101916486864e-05, - "loss": 0.1003, - "step": 21090 - }, - { - "epoch": 0.5354740449295596, - "grad_norm": 0.5381277799606323, - "learning_rate": 1.6430173033802937e-05, - "loss": 0.079, - "step": 21095 - }, - { - "epoch": 0.5356009645894149, - "grad_norm": 0.6114733815193176, - "learning_rate": 1.6429326902737235e-05, - "loss": 0.085, - "step": 21100 - }, - { - "epoch": 0.5357278842492702, - "grad_norm": 0.5573025941848755, - "learning_rate": 1.6428480771671534e-05, - "loss": 0.0972, - "step": 21105 - }, - { - "epoch": 0.5358548039091255, - "grad_norm": 0.5061011910438538, - "learning_rate": 1.6427634640605832e-05, - "loss": 0.0873, - "step": 21110 - }, - { - "epoch": 0.5359817235689809, - "grad_norm": 0.49840325117111206, - "learning_rate": 1.6426788509540127e-05, - "loss": 0.0993, - "step": 21115 - }, - { - "epoch": 0.5361086432288361, - "grad_norm": 0.5042077898979187, - "learning_rate": 1.6425942378474426e-05, - "loss": 0.0966, - "step": 21120 - }, - { - "epoch": 0.5362355628886915, - "grad_norm": 0.6636473536491394, - "learning_rate": 1.6425096247408724e-05, - "loss": 0.0722, - "step": 21125 - }, - { - "epoch": 0.5363624825485468, - "grad_norm": 0.4288860559463501, - "learning_rate": 1.6424250116343022e-05, - "loss": 0.079, - "step": 21130 - }, - { - "epoch": 0.5364894022084021, - "grad_norm": 0.9767224192619324, - "learning_rate": 1.642340398527732e-05, - "loss": 0.1028, - "step": 21135 - }, - { - "epoch": 0.5366163218682574, - "grad_norm": 0.6598927974700928, - "learning_rate": 1.642255785421162e-05, - "loss": 0.0987, - "step": 21140 - }, - { - "epoch": 0.5367432415281127, - "grad_norm": 0.6839733123779297, - "learning_rate": 1.6421711723145918e-05, - "loss": 0.0703, - "step": 21145 - }, - { - "epoch": 0.536870161187968, - "grad_norm": 0.38826650381088257, - "learning_rate": 1.6420865592080216e-05, - "loss": 0.1071, - "step": 21150 - }, - { - "epoch": 0.5369970808478234, - "grad_norm": 0.4748489260673523, - "learning_rate": 1.642001946101451e-05, - "loss": 0.0909, - "step": 21155 - }, - { - "epoch": 0.5371240005076786, - "grad_norm": 0.6104564070701599, - "learning_rate": 1.641917332994881e-05, - "loss": 0.0764, - "step": 21160 - }, - { - "epoch": 0.537250920167534, - "grad_norm": 0.35985416173934937, - "learning_rate": 1.6418327198883108e-05, - "loss": 0.0768, - "step": 21165 - }, - { - "epoch": 0.5373778398273893, - "grad_norm": 0.6751089096069336, - "learning_rate": 1.6417481067817406e-05, - "loss": 0.0939, - "step": 21170 - }, - { - "epoch": 0.5375047594872445, - "grad_norm": 0.5999599099159241, - "learning_rate": 1.6416634936751705e-05, - "loss": 0.0914, - "step": 21175 - }, - { - "epoch": 0.5376316791470999, - "grad_norm": 0.5442699790000916, - "learning_rate": 1.6415788805686003e-05, - "loss": 0.0939, - "step": 21180 - }, - { - "epoch": 0.5377585988069552, - "grad_norm": 0.465596467256546, - "learning_rate": 1.64149426746203e-05, - "loss": 0.0812, - "step": 21185 - }, - { - "epoch": 0.5378855184668105, - "grad_norm": 0.9955682754516602, - "learning_rate": 1.64140965435546e-05, - "loss": 0.0902, - "step": 21190 - }, - { - "epoch": 0.5380124381266658, - "grad_norm": 1.088228702545166, - "learning_rate": 1.6413250412488898e-05, - "loss": 0.0999, - "step": 21195 - }, - { - "epoch": 0.5381393577865211, - "grad_norm": 0.5203427672386169, - "learning_rate": 1.6412404281423193e-05, - "loss": 0.1076, - "step": 21200 - }, - { - "epoch": 0.5382662774463765, - "grad_norm": 0.6265187859535217, - "learning_rate": 1.641155815035749e-05, - "loss": 0.0779, - "step": 21205 - }, - { - "epoch": 0.5383931971062318, - "grad_norm": 0.6897786855697632, - "learning_rate": 1.641071201929179e-05, - "loss": 0.093, - "step": 21210 - }, - { - "epoch": 0.538520116766087, - "grad_norm": 0.7260372638702393, - "learning_rate": 1.640986588822609e-05, - "loss": 0.0907, - "step": 21215 - }, - { - "epoch": 0.5386470364259424, - "grad_norm": 1.5793938636779785, - "learning_rate": 1.6409019757160383e-05, - "loss": 0.096, - "step": 21220 - }, - { - "epoch": 0.5387739560857977, - "grad_norm": 0.5786048173904419, - "learning_rate": 1.6408173626094682e-05, - "loss": 0.0881, - "step": 21225 - }, - { - "epoch": 0.538900875745653, - "grad_norm": 0.7155357599258423, - "learning_rate": 1.640732749502898e-05, - "loss": 0.1086, - "step": 21230 - }, - { - "epoch": 0.5390277954055083, - "grad_norm": 0.36416858434677124, - "learning_rate": 1.640648136396328e-05, - "loss": 0.1052, - "step": 21235 - }, - { - "epoch": 0.5391547150653636, - "grad_norm": 0.7952359318733215, - "learning_rate": 1.6405635232897577e-05, - "loss": 0.1025, - "step": 21240 - }, - { - "epoch": 0.539281634725219, - "grad_norm": 1.3127238750457764, - "learning_rate": 1.6404789101831876e-05, - "loss": 0.0846, - "step": 21245 - }, - { - "epoch": 0.5394085543850743, - "grad_norm": 0.693895161151886, - "learning_rate": 1.6403942970766174e-05, - "loss": 0.0907, - "step": 21250 - }, - { - "epoch": 0.5395354740449295, - "grad_norm": 0.8234659433364868, - "learning_rate": 1.6403096839700472e-05, - "loss": 0.0895, - "step": 21255 - }, - { - "epoch": 0.5396623937047849, - "grad_norm": 0.5653076767921448, - "learning_rate": 1.6402250708634767e-05, - "loss": 0.0763, - "step": 21260 - }, - { - "epoch": 0.5397893133646402, - "grad_norm": 0.8016549348831177, - "learning_rate": 1.6401404577569066e-05, - "loss": 0.097, - "step": 21265 - }, - { - "epoch": 0.5399162330244955, - "grad_norm": 0.5602293014526367, - "learning_rate": 1.6400558446503364e-05, - "loss": 0.0995, - "step": 21270 - }, - { - "epoch": 0.5400431526843508, - "grad_norm": 0.36744970083236694, - "learning_rate": 1.6399712315437663e-05, - "loss": 0.0688, - "step": 21275 - }, - { - "epoch": 0.5401700723442061, - "grad_norm": 0.4742143452167511, - "learning_rate": 1.639886618437196e-05, - "loss": 0.0954, - "step": 21280 - }, - { - "epoch": 0.5402969920040614, - "grad_norm": 0.553385853767395, - "learning_rate": 1.639802005330626e-05, - "loss": 0.0993, - "step": 21285 - }, - { - "epoch": 0.5404239116639168, - "grad_norm": 0.3675328195095062, - "learning_rate": 1.6397173922240558e-05, - "loss": 0.1044, - "step": 21290 - }, - { - "epoch": 0.540550831323772, - "grad_norm": 0.8291075229644775, - "learning_rate": 1.6396327791174856e-05, - "loss": 0.0863, - "step": 21295 - }, - { - "epoch": 0.5406777509836274, - "grad_norm": 0.8157548904418945, - "learning_rate": 1.639548166010915e-05, - "loss": 0.1301, - "step": 21300 - }, - { - "epoch": 0.5408046706434827, - "grad_norm": 0.53499835729599, - "learning_rate": 1.639463552904345e-05, - "loss": 0.0871, - "step": 21305 - }, - { - "epoch": 0.540931590303338, - "grad_norm": 0.8246451616287231, - "learning_rate": 1.6393789397977748e-05, - "loss": 0.0932, - "step": 21310 - }, - { - "epoch": 0.5410585099631933, - "grad_norm": 0.5463002920150757, - "learning_rate": 1.6392943266912046e-05, - "loss": 0.0656, - "step": 21315 - }, - { - "epoch": 0.5411854296230486, - "grad_norm": 0.6515141129493713, - "learning_rate": 1.6392097135846345e-05, - "loss": 0.0974, - "step": 21320 - }, - { - "epoch": 0.541312349282904, - "grad_norm": 0.47651252150535583, - "learning_rate": 1.6391251004780643e-05, - "loss": 0.0898, - "step": 21325 - }, - { - "epoch": 0.5414392689427593, - "grad_norm": 0.6064968705177307, - "learning_rate": 1.639040487371494e-05, - "loss": 0.0701, - "step": 21330 - }, - { - "epoch": 0.5415661886026145, - "grad_norm": 0.5853434801101685, - "learning_rate": 1.638955874264924e-05, - "loss": 0.0918, - "step": 21335 - }, - { - "epoch": 0.5416931082624699, - "grad_norm": 0.47141239047050476, - "learning_rate": 1.6388712611583535e-05, - "loss": 0.093, - "step": 21340 - }, - { - "epoch": 0.5418200279223252, - "grad_norm": 0.7223420739173889, - "learning_rate": 1.6387866480517833e-05, - "loss": 0.0804, - "step": 21345 - }, - { - "epoch": 0.5419469475821804, - "grad_norm": 0.6058447957038879, - "learning_rate": 1.6387020349452132e-05, - "loss": 0.0948, - "step": 21350 - }, - { - "epoch": 0.5420738672420358, - "grad_norm": 0.5186620354652405, - "learning_rate": 1.638617421838643e-05, - "loss": 0.0968, - "step": 21355 - }, - { - "epoch": 0.5422007869018911, - "grad_norm": 0.9695602059364319, - "learning_rate": 1.6385328087320725e-05, - "loss": 0.0821, - "step": 21360 - }, - { - "epoch": 0.5423277065617464, - "grad_norm": 0.5473751425743103, - "learning_rate": 1.6384481956255024e-05, - "loss": 0.0989, - "step": 21365 - }, - { - "epoch": 0.5424546262216017, - "grad_norm": 0.4859209358692169, - "learning_rate": 1.6383635825189322e-05, - "loss": 0.0867, - "step": 21370 - }, - { - "epoch": 0.542581545881457, - "grad_norm": 1.044114351272583, - "learning_rate": 1.638278969412362e-05, - "loss": 0.0993, - "step": 21375 - }, - { - "epoch": 0.5427084655413124, - "grad_norm": 0.5691289305686951, - "learning_rate": 1.638194356305792e-05, - "loss": 0.0776, - "step": 21380 - }, - { - "epoch": 0.5428353852011677, - "grad_norm": 0.4279560446739197, - "learning_rate": 1.6381097431992217e-05, - "loss": 0.0802, - "step": 21385 - }, - { - "epoch": 0.5429623048610229, - "grad_norm": 0.5630630254745483, - "learning_rate": 1.6380251300926516e-05, - "loss": 0.0982, - "step": 21390 - }, - { - "epoch": 0.5430892245208783, - "grad_norm": 0.9270681738853455, - "learning_rate": 1.6379405169860814e-05, - "loss": 0.0876, - "step": 21395 - }, - { - "epoch": 0.5432161441807336, - "grad_norm": 0.6938778758049011, - "learning_rate": 1.637855903879511e-05, - "loss": 0.1112, - "step": 21400 - }, - { - "epoch": 0.543343063840589, - "grad_norm": 0.507729709148407, - "learning_rate": 1.6377712907729407e-05, - "loss": 0.0959, - "step": 21405 - }, - { - "epoch": 0.5434699835004442, - "grad_norm": 0.62525475025177, - "learning_rate": 1.6376866776663706e-05, - "loss": 0.1001, - "step": 21410 - }, - { - "epoch": 0.5435969031602995, - "grad_norm": 0.6262304186820984, - "learning_rate": 1.6376020645598004e-05, - "loss": 0.0751, - "step": 21415 - }, - { - "epoch": 0.5437238228201549, - "grad_norm": 1.0863368511199951, - "learning_rate": 1.6375174514532303e-05, - "loss": 0.0986, - "step": 21420 - }, - { - "epoch": 0.5438507424800102, - "grad_norm": 0.5043184161186218, - "learning_rate": 1.63743283834666e-05, - "loss": 0.1007, - "step": 21425 - }, - { - "epoch": 0.5439776621398654, - "grad_norm": 0.6376637816429138, - "learning_rate": 1.63734822524009e-05, - "loss": 0.0853, - "step": 21430 - }, - { - "epoch": 0.5441045817997208, - "grad_norm": 0.5656484365463257, - "learning_rate": 1.6372636121335198e-05, - "loss": 0.1142, - "step": 21435 - }, - { - "epoch": 0.5442315014595761, - "grad_norm": 1.2131199836730957, - "learning_rate": 1.6371789990269493e-05, - "loss": 0.098, - "step": 21440 - }, - { - "epoch": 0.5443584211194314, - "grad_norm": 0.5932275056838989, - "learning_rate": 1.637094385920379e-05, - "loss": 0.0858, - "step": 21445 - }, - { - "epoch": 0.5444853407792867, - "grad_norm": 0.7263017296791077, - "learning_rate": 1.637009772813809e-05, - "loss": 0.0956, - "step": 21450 - }, - { - "epoch": 0.544612260439142, - "grad_norm": 0.6305457353591919, - "learning_rate": 1.6369251597072388e-05, - "loss": 0.0981, - "step": 21455 - }, - { - "epoch": 0.5447391800989974, - "grad_norm": 1.2602494955062866, - "learning_rate": 1.6368405466006686e-05, - "loss": 0.0922, - "step": 21460 - }, - { - "epoch": 0.5448660997588527, - "grad_norm": 1.1606122255325317, - "learning_rate": 1.6367559334940985e-05, - "loss": 0.1021, - "step": 21465 - }, - { - "epoch": 0.5449930194187079, - "grad_norm": 0.6234676241874695, - "learning_rate": 1.6366713203875283e-05, - "loss": 0.084, - "step": 21470 - }, - { - "epoch": 0.5451199390785633, - "grad_norm": 0.6040022969245911, - "learning_rate": 1.636586707280958e-05, - "loss": 0.0936, - "step": 21475 - }, - { - "epoch": 0.5452468587384186, - "grad_norm": 0.5297578573226929, - "learning_rate": 1.6365020941743877e-05, - "loss": 0.0819, - "step": 21480 - }, - { - "epoch": 0.545373778398274, - "grad_norm": 2.2976346015930176, - "learning_rate": 1.6364174810678175e-05, - "loss": 0.105, - "step": 21485 - }, - { - "epoch": 0.5455006980581292, - "grad_norm": 0.7419076561927795, - "learning_rate": 1.6363328679612473e-05, - "loss": 0.0905, - "step": 21490 - }, - { - "epoch": 0.5456276177179845, - "grad_norm": 0.6626555323600769, - "learning_rate": 1.6362482548546772e-05, - "loss": 0.1109, - "step": 21495 - }, - { - "epoch": 0.5457545373778399, - "grad_norm": 0.4506874978542328, - "learning_rate": 1.6361636417481067e-05, - "loss": 0.0722, - "step": 21500 - }, - { - "epoch": 0.5458814570376951, - "grad_norm": 0.9225267767906189, - "learning_rate": 1.6360790286415365e-05, - "loss": 0.0976, - "step": 21505 - }, - { - "epoch": 0.5460083766975504, - "grad_norm": 1.848239779472351, - "learning_rate": 1.6359944155349664e-05, - "loss": 0.0951, - "step": 21510 - }, - { - "epoch": 0.5461352963574058, - "grad_norm": 0.7583701610565186, - "learning_rate": 1.6359098024283962e-05, - "loss": 0.1069, - "step": 21515 - }, - { - "epoch": 0.5462622160172611, - "grad_norm": 0.819911003112793, - "learning_rate": 1.635825189321826e-05, - "loss": 0.095, - "step": 21520 - }, - { - "epoch": 0.5463891356771163, - "grad_norm": 0.5948473215103149, - "learning_rate": 1.635740576215256e-05, - "loss": 0.0912, - "step": 21525 - }, - { - "epoch": 0.5465160553369717, - "grad_norm": 0.6497108936309814, - "learning_rate": 1.6356559631086857e-05, - "loss": 0.0846, - "step": 21530 - }, - { - "epoch": 0.546642974996827, - "grad_norm": 0.49831756949424744, - "learning_rate": 1.6355713500021156e-05, - "loss": 0.0938, - "step": 21535 - }, - { - "epoch": 0.5467698946566824, - "grad_norm": 0.4736022353172302, - "learning_rate": 1.635486736895545e-05, - "loss": 0.074, - "step": 21540 - }, - { - "epoch": 0.5468968143165376, - "grad_norm": 1.1643685102462769, - "learning_rate": 1.635402123788975e-05, - "loss": 0.0741, - "step": 21545 - }, - { - "epoch": 0.5470237339763929, - "grad_norm": 0.7262755632400513, - "learning_rate": 1.6353175106824048e-05, - "loss": 0.085, - "step": 21550 - }, - { - "epoch": 0.5471506536362483, - "grad_norm": 0.7345977425575256, - "learning_rate": 1.6352328975758346e-05, - "loss": 0.0858, - "step": 21555 - }, - { - "epoch": 0.5472775732961036, - "grad_norm": 0.8088849186897278, - "learning_rate": 1.6351482844692644e-05, - "loss": 0.1077, - "step": 21560 - }, - { - "epoch": 0.5474044929559588, - "grad_norm": 0.497389018535614, - "learning_rate": 1.6350636713626943e-05, - "loss": 0.1075, - "step": 21565 - }, - { - "epoch": 0.5475314126158142, - "grad_norm": 0.8586159348487854, - "learning_rate": 1.634979058256124e-05, - "loss": 0.1003, - "step": 21570 - }, - { - "epoch": 0.5476583322756695, - "grad_norm": 0.4734354615211487, - "learning_rate": 1.634894445149554e-05, - "loss": 0.0658, - "step": 21575 - }, - { - "epoch": 0.5477852519355249, - "grad_norm": 0.48463165760040283, - "learning_rate": 1.6348098320429835e-05, - "loss": 0.0877, - "step": 21580 - }, - { - "epoch": 0.5479121715953801, - "grad_norm": 0.6501711010932922, - "learning_rate": 1.6347252189364133e-05, - "loss": 0.079, - "step": 21585 - }, - { - "epoch": 0.5480390912552354, - "grad_norm": 0.5521665215492249, - "learning_rate": 1.634640605829843e-05, - "loss": 0.077, - "step": 21590 - }, - { - "epoch": 0.5481660109150908, - "grad_norm": 0.7483158707618713, - "learning_rate": 1.634555992723273e-05, - "loss": 0.0838, - "step": 21595 - }, - { - "epoch": 0.5482929305749461, - "grad_norm": 0.27863630652427673, - "learning_rate": 1.6344713796167028e-05, - "loss": 0.0732, - "step": 21600 - }, - { - "epoch": 0.5484198502348013, - "grad_norm": 0.40905362367630005, - "learning_rate": 1.6343867665101327e-05, - "loss": 0.0956, - "step": 21605 - }, - { - "epoch": 0.5485467698946567, - "grad_norm": 0.47117647528648376, - "learning_rate": 1.6343021534035625e-05, - "loss": 0.0728, - "step": 21610 - }, - { - "epoch": 0.548673689554512, - "grad_norm": 0.7091764807701111, - "learning_rate": 1.6342175402969923e-05, - "loss": 0.0899, - "step": 21615 - }, - { - "epoch": 0.5488006092143674, - "grad_norm": 0.5254268646240234, - "learning_rate": 1.634132927190422e-05, - "loss": 0.0848, - "step": 21620 - }, - { - "epoch": 0.5489275288742226, - "grad_norm": 0.4944975972175598, - "learning_rate": 1.6340483140838517e-05, - "loss": 0.0931, - "step": 21625 - }, - { - "epoch": 0.5490544485340779, - "grad_norm": 0.5686967372894287, - "learning_rate": 1.6339637009772815e-05, - "loss": 0.1115, - "step": 21630 - }, - { - "epoch": 0.5491813681939333, - "grad_norm": 0.5930113792419434, - "learning_rate": 1.6338790878707114e-05, - "loss": 0.082, - "step": 21635 - }, - { - "epoch": 0.5493082878537886, - "grad_norm": 0.500150740146637, - "learning_rate": 1.633794474764141e-05, - "loss": 0.1091, - "step": 21640 - }, - { - "epoch": 0.5494352075136438, - "grad_norm": 0.580836296081543, - "learning_rate": 1.6337098616575707e-05, - "loss": 0.1092, - "step": 21645 - }, - { - "epoch": 0.5495621271734992, - "grad_norm": 0.32387471199035645, - "learning_rate": 1.6336252485510005e-05, - "loss": 0.0638, - "step": 21650 - }, - { - "epoch": 0.5496890468333545, - "grad_norm": 0.4861380159854889, - "learning_rate": 1.6335406354444304e-05, - "loss": 0.0829, - "step": 21655 - }, - { - "epoch": 0.5498159664932099, - "grad_norm": 0.45339497923851013, - "learning_rate": 1.6334560223378602e-05, - "loss": 0.0892, - "step": 21660 - }, - { - "epoch": 0.5499428861530651, - "grad_norm": 0.45344114303588867, - "learning_rate": 1.63337140923129e-05, - "loss": 0.1041, - "step": 21665 - }, - { - "epoch": 0.5500698058129204, - "grad_norm": 0.5236192941665649, - "learning_rate": 1.63328679612472e-05, - "loss": 0.0682, - "step": 21670 - }, - { - "epoch": 0.5501967254727758, - "grad_norm": 1.7357641458511353, - "learning_rate": 1.6332021830181497e-05, - "loss": 0.0965, - "step": 21675 - }, - { - "epoch": 0.550323645132631, - "grad_norm": 0.41507989168167114, - "learning_rate": 1.6331175699115796e-05, - "loss": 0.0823, - "step": 21680 - }, - { - "epoch": 0.5504505647924863, - "grad_norm": 0.48781201243400574, - "learning_rate": 1.633032956805009e-05, - "loss": 0.1058, - "step": 21685 - }, - { - "epoch": 0.5505774844523417, - "grad_norm": 0.5264120697975159, - "learning_rate": 1.632948343698439e-05, - "loss": 0.1186, - "step": 21690 - }, - { - "epoch": 0.550704404112197, - "grad_norm": 0.6900447607040405, - "learning_rate": 1.6328637305918688e-05, - "loss": 0.0984, - "step": 21695 - }, - { - "epoch": 0.5508313237720522, - "grad_norm": 0.4949745535850525, - "learning_rate": 1.6327791174852986e-05, - "loss": 0.0805, - "step": 21700 - }, - { - "epoch": 0.5509582434319076, - "grad_norm": 0.34023773670196533, - "learning_rate": 1.6326945043787284e-05, - "loss": 0.106, - "step": 21705 - }, - { - "epoch": 0.5510851630917629, - "grad_norm": 0.7246706485748291, - "learning_rate": 1.6326098912721583e-05, - "loss": 0.0788, - "step": 21710 - }, - { - "epoch": 0.5512120827516183, - "grad_norm": 0.5671331882476807, - "learning_rate": 1.632525278165588e-05, - "loss": 0.0875, - "step": 21715 - }, - { - "epoch": 0.5513390024114735, - "grad_norm": 0.5624793171882629, - "learning_rate": 1.632440665059018e-05, - "loss": 0.0874, - "step": 21720 - }, - { - "epoch": 0.5514659220713288, - "grad_norm": 0.6153157949447632, - "learning_rate": 1.6323560519524475e-05, - "loss": 0.0994, - "step": 21725 - }, - { - "epoch": 0.5515928417311842, - "grad_norm": 0.49868881702423096, - "learning_rate": 1.6322714388458773e-05, - "loss": 0.0843, - "step": 21730 - }, - { - "epoch": 0.5517197613910395, - "grad_norm": 0.6545591354370117, - "learning_rate": 1.632186825739307e-05, - "loss": 0.0819, - "step": 21735 - }, - { - "epoch": 0.5518466810508947, - "grad_norm": 0.7131534218788147, - "learning_rate": 1.632102212632737e-05, - "loss": 0.0879, - "step": 21740 - }, - { - "epoch": 0.5519736007107501, - "grad_norm": 0.6406547427177429, - "learning_rate": 1.6320175995261668e-05, - "loss": 0.0962, - "step": 21745 - }, - { - "epoch": 0.5521005203706054, - "grad_norm": 0.4527834951877594, - "learning_rate": 1.6319329864195967e-05, - "loss": 0.0934, - "step": 21750 - }, - { - "epoch": 0.5522274400304608, - "grad_norm": 1.046244740486145, - "learning_rate": 1.6318483733130265e-05, - "loss": 0.0808, - "step": 21755 - }, - { - "epoch": 0.552354359690316, - "grad_norm": 0.943853497505188, - "learning_rate": 1.6317637602064563e-05, - "loss": 0.1112, - "step": 21760 - }, - { - "epoch": 0.5524812793501713, - "grad_norm": 0.5999763607978821, - "learning_rate": 1.631679147099886e-05, - "loss": 0.1057, - "step": 21765 - }, - { - "epoch": 0.5526081990100267, - "grad_norm": 0.521462082862854, - "learning_rate": 1.6315945339933157e-05, - "loss": 0.0757, - "step": 21770 - }, - { - "epoch": 0.552735118669882, - "grad_norm": 0.3615696132183075, - "learning_rate": 1.6315099208867455e-05, - "loss": 0.0838, - "step": 21775 - }, - { - "epoch": 0.5528620383297372, - "grad_norm": 0.4425829350948334, - "learning_rate": 1.6314253077801754e-05, - "loss": 0.0846, - "step": 21780 - }, - { - "epoch": 0.5529889579895926, - "grad_norm": 0.41002562642097473, - "learning_rate": 1.631340694673605e-05, - "loss": 0.0942, - "step": 21785 - }, - { - "epoch": 0.5531158776494479, - "grad_norm": 0.7227963209152222, - "learning_rate": 1.6312560815670347e-05, - "loss": 0.0878, - "step": 21790 - }, - { - "epoch": 0.5532427973093033, - "grad_norm": 0.43780118227005005, - "learning_rate": 1.6311714684604646e-05, - "loss": 0.0799, - "step": 21795 - }, - { - "epoch": 0.5533697169691585, - "grad_norm": 1.0729752779006958, - "learning_rate": 1.6310868553538944e-05, - "loss": 0.0945, - "step": 21800 - }, - { - "epoch": 0.5534966366290138, - "grad_norm": 0.5281301140785217, - "learning_rate": 1.6310022422473242e-05, - "loss": 0.1036, - "step": 21805 - }, - { - "epoch": 0.5536235562888692, - "grad_norm": 0.6745765805244446, - "learning_rate": 1.630917629140754e-05, - "loss": 0.1109, - "step": 21810 - }, - { - "epoch": 0.5537504759487245, - "grad_norm": 1.105872631072998, - "learning_rate": 1.630833016034184e-05, - "loss": 0.086, - "step": 21815 - }, - { - "epoch": 0.5538773956085797, - "grad_norm": 0.5176441073417664, - "learning_rate": 1.6307484029276138e-05, - "loss": 0.0803, - "step": 21820 - }, - { - "epoch": 0.5540043152684351, - "grad_norm": 0.4123047888278961, - "learning_rate": 1.6306637898210433e-05, - "loss": 0.0827, - "step": 21825 - }, - { - "epoch": 0.5541312349282904, - "grad_norm": 1.3370214700698853, - "learning_rate": 1.630579176714473e-05, - "loss": 0.1011, - "step": 21830 - }, - { - "epoch": 0.5542581545881458, - "grad_norm": 0.4900142252445221, - "learning_rate": 1.630494563607903e-05, - "loss": 0.0817, - "step": 21835 - }, - { - "epoch": 0.554385074248001, - "grad_norm": 0.7486854791641235, - "learning_rate": 1.6304099505013328e-05, - "loss": 0.0854, - "step": 21840 - }, - { - "epoch": 0.5545119939078563, - "grad_norm": 0.4738348424434662, - "learning_rate": 1.6303253373947626e-05, - "loss": 0.0828, - "step": 21845 - }, - { - "epoch": 0.5546389135677117, - "grad_norm": 0.5234520435333252, - "learning_rate": 1.6302407242881925e-05, - "loss": 0.0913, - "step": 21850 - }, - { - "epoch": 0.5547658332275669, - "grad_norm": 0.5177187323570251, - "learning_rate": 1.6301561111816223e-05, - "loss": 0.1019, - "step": 21855 - }, - { - "epoch": 0.5548927528874222, - "grad_norm": 0.9075934886932373, - "learning_rate": 1.630071498075052e-05, - "loss": 0.0913, - "step": 21860 - }, - { - "epoch": 0.5550196725472776, - "grad_norm": 0.39470571279525757, - "learning_rate": 1.6299868849684816e-05, - "loss": 0.0965, - "step": 21865 - }, - { - "epoch": 0.5551465922071329, - "grad_norm": 0.44572582840919495, - "learning_rate": 1.6299022718619115e-05, - "loss": 0.1085, - "step": 21870 - }, - { - "epoch": 0.5552735118669881, - "grad_norm": 0.555987536907196, - "learning_rate": 1.6298176587553413e-05, - "loss": 0.1159, - "step": 21875 - }, - { - "epoch": 0.5554004315268435, - "grad_norm": 0.6443449854850769, - "learning_rate": 1.629733045648771e-05, - "loss": 0.0634, - "step": 21880 - }, - { - "epoch": 0.5555273511866988, - "grad_norm": 0.41266801953315735, - "learning_rate": 1.629648432542201e-05, - "loss": 0.0659, - "step": 21885 - }, - { - "epoch": 0.5556542708465542, - "grad_norm": 0.6779760718345642, - "learning_rate": 1.629563819435631e-05, - "loss": 0.1012, - "step": 21890 - }, - { - "epoch": 0.5557811905064094, - "grad_norm": 0.37882205843925476, - "learning_rate": 1.6294792063290607e-05, - "loss": 0.0861, - "step": 21895 - }, - { - "epoch": 0.5559081101662647, - "grad_norm": 0.51219242811203, - "learning_rate": 1.6293945932224905e-05, - "loss": 0.0995, - "step": 21900 - }, - { - "epoch": 0.5560350298261201, - "grad_norm": 0.39538323879241943, - "learning_rate": 1.62930998011592e-05, - "loss": 0.0877, - "step": 21905 - }, - { - "epoch": 0.5561619494859754, - "grad_norm": 0.4612228572368622, - "learning_rate": 1.62922536700935e-05, - "loss": 0.0843, - "step": 21910 - }, - { - "epoch": 0.5562888691458306, - "grad_norm": 0.5247741341590881, - "learning_rate": 1.6291407539027797e-05, - "loss": 0.0825, - "step": 21915 - }, - { - "epoch": 0.556415788805686, - "grad_norm": 0.4389171302318573, - "learning_rate": 1.6290561407962095e-05, - "loss": 0.0675, - "step": 21920 - }, - { - "epoch": 0.5565427084655413, - "grad_norm": 0.9917736053466797, - "learning_rate": 1.628971527689639e-05, - "loss": 0.1041, - "step": 21925 - }, - { - "epoch": 0.5566696281253967, - "grad_norm": 0.4674464464187622, - "learning_rate": 1.628886914583069e-05, - "loss": 0.0808, - "step": 21930 - }, - { - "epoch": 0.5567965477852519, - "grad_norm": 0.552445650100708, - "learning_rate": 1.6288023014764987e-05, - "loss": 0.0697, - "step": 21935 - }, - { - "epoch": 0.5569234674451072, - "grad_norm": 0.5206578969955444, - "learning_rate": 1.6287176883699286e-05, - "loss": 0.0778, - "step": 21940 - }, - { - "epoch": 0.5570503871049626, - "grad_norm": 0.3905465602874756, - "learning_rate": 1.6286330752633584e-05, - "loss": 0.0727, - "step": 21945 - }, - { - "epoch": 0.5571773067648179, - "grad_norm": 0.6311579346656799, - "learning_rate": 1.6285484621567882e-05, - "loss": 0.0858, - "step": 21950 - }, - { - "epoch": 0.5573042264246731, - "grad_norm": 0.5061963200569153, - "learning_rate": 1.628463849050218e-05, - "loss": 0.1197, - "step": 21955 - }, - { - "epoch": 0.5574311460845285, - "grad_norm": 0.7620043754577637, - "learning_rate": 1.628379235943648e-05, - "loss": 0.1033, - "step": 21960 - }, - { - "epoch": 0.5575580657443838, - "grad_norm": 0.4181519150733948, - "learning_rate": 1.6282946228370774e-05, - "loss": 0.0945, - "step": 21965 - }, - { - "epoch": 0.5576849854042392, - "grad_norm": 0.591120183467865, - "learning_rate": 1.6282100097305073e-05, - "loss": 0.0948, - "step": 21970 - }, - { - "epoch": 0.5578119050640944, - "grad_norm": 0.4533957540988922, - "learning_rate": 1.628125396623937e-05, - "loss": 0.0737, - "step": 21975 - }, - { - "epoch": 0.5579388247239497, - "grad_norm": 0.48264646530151367, - "learning_rate": 1.628040783517367e-05, - "loss": 0.0911, - "step": 21980 - }, - { - "epoch": 0.5580657443838051, - "grad_norm": 0.41293027997016907, - "learning_rate": 1.6279561704107968e-05, - "loss": 0.0701, - "step": 21985 - }, - { - "epoch": 0.5581926640436604, - "grad_norm": 0.7246072888374329, - "learning_rate": 1.6278715573042266e-05, - "loss": 0.0932, - "step": 21990 - }, - { - "epoch": 0.5583195837035156, - "grad_norm": 0.7140325903892517, - "learning_rate": 1.6277869441976565e-05, - "loss": 0.0788, - "step": 21995 - }, - { - "epoch": 0.558446503363371, - "grad_norm": 0.7684769034385681, - "learning_rate": 1.6277023310910863e-05, - "loss": 0.0848, - "step": 22000 - }, - { - "epoch": 0.5585734230232263, - "grad_norm": 0.7915398478507996, - "learning_rate": 1.6276177179845158e-05, - "loss": 0.0913, - "step": 22005 - }, - { - "epoch": 0.5587003426830817, - "grad_norm": 0.670234739780426, - "learning_rate": 1.6275331048779456e-05, - "loss": 0.0889, - "step": 22010 - }, - { - "epoch": 0.5588272623429369, - "grad_norm": 0.7886123657226562, - "learning_rate": 1.6274484917713755e-05, - "loss": 0.0953, - "step": 22015 - }, - { - "epoch": 0.5589541820027922, - "grad_norm": 1.184658408164978, - "learning_rate": 1.6273638786648053e-05, - "loss": 0.1071, - "step": 22020 - }, - { - "epoch": 0.5590811016626476, - "grad_norm": 0.5911443829536438, - "learning_rate": 1.627279265558235e-05, - "loss": 0.0882, - "step": 22025 - }, - { - "epoch": 0.5592080213225028, - "grad_norm": 0.7807164192199707, - "learning_rate": 1.627194652451665e-05, - "loss": 0.0914, - "step": 22030 - }, - { - "epoch": 0.5593349409823581, - "grad_norm": 0.43034932017326355, - "learning_rate": 1.627110039345095e-05, - "loss": 0.0894, - "step": 22035 - }, - { - "epoch": 0.5594618606422135, - "grad_norm": 0.3880872428417206, - "learning_rate": 1.6270254262385247e-05, - "loss": 0.0826, - "step": 22040 - }, - { - "epoch": 0.5595887803020688, - "grad_norm": 0.6503465175628662, - "learning_rate": 1.6269408131319542e-05, - "loss": 0.0761, - "step": 22045 - }, - { - "epoch": 0.559715699961924, - "grad_norm": 0.647838830947876, - "learning_rate": 1.626856200025384e-05, - "loss": 0.0865, - "step": 22050 - }, - { - "epoch": 0.5598426196217794, - "grad_norm": 0.7757494449615479, - "learning_rate": 1.626771586918814e-05, - "loss": 0.0884, - "step": 22055 - }, - { - "epoch": 0.5599695392816347, - "grad_norm": 0.5099535584449768, - "learning_rate": 1.6266869738122437e-05, - "loss": 0.0785, - "step": 22060 - }, - { - "epoch": 0.5600964589414901, - "grad_norm": 0.6164714097976685, - "learning_rate": 1.6266023607056732e-05, - "loss": 0.1265, - "step": 22065 - }, - { - "epoch": 0.5602233786013453, - "grad_norm": 0.3624456822872162, - "learning_rate": 1.626517747599103e-05, - "loss": 0.0828, - "step": 22070 - }, - { - "epoch": 0.5603502982612006, - "grad_norm": 0.38736042380332947, - "learning_rate": 1.626433134492533e-05, - "loss": 0.0789, - "step": 22075 - }, - { - "epoch": 0.560477217921056, - "grad_norm": 0.35272470116615295, - "learning_rate": 1.6263485213859627e-05, - "loss": 0.0753, - "step": 22080 - }, - { - "epoch": 0.5606041375809113, - "grad_norm": 0.5462349057197571, - "learning_rate": 1.6262639082793926e-05, - "loss": 0.1066, - "step": 22085 - }, - { - "epoch": 0.5607310572407666, - "grad_norm": 0.6048976182937622, - "learning_rate": 1.6261792951728224e-05, - "loss": 0.0997, - "step": 22090 - }, - { - "epoch": 0.5608579769006219, - "grad_norm": 0.3924385607242584, - "learning_rate": 1.6260946820662523e-05, - "loss": 0.1025, - "step": 22095 - }, - { - "epoch": 0.5609848965604772, - "grad_norm": 0.6853782534599304, - "learning_rate": 1.626010068959682e-05, - "loss": 0.0829, - "step": 22100 - }, - { - "epoch": 0.5611118162203326, - "grad_norm": 0.4416559636592865, - "learning_rate": 1.6259254558531116e-05, - "loss": 0.0964, - "step": 22105 - }, - { - "epoch": 0.5612387358801878, - "grad_norm": 0.45733100175857544, - "learning_rate": 1.6258408427465414e-05, - "loss": 0.0925, - "step": 22110 - }, - { - "epoch": 0.5613656555400431, - "grad_norm": 1.2183588743209839, - "learning_rate": 1.6257562296399713e-05, - "loss": 0.085, - "step": 22115 - }, - { - "epoch": 0.5614925751998985, - "grad_norm": 0.5369249582290649, - "learning_rate": 1.625671616533401e-05, - "loss": 0.0889, - "step": 22120 - }, - { - "epoch": 0.5616194948597538, - "grad_norm": 0.4361705780029297, - "learning_rate": 1.625587003426831e-05, - "loss": 0.0939, - "step": 22125 - }, - { - "epoch": 0.561746414519609, - "grad_norm": 0.611616849899292, - "learning_rate": 1.6255023903202608e-05, - "loss": 0.1047, - "step": 22130 - }, - { - "epoch": 0.5618733341794644, - "grad_norm": 0.6163204312324524, - "learning_rate": 1.6254177772136906e-05, - "loss": 0.099, - "step": 22135 - }, - { - "epoch": 0.5620002538393197, - "grad_norm": 0.40420013666152954, - "learning_rate": 1.6253331641071205e-05, - "loss": 0.104, - "step": 22140 - }, - { - "epoch": 0.5621271734991751, - "grad_norm": 0.6457376480102539, - "learning_rate": 1.62524855100055e-05, - "loss": 0.106, - "step": 22145 - }, - { - "epoch": 0.5622540931590303, - "grad_norm": 0.6331921815872192, - "learning_rate": 1.6251639378939798e-05, - "loss": 0.0801, - "step": 22150 - }, - { - "epoch": 0.5623810128188856, - "grad_norm": 0.6923295259475708, - "learning_rate": 1.6250793247874097e-05, - "loss": 0.0901, - "step": 22155 - }, - { - "epoch": 0.562507932478741, - "grad_norm": 0.7161895036697388, - "learning_rate": 1.6249947116808395e-05, - "loss": 0.0892, - "step": 22160 - }, - { - "epoch": 0.5626348521385963, - "grad_norm": 0.6597152948379517, - "learning_rate": 1.6249100985742693e-05, - "loss": 0.0728, - "step": 22165 - }, - { - "epoch": 0.5627617717984515, - "grad_norm": 0.4585283100605011, - "learning_rate": 1.6248254854676992e-05, - "loss": 0.074, - "step": 22170 - }, - { - "epoch": 0.5628886914583069, - "grad_norm": 1.4169914722442627, - "learning_rate": 1.624740872361129e-05, - "loss": 0.106, - "step": 22175 - }, - { - "epoch": 0.5630156111181622, - "grad_norm": 0.6650621891021729, - "learning_rate": 1.624656259254559e-05, - "loss": 0.0954, - "step": 22180 - }, - { - "epoch": 0.5631425307780176, - "grad_norm": 0.5206394791603088, - "learning_rate": 1.6245716461479884e-05, - "loss": 0.0874, - "step": 22185 - }, - { - "epoch": 0.5632694504378728, - "grad_norm": 0.49399057030677795, - "learning_rate": 1.6244870330414182e-05, - "loss": 0.076, - "step": 22190 - }, - { - "epoch": 0.5633963700977281, - "grad_norm": 0.6015126705169678, - "learning_rate": 1.624402419934848e-05, - "loss": 0.1039, - "step": 22195 - }, - { - "epoch": 0.5635232897575835, - "grad_norm": 0.4737623333930969, - "learning_rate": 1.624317806828278e-05, - "loss": 0.0991, - "step": 22200 - }, - { - "epoch": 0.5636502094174387, - "grad_norm": 0.7916406393051147, - "learning_rate": 1.6242331937217077e-05, - "loss": 0.076, - "step": 22205 - }, - { - "epoch": 0.563777129077294, - "grad_norm": 0.5893296599388123, - "learning_rate": 1.6241485806151372e-05, - "loss": 0.0905, - "step": 22210 - }, - { - "epoch": 0.5639040487371494, - "grad_norm": 0.619590699672699, - "learning_rate": 1.624063967508567e-05, - "loss": 0.1001, - "step": 22215 - }, - { - "epoch": 0.5640309683970047, - "grad_norm": 0.5284013748168945, - "learning_rate": 1.623979354401997e-05, - "loss": 0.0683, - "step": 22220 - }, - { - "epoch": 0.56415788805686, - "grad_norm": 0.4666178226470947, - "learning_rate": 1.6238947412954267e-05, - "loss": 0.0976, - "step": 22225 - }, - { - "epoch": 0.5642848077167153, - "grad_norm": 0.43846145272254944, - "learning_rate": 1.6238101281888566e-05, - "loss": 0.0937, - "step": 22230 - }, - { - "epoch": 0.5644117273765706, - "grad_norm": 0.603554368019104, - "learning_rate": 1.6237255150822864e-05, - "loss": 0.1124, - "step": 22235 - }, - { - "epoch": 0.564538647036426, - "grad_norm": 0.5572454929351807, - "learning_rate": 1.6236409019757163e-05, - "loss": 0.0943, - "step": 22240 - }, - { - "epoch": 0.5646655666962812, - "grad_norm": 0.4660736620426178, - "learning_rate": 1.623556288869146e-05, - "loss": 0.1016, - "step": 22245 - }, - { - "epoch": 0.5647924863561365, - "grad_norm": 0.576115608215332, - "learning_rate": 1.6234716757625756e-05, - "loss": 0.0917, - "step": 22250 - }, - { - "epoch": 0.5649194060159919, - "grad_norm": 0.46939367055892944, - "learning_rate": 1.6233870626560054e-05, - "loss": 0.0677, - "step": 22255 - }, - { - "epoch": 0.5650463256758472, - "grad_norm": 0.48845845460891724, - "learning_rate": 1.6233024495494353e-05, - "loss": 0.0836, - "step": 22260 - }, - { - "epoch": 0.5651732453357025, - "grad_norm": 1.2683420181274414, - "learning_rate": 1.623217836442865e-05, - "loss": 0.0856, - "step": 22265 - }, - { - "epoch": 0.5653001649955578, - "grad_norm": 0.737240195274353, - "learning_rate": 1.623133223336295e-05, - "loss": 0.0902, - "step": 22270 - }, - { - "epoch": 0.5654270846554131, - "grad_norm": 0.5922069549560547, - "learning_rate": 1.6230486102297248e-05, - "loss": 0.1002, - "step": 22275 - }, - { - "epoch": 0.5655540043152685, - "grad_norm": 0.6426932215690613, - "learning_rate": 1.6229639971231546e-05, - "loss": 0.0769, - "step": 22280 - }, - { - "epoch": 0.5656809239751237, - "grad_norm": 0.549544632434845, - "learning_rate": 1.6228793840165845e-05, - "loss": 0.1058, - "step": 22285 - }, - { - "epoch": 0.565807843634979, - "grad_norm": 0.7556437253952026, - "learning_rate": 1.622794770910014e-05, - "loss": 0.0851, - "step": 22290 - }, - { - "epoch": 0.5659347632948344, - "grad_norm": 0.4005235731601715, - "learning_rate": 1.6227101578034438e-05, - "loss": 0.0769, - "step": 22295 - }, - { - "epoch": 0.5660616829546897, - "grad_norm": 0.7151000499725342, - "learning_rate": 1.6226255446968737e-05, - "loss": 0.098, - "step": 22300 - }, - { - "epoch": 0.566188602614545, - "grad_norm": 0.557793915271759, - "learning_rate": 1.6225409315903035e-05, - "loss": 0.0862, - "step": 22305 - }, - { - "epoch": 0.5663155222744003, - "grad_norm": 0.5847555994987488, - "learning_rate": 1.6224563184837333e-05, - "loss": 0.0814, - "step": 22310 - }, - { - "epoch": 0.5664424419342556, - "grad_norm": 0.4653325080871582, - "learning_rate": 1.6223717053771632e-05, - "loss": 0.0835, - "step": 22315 - }, - { - "epoch": 0.566569361594111, - "grad_norm": 0.6593708395957947, - "learning_rate": 1.622287092270593e-05, - "loss": 0.0807, - "step": 22320 - }, - { - "epoch": 0.5666962812539662, - "grad_norm": 0.6339888572692871, - "learning_rate": 1.622202479164023e-05, - "loss": 0.0975, - "step": 22325 - }, - { - "epoch": 0.5668232009138215, - "grad_norm": 0.7069841623306274, - "learning_rate": 1.6221178660574524e-05, - "loss": 0.0944, - "step": 22330 - }, - { - "epoch": 0.5669501205736769, - "grad_norm": 0.9138179421424866, - "learning_rate": 1.6220332529508822e-05, - "loss": 0.0892, - "step": 22335 - }, - { - "epoch": 0.5670770402335322, - "grad_norm": 0.68320631980896, - "learning_rate": 1.621948639844312e-05, - "loss": 0.0979, - "step": 22340 - }, - { - "epoch": 0.5672039598933875, - "grad_norm": 0.47460460662841797, - "learning_rate": 1.621864026737742e-05, - "loss": 0.1074, - "step": 22345 - }, - { - "epoch": 0.5673308795532428, - "grad_norm": 0.5551549792289734, - "learning_rate": 1.6217794136311714e-05, - "loss": 0.09, - "step": 22350 - }, - { - "epoch": 0.5674577992130981, - "grad_norm": 0.7425202131271362, - "learning_rate": 1.6216948005246012e-05, - "loss": 0.0821, - "step": 22355 - }, - { - "epoch": 0.5675847188729535, - "grad_norm": 0.5304387211799622, - "learning_rate": 1.621610187418031e-05, - "loss": 0.0729, - "step": 22360 - }, - { - "epoch": 0.5677116385328087, - "grad_norm": 0.6818942427635193, - "learning_rate": 1.621525574311461e-05, - "loss": 0.0921, - "step": 22365 - }, - { - "epoch": 0.567838558192664, - "grad_norm": 0.3285800516605377, - "learning_rate": 1.6214409612048908e-05, - "loss": 0.1062, - "step": 22370 - }, - { - "epoch": 0.5679654778525194, - "grad_norm": 0.735349178314209, - "learning_rate": 1.6213563480983206e-05, - "loss": 0.0888, - "step": 22375 - }, - { - "epoch": 0.5680923975123746, - "grad_norm": 0.8524607419967651, - "learning_rate": 1.6212717349917504e-05, - "loss": 0.0898, - "step": 22380 - }, - { - "epoch": 0.56821931717223, - "grad_norm": 0.41945740580558777, - "learning_rate": 1.6211871218851803e-05, - "loss": 0.0959, - "step": 22385 - }, - { - "epoch": 0.5683462368320853, - "grad_norm": 0.8476134538650513, - "learning_rate": 1.6211025087786098e-05, - "loss": 0.0927, - "step": 22390 - }, - { - "epoch": 0.5684731564919406, - "grad_norm": 0.5658868551254272, - "learning_rate": 1.6210178956720396e-05, - "loss": 0.0905, - "step": 22395 - }, - { - "epoch": 0.5686000761517959, - "grad_norm": 0.4700900912284851, - "learning_rate": 1.6209332825654695e-05, - "loss": 0.0799, - "step": 22400 - }, - { - "epoch": 0.5687269958116512, - "grad_norm": 0.6100898385047913, - "learning_rate": 1.6208486694588993e-05, - "loss": 0.0802, - "step": 22405 - }, - { - "epoch": 0.5688539154715065, - "grad_norm": 0.6237778663635254, - "learning_rate": 1.620764056352329e-05, - "loss": 0.0985, - "step": 22410 - }, - { - "epoch": 0.5689808351313619, - "grad_norm": 0.8595746159553528, - "learning_rate": 1.620679443245759e-05, - "loss": 0.0689, - "step": 22415 - }, - { - "epoch": 0.5691077547912171, - "grad_norm": 0.6352137923240662, - "learning_rate": 1.6205948301391888e-05, - "loss": 0.0974, - "step": 22420 - }, - { - "epoch": 0.5692346744510725, - "grad_norm": 0.5715554356575012, - "learning_rate": 1.6205102170326187e-05, - "loss": 0.0689, - "step": 22425 - }, - { - "epoch": 0.5693615941109278, - "grad_norm": 0.998336672782898, - "learning_rate": 1.620425603926048e-05, - "loss": 0.0773, - "step": 22430 - }, - { - "epoch": 0.5694885137707831, - "grad_norm": 0.5226443409919739, - "learning_rate": 1.620340990819478e-05, - "loss": 0.1173, - "step": 22435 - }, - { - "epoch": 0.5696154334306384, - "grad_norm": 0.504980206489563, - "learning_rate": 1.620256377712908e-05, - "loss": 0.1035, - "step": 22440 - }, - { - "epoch": 0.5697423530904937, - "grad_norm": 0.657501220703125, - "learning_rate": 1.6201717646063377e-05, - "loss": 0.0785, - "step": 22445 - }, - { - "epoch": 0.569869272750349, - "grad_norm": 0.49377474188804626, - "learning_rate": 1.6200871514997675e-05, - "loss": 0.0837, - "step": 22450 - }, - { - "epoch": 0.5699961924102044, - "grad_norm": 0.6073167324066162, - "learning_rate": 1.6200025383931974e-05, - "loss": 0.0782, - "step": 22455 - }, - { - "epoch": 0.5701231120700596, - "grad_norm": 0.3685174882411957, - "learning_rate": 1.6199179252866272e-05, - "loss": 0.0866, - "step": 22460 - }, - { - "epoch": 0.570250031729915, - "grad_norm": 0.5493044853210449, - "learning_rate": 1.619833312180057e-05, - "loss": 0.1161, - "step": 22465 - }, - { - "epoch": 0.5703769513897703, - "grad_norm": 0.5567957758903503, - "learning_rate": 1.6197486990734865e-05, - "loss": 0.0868, - "step": 22470 - }, - { - "epoch": 0.5705038710496256, - "grad_norm": 0.49605730175971985, - "learning_rate": 1.6196640859669164e-05, - "loss": 0.0711, - "step": 22475 - }, - { - "epoch": 0.5706307907094809, - "grad_norm": 0.5467023253440857, - "learning_rate": 1.6195794728603462e-05, - "loss": 0.0935, - "step": 22480 - }, - { - "epoch": 0.5707577103693362, - "grad_norm": 0.6679650545120239, - "learning_rate": 1.619494859753776e-05, - "loss": 0.0962, - "step": 22485 - }, - { - "epoch": 0.5708846300291915, - "grad_norm": 0.5466540455818176, - "learning_rate": 1.6194102466472056e-05, - "loss": 0.0703, - "step": 22490 - }, - { - "epoch": 0.5710115496890469, - "grad_norm": 0.5569736361503601, - "learning_rate": 1.6193256335406354e-05, - "loss": 0.0867, - "step": 22495 - }, - { - "epoch": 0.5711384693489021, - "grad_norm": 0.4220254421234131, - "learning_rate": 1.6192410204340652e-05, - "loss": 0.0975, - "step": 22500 - }, - { - "epoch": 0.5712653890087575, - "grad_norm": 2.337512969970703, - "learning_rate": 1.619156407327495e-05, - "loss": 0.0926, - "step": 22505 - }, - { - "epoch": 0.5713923086686128, - "grad_norm": 0.6221218705177307, - "learning_rate": 1.619071794220925e-05, - "loss": 0.0746, - "step": 22510 - }, - { - "epoch": 0.5715192283284681, - "grad_norm": 0.4586502015590668, - "learning_rate": 1.6189871811143548e-05, - "loss": 0.0713, - "step": 22515 - }, - { - "epoch": 0.5716461479883234, - "grad_norm": 0.3851875364780426, - "learning_rate": 1.6189025680077846e-05, - "loss": 0.0864, - "step": 22520 - }, - { - "epoch": 0.5717730676481787, - "grad_norm": 0.4600007236003876, - "learning_rate": 1.6188179549012144e-05, - "loss": 0.0938, - "step": 22525 - }, - { - "epoch": 0.571899987308034, - "grad_norm": 0.7743951678276062, - "learning_rate": 1.618733341794644e-05, - "loss": 0.0997, - "step": 22530 - }, - { - "epoch": 0.5720269069678894, - "grad_norm": 0.5516650080680847, - "learning_rate": 1.6186487286880738e-05, - "loss": 0.0583, - "step": 22535 - }, - { - "epoch": 0.5721538266277446, - "grad_norm": 0.7024224400520325, - "learning_rate": 1.6185641155815036e-05, - "loss": 0.0913, - "step": 22540 - }, - { - "epoch": 0.5722807462876, - "grad_norm": 0.6992104649543762, - "learning_rate": 1.6184795024749335e-05, - "loss": 0.0801, - "step": 22545 - }, - { - "epoch": 0.5724076659474553, - "grad_norm": 0.4939887821674347, - "learning_rate": 1.6183948893683633e-05, - "loss": 0.0811, - "step": 22550 - }, - { - "epoch": 0.5725345856073105, - "grad_norm": 0.4603104889392853, - "learning_rate": 1.618310276261793e-05, - "loss": 0.1035, - "step": 22555 - }, - { - "epoch": 0.5726615052671659, - "grad_norm": 0.31675589084625244, - "learning_rate": 1.618225663155223e-05, - "loss": 0.0963, - "step": 22560 - }, - { - "epoch": 0.5727884249270212, - "grad_norm": 0.7469910979270935, - "learning_rate": 1.6181410500486528e-05, - "loss": 0.0974, - "step": 22565 - }, - { - "epoch": 0.5729153445868765, - "grad_norm": 1.4683796167373657, - "learning_rate": 1.6180564369420823e-05, - "loss": 0.0948, - "step": 22570 - }, - { - "epoch": 0.5730422642467318, - "grad_norm": 1.4719878435134888, - "learning_rate": 1.617971823835512e-05, - "loss": 0.0917, - "step": 22575 - }, - { - "epoch": 0.5731691839065871, - "grad_norm": 0.6715970635414124, - "learning_rate": 1.617887210728942e-05, - "loss": 0.0997, - "step": 22580 - }, - { - "epoch": 0.5732961035664424, - "grad_norm": 0.7226307392120361, - "learning_rate": 1.617802597622372e-05, - "loss": 0.1069, - "step": 22585 - }, - { - "epoch": 0.5734230232262978, - "grad_norm": 0.7140295505523682, - "learning_rate": 1.6177179845158017e-05, - "loss": 0.0907, - "step": 22590 - }, - { - "epoch": 0.573549942886153, - "grad_norm": 0.6926888823509216, - "learning_rate": 1.6176333714092315e-05, - "loss": 0.0871, - "step": 22595 - }, - { - "epoch": 0.5736768625460084, - "grad_norm": 0.5580757260322571, - "learning_rate": 1.6175487583026614e-05, - "loss": 0.069, - "step": 22600 - }, - { - "epoch": 0.5738037822058637, - "grad_norm": 0.5340913534164429, - "learning_rate": 1.6174641451960912e-05, - "loss": 0.0822, - "step": 22605 - }, - { - "epoch": 0.573930701865719, - "grad_norm": 0.5937989950180054, - "learning_rate": 1.6173795320895207e-05, - "loss": 0.1018, - "step": 22610 - }, - { - "epoch": 0.5740576215255743, - "grad_norm": 1.0594823360443115, - "learning_rate": 1.6172949189829506e-05, - "loss": 0.0882, - "step": 22615 - }, - { - "epoch": 0.5741845411854296, - "grad_norm": 0.43265148997306824, - "learning_rate": 1.6172103058763804e-05, - "loss": 0.0783, - "step": 22620 - }, - { - "epoch": 0.574311460845285, - "grad_norm": 0.766289234161377, - "learning_rate": 1.6171256927698102e-05, - "loss": 0.0986, - "step": 22625 - }, - { - "epoch": 0.5744383805051403, - "grad_norm": 0.43480077385902405, - "learning_rate": 1.6170410796632397e-05, - "loss": 0.1, - "step": 22630 - }, - { - "epoch": 0.5745653001649955, - "grad_norm": 0.36741289496421814, - "learning_rate": 1.6169564665566696e-05, - "loss": 0.0838, - "step": 22635 - }, - { - "epoch": 0.5746922198248509, - "grad_norm": 1.0720186233520508, - "learning_rate": 1.6168718534500994e-05, - "loss": 0.0845, - "step": 22640 - }, - { - "epoch": 0.5748191394847062, - "grad_norm": 0.5484128594398499, - "learning_rate": 1.6167872403435293e-05, - "loss": 0.097, - "step": 22645 - }, - { - "epoch": 0.5749460591445615, - "grad_norm": 0.6835561990737915, - "learning_rate": 1.616702627236959e-05, - "loss": 0.0852, - "step": 22650 - }, - { - "epoch": 0.5750729788044168, - "grad_norm": 3.5969021320343018, - "learning_rate": 1.616618014130389e-05, - "loss": 0.0835, - "step": 22655 - }, - { - "epoch": 0.5751998984642721, - "grad_norm": 0.45986613631248474, - "learning_rate": 1.6165334010238188e-05, - "loss": 0.101, - "step": 22660 - }, - { - "epoch": 0.5753268181241274, - "grad_norm": 0.5771880745887756, - "learning_rate": 1.6164487879172486e-05, - "loss": 0.0912, - "step": 22665 - }, - { - "epoch": 0.5754537377839828, - "grad_norm": 0.6915527582168579, - "learning_rate": 1.616364174810678e-05, - "loss": 0.0781, - "step": 22670 - }, - { - "epoch": 0.575580657443838, - "grad_norm": 0.4499618709087372, - "learning_rate": 1.616279561704108e-05, - "loss": 0.0983, - "step": 22675 - }, - { - "epoch": 0.5757075771036934, - "grad_norm": 0.5162990093231201, - "learning_rate": 1.6161949485975378e-05, - "loss": 0.073, - "step": 22680 - }, - { - "epoch": 0.5758344967635487, - "grad_norm": 1.2978672981262207, - "learning_rate": 1.6161103354909676e-05, - "loss": 0.1026, - "step": 22685 - }, - { - "epoch": 0.575961416423404, - "grad_norm": 0.4882715940475464, - "learning_rate": 1.6160257223843975e-05, - "loss": 0.0817, - "step": 22690 - }, - { - "epoch": 0.5760883360832593, - "grad_norm": 0.9426423907279968, - "learning_rate": 1.6159411092778273e-05, - "loss": 0.1202, - "step": 22695 - }, - { - "epoch": 0.5762152557431146, - "grad_norm": 0.4206107556819916, - "learning_rate": 1.615856496171257e-05, - "loss": 0.0988, - "step": 22700 - }, - { - "epoch": 0.57634217540297, - "grad_norm": 0.819412112236023, - "learning_rate": 1.615771883064687e-05, - "loss": 0.0719, - "step": 22705 - }, - { - "epoch": 0.5764690950628252, - "grad_norm": 0.4132535457611084, - "learning_rate": 1.615687269958117e-05, - "loss": 0.0583, - "step": 22710 - }, - { - "epoch": 0.5765960147226805, - "grad_norm": 1.0596213340759277, - "learning_rate": 1.6156026568515463e-05, - "loss": 0.0959, - "step": 22715 - }, - { - "epoch": 0.5767229343825359, - "grad_norm": 0.6595755219459534, - "learning_rate": 1.6155180437449762e-05, - "loss": 0.1111, - "step": 22720 - }, - { - "epoch": 0.5768498540423912, - "grad_norm": 0.793822169303894, - "learning_rate": 1.615433430638406e-05, - "loss": 0.0843, - "step": 22725 - }, - { - "epoch": 0.5769767737022464, - "grad_norm": 0.5447860360145569, - "learning_rate": 1.615348817531836e-05, - "loss": 0.0837, - "step": 22730 - }, - { - "epoch": 0.5771036933621018, - "grad_norm": 0.4085116386413574, - "learning_rate": 1.6152642044252657e-05, - "loss": 0.0622, - "step": 22735 - }, - { - "epoch": 0.5772306130219571, - "grad_norm": 0.7849953770637512, - "learning_rate": 1.6151795913186955e-05, - "loss": 0.1079, - "step": 22740 - }, - { - "epoch": 0.5773575326818124, - "grad_norm": 0.5559983253479004, - "learning_rate": 1.6150949782121254e-05, - "loss": 0.0897, - "step": 22745 - }, - { - "epoch": 0.5774844523416677, - "grad_norm": 0.6596624255180359, - "learning_rate": 1.6150103651055552e-05, - "loss": 0.0823, - "step": 22750 - }, - { - "epoch": 0.577611372001523, - "grad_norm": 0.5663520693778992, - "learning_rate": 1.6149257519989847e-05, - "loss": 0.0876, - "step": 22755 - }, - { - "epoch": 0.5777382916613784, - "grad_norm": 0.5595954656600952, - "learning_rate": 1.6148411388924146e-05, - "loss": 0.0872, - "step": 22760 - }, - { - "epoch": 0.5778652113212337, - "grad_norm": 0.4168432354927063, - "learning_rate": 1.6147565257858444e-05, - "loss": 0.0663, - "step": 22765 - }, - { - "epoch": 0.5779921309810889, - "grad_norm": 0.39394426345825195, - "learning_rate": 1.6146719126792742e-05, - "loss": 0.0756, - "step": 22770 - }, - { - "epoch": 0.5781190506409443, - "grad_norm": 0.7718660235404968, - "learning_rate": 1.6145872995727037e-05, - "loss": 0.0834, - "step": 22775 - }, - { - "epoch": 0.5782459703007996, - "grad_norm": 0.490644246339798, - "learning_rate": 1.6145026864661336e-05, - "loss": 0.098, - "step": 22780 - }, - { - "epoch": 0.578372889960655, - "grad_norm": 0.4339923560619354, - "learning_rate": 1.6144180733595634e-05, - "loss": 0.0774, - "step": 22785 - }, - { - "epoch": 0.5784998096205102, - "grad_norm": 0.5866853594779968, - "learning_rate": 1.6143334602529933e-05, - "loss": 0.0671, - "step": 22790 - }, - { - "epoch": 0.5786267292803655, - "grad_norm": 0.48826083540916443, - "learning_rate": 1.614248847146423e-05, - "loss": 0.059, - "step": 22795 - }, - { - "epoch": 0.5787536489402209, - "grad_norm": 0.5620165467262268, - "learning_rate": 1.614164234039853e-05, - "loss": 0.0687, - "step": 22800 - }, - { - "epoch": 0.5788805686000762, - "grad_norm": 0.6916995048522949, - "learning_rate": 1.6140796209332828e-05, - "loss": 0.093, - "step": 22805 - }, - { - "epoch": 0.5790074882599314, - "grad_norm": 0.5873765349388123, - "learning_rate": 1.6139950078267126e-05, - "loss": 0.104, - "step": 22810 - }, - { - "epoch": 0.5791344079197868, - "grad_norm": 1.1643048524856567, - "learning_rate": 1.613910394720142e-05, - "loss": 0.0843, - "step": 22815 - }, - { - "epoch": 0.5792613275796421, - "grad_norm": 0.5307011008262634, - "learning_rate": 1.613825781613572e-05, - "loss": 0.0937, - "step": 22820 - }, - { - "epoch": 0.5793882472394974, - "grad_norm": 0.36967065930366516, - "learning_rate": 1.6137411685070018e-05, - "loss": 0.1064, - "step": 22825 - }, - { - "epoch": 0.5795151668993527, - "grad_norm": 0.41062140464782715, - "learning_rate": 1.6136565554004316e-05, - "loss": 0.0624, - "step": 22830 - }, - { - "epoch": 0.579642086559208, - "grad_norm": 0.7640340924263, - "learning_rate": 1.6135719422938615e-05, - "loss": 0.0842, - "step": 22835 - }, - { - "epoch": 0.5797690062190634, - "grad_norm": 0.5999107360839844, - "learning_rate": 1.6134873291872913e-05, - "loss": 0.1157, - "step": 22840 - }, - { - "epoch": 0.5798959258789187, - "grad_norm": 0.7659938335418701, - "learning_rate": 1.613402716080721e-05, - "loss": 0.1037, - "step": 22845 - }, - { - "epoch": 0.5800228455387739, - "grad_norm": 0.49680569767951965, - "learning_rate": 1.613318102974151e-05, - "loss": 0.0801, - "step": 22850 - }, - { - "epoch": 0.5801497651986293, - "grad_norm": 0.4890427887439728, - "learning_rate": 1.6132334898675805e-05, - "loss": 0.0716, - "step": 22855 - }, - { - "epoch": 0.5802766848584846, - "grad_norm": 0.5224676132202148, - "learning_rate": 1.6131488767610103e-05, - "loss": 0.0911, - "step": 22860 - }, - { - "epoch": 0.5804036045183399, - "grad_norm": 0.5069570541381836, - "learning_rate": 1.6130642636544402e-05, - "loss": 0.0887, - "step": 22865 - }, - { - "epoch": 0.5805305241781952, - "grad_norm": 0.5656378865242004, - "learning_rate": 1.61297965054787e-05, - "loss": 0.1011, - "step": 22870 - }, - { - "epoch": 0.5806574438380505, - "grad_norm": 0.6614961624145508, - "learning_rate": 1.6128950374413e-05, - "loss": 0.0834, - "step": 22875 - }, - { - "epoch": 0.5807843634979059, - "grad_norm": 0.8797395825386047, - "learning_rate": 1.6128104243347297e-05, - "loss": 0.1071, - "step": 22880 - }, - { - "epoch": 0.5809112831577611, - "grad_norm": 0.6793659329414368, - "learning_rate": 1.6127258112281596e-05, - "loss": 0.0673, - "step": 22885 - }, - { - "epoch": 0.5810382028176164, - "grad_norm": 0.5059266686439514, - "learning_rate": 1.6126411981215894e-05, - "loss": 0.0739, - "step": 22890 - }, - { - "epoch": 0.5811651224774718, - "grad_norm": 0.4341461658477783, - "learning_rate": 1.612556585015019e-05, - "loss": 0.0527, - "step": 22895 - }, - { - "epoch": 0.5812920421373271, - "grad_norm": 0.4639774262905121, - "learning_rate": 1.6124719719084487e-05, - "loss": 0.0982, - "step": 22900 - }, - { - "epoch": 0.5814189617971823, - "grad_norm": 0.7371289134025574, - "learning_rate": 1.6123873588018786e-05, - "loss": 0.0744, - "step": 22905 - }, - { - "epoch": 0.5815458814570377, - "grad_norm": 0.504769504070282, - "learning_rate": 1.6123027456953084e-05, - "loss": 0.0856, - "step": 22910 - }, - { - "epoch": 0.581672801116893, - "grad_norm": 0.469954252243042, - "learning_rate": 1.612218132588738e-05, - "loss": 0.0868, - "step": 22915 - }, - { - "epoch": 0.5817997207767484, - "grad_norm": 0.3918348550796509, - "learning_rate": 1.6121335194821678e-05, - "loss": 0.083, - "step": 22920 - }, - { - "epoch": 0.5819266404366036, - "grad_norm": 0.6323044896125793, - "learning_rate": 1.6120489063755976e-05, - "loss": 0.095, - "step": 22925 - }, - { - "epoch": 0.5820535600964589, - "grad_norm": 0.43754178285598755, - "learning_rate": 1.6119642932690274e-05, - "loss": 0.0884, - "step": 22930 - }, - { - "epoch": 0.5821804797563143, - "grad_norm": 0.5622814893722534, - "learning_rate": 1.6118796801624573e-05, - "loss": 0.085, - "step": 22935 - }, - { - "epoch": 0.5823073994161696, - "grad_norm": 0.4995182156562805, - "learning_rate": 1.611795067055887e-05, - "loss": 0.0992, - "step": 22940 - }, - { - "epoch": 0.5824343190760248, - "grad_norm": 0.5832148194313049, - "learning_rate": 1.611710453949317e-05, - "loss": 0.0926, - "step": 22945 - }, - { - "epoch": 0.5825612387358802, - "grad_norm": 0.49006107449531555, - "learning_rate": 1.6116258408427468e-05, - "loss": 0.0881, - "step": 22950 - }, - { - "epoch": 0.5826881583957355, - "grad_norm": 0.5674822926521301, - "learning_rate": 1.6115412277361763e-05, - "loss": 0.1042, - "step": 22955 - }, - { - "epoch": 0.5828150780555909, - "grad_norm": 0.5498344302177429, - "learning_rate": 1.611456614629606e-05, - "loss": 0.0716, - "step": 22960 - }, - { - "epoch": 0.5829419977154461, - "grad_norm": 0.5218973159790039, - "learning_rate": 1.611372001523036e-05, - "loss": 0.0778, - "step": 22965 - }, - { - "epoch": 0.5830689173753014, - "grad_norm": 0.7110181450843811, - "learning_rate": 1.6112873884164658e-05, - "loss": 0.1114, - "step": 22970 - }, - { - "epoch": 0.5831958370351568, - "grad_norm": 0.6917306780815125, - "learning_rate": 1.6112027753098957e-05, - "loss": 0.078, - "step": 22975 - }, - { - "epoch": 0.5833227566950121, - "grad_norm": 0.5121098756790161, - "learning_rate": 1.6111181622033255e-05, - "loss": 0.0892, - "step": 22980 - }, - { - "epoch": 0.5834496763548673, - "grad_norm": 0.4409685432910919, - "learning_rate": 1.6110335490967553e-05, - "loss": 0.0822, - "step": 22985 - }, - { - "epoch": 0.5835765960147227, - "grad_norm": 0.6445302367210388, - "learning_rate": 1.6109489359901852e-05, - "loss": 0.0768, - "step": 22990 - }, - { - "epoch": 0.583703515674578, - "grad_norm": 0.5278316736221313, - "learning_rate": 1.6108643228836147e-05, - "loss": 0.1035, - "step": 22995 - }, - { - "epoch": 0.5838304353344334, - "grad_norm": 0.4861685335636139, - "learning_rate": 1.6107797097770445e-05, - "loss": 0.0738, - "step": 23000 - }, - { - "epoch": 0.5839573549942886, - "grad_norm": 0.5520419478416443, - "learning_rate": 1.6106950966704744e-05, - "loss": 0.081, - "step": 23005 - }, - { - "epoch": 0.5840842746541439, - "grad_norm": 0.624512255191803, - "learning_rate": 1.6106104835639042e-05, - "loss": 0.0715, - "step": 23010 - }, - { - "epoch": 0.5842111943139993, - "grad_norm": 0.4131576716899872, - "learning_rate": 1.610525870457334e-05, - "loss": 0.0633, - "step": 23015 - }, - { - "epoch": 0.5843381139738546, - "grad_norm": 0.7573831081390381, - "learning_rate": 1.610441257350764e-05, - "loss": 0.0721, - "step": 23020 - }, - { - "epoch": 0.5844650336337098, - "grad_norm": 0.3471788763999939, - "learning_rate": 1.6103566442441937e-05, - "loss": 0.0992, - "step": 23025 - }, - { - "epoch": 0.5845919532935652, - "grad_norm": 0.5518091917037964, - "learning_rate": 1.6102720311376236e-05, - "loss": 0.0895, - "step": 23030 - }, - { - "epoch": 0.5847188729534205, - "grad_norm": 1.5126512050628662, - "learning_rate": 1.610187418031053e-05, - "loss": 0.0821, - "step": 23035 - }, - { - "epoch": 0.5848457926132758, - "grad_norm": 0.9777607321739197, - "learning_rate": 1.610102804924483e-05, - "loss": 0.0894, - "step": 23040 - }, - { - "epoch": 0.5849727122731311, - "grad_norm": 0.3668428659439087, - "learning_rate": 1.6100181918179127e-05, - "loss": 0.109, - "step": 23045 - }, - { - "epoch": 0.5850996319329864, - "grad_norm": 0.4683702886104584, - "learning_rate": 1.6099335787113426e-05, - "loss": 0.0896, - "step": 23050 - }, - { - "epoch": 0.5852265515928418, - "grad_norm": 0.6418293714523315, - "learning_rate": 1.609848965604772e-05, - "loss": 0.0857, - "step": 23055 - }, - { - "epoch": 0.585353471252697, - "grad_norm": 0.5254943370819092, - "learning_rate": 1.609764352498202e-05, - "loss": 0.0864, - "step": 23060 - }, - { - "epoch": 0.5854803909125523, - "grad_norm": 0.5192406177520752, - "learning_rate": 1.6096797393916318e-05, - "loss": 0.0774, - "step": 23065 - }, - { - "epoch": 0.5856073105724077, - "grad_norm": 0.49449384212493896, - "learning_rate": 1.6095951262850616e-05, - "loss": 0.0706, - "step": 23070 - }, - { - "epoch": 0.585734230232263, - "grad_norm": 1.5167604684829712, - "learning_rate": 1.6095105131784914e-05, - "loss": 0.1066, - "step": 23075 - }, - { - "epoch": 0.5858611498921182, - "grad_norm": 0.6414330005645752, - "learning_rate": 1.6094259000719213e-05, - "loss": 0.082, - "step": 23080 - }, - { - "epoch": 0.5859880695519736, - "grad_norm": 0.7783939242362976, - "learning_rate": 1.609341286965351e-05, - "loss": 0.0752, - "step": 23085 - }, - { - "epoch": 0.5861149892118289, - "grad_norm": 0.6716271042823792, - "learning_rate": 1.609256673858781e-05, - "loss": 0.0869, - "step": 23090 - }, - { - "epoch": 0.5862419088716843, - "grad_norm": 0.8965374827384949, - "learning_rate": 1.6091720607522105e-05, - "loss": 0.112, - "step": 23095 - }, - { - "epoch": 0.5863688285315395, - "grad_norm": 0.5631676912307739, - "learning_rate": 1.6090874476456403e-05, - "loss": 0.1066, - "step": 23100 - }, - { - "epoch": 0.5864957481913948, - "grad_norm": 0.5074991583824158, - "learning_rate": 1.60900283453907e-05, - "loss": 0.0812, - "step": 23105 - }, - { - "epoch": 0.5866226678512502, - "grad_norm": 0.5374479293823242, - "learning_rate": 1.6089182214325e-05, - "loss": 0.0992, - "step": 23110 - }, - { - "epoch": 0.5867495875111055, - "grad_norm": 0.5720285177230835, - "learning_rate": 1.6088336083259298e-05, - "loss": 0.0893, - "step": 23115 - }, - { - "epoch": 0.5868765071709607, - "grad_norm": 0.6388254761695862, - "learning_rate": 1.6087489952193597e-05, - "loss": 0.1086, - "step": 23120 - }, - { - "epoch": 0.5870034268308161, - "grad_norm": 0.577297031879425, - "learning_rate": 1.6086643821127895e-05, - "loss": 0.0875, - "step": 23125 - }, - { - "epoch": 0.5871303464906714, - "grad_norm": 0.56074059009552, - "learning_rate": 1.6085797690062193e-05, - "loss": 0.0965, - "step": 23130 - }, - { - "epoch": 0.5872572661505268, - "grad_norm": 0.5349249839782715, - "learning_rate": 1.608495155899649e-05, - "loss": 0.0752, - "step": 23135 - }, - { - "epoch": 0.587384185810382, - "grad_norm": 0.4057276248931885, - "learning_rate": 1.6084105427930787e-05, - "loss": 0.0891, - "step": 23140 - }, - { - "epoch": 0.5875111054702373, - "grad_norm": 0.3864908516407013, - "learning_rate": 1.6083259296865085e-05, - "loss": 0.0694, - "step": 23145 - }, - { - "epoch": 0.5876380251300927, - "grad_norm": 0.4443625509738922, - "learning_rate": 1.6082413165799384e-05, - "loss": 0.1003, - "step": 23150 - }, - { - "epoch": 0.587764944789948, - "grad_norm": 0.5156288146972656, - "learning_rate": 1.6081567034733682e-05, - "loss": 0.0958, - "step": 23155 - }, - { - "epoch": 0.5878918644498032, - "grad_norm": 0.44444578886032104, - "learning_rate": 1.608072090366798e-05, - "loss": 0.0706, - "step": 23160 - }, - { - "epoch": 0.5880187841096586, - "grad_norm": 0.4428846538066864, - "learning_rate": 1.607987477260228e-05, - "loss": 0.0733, - "step": 23165 - }, - { - "epoch": 0.5881457037695139, - "grad_norm": 0.5442954301834106, - "learning_rate": 1.6079028641536577e-05, - "loss": 0.1003, - "step": 23170 - }, - { - "epoch": 0.5882726234293693, - "grad_norm": 0.3584955632686615, - "learning_rate": 1.6078182510470872e-05, - "loss": 0.0841, - "step": 23175 - }, - { - "epoch": 0.5883995430892245, - "grad_norm": 0.5988057851791382, - "learning_rate": 1.607733637940517e-05, - "loss": 0.0887, - "step": 23180 - }, - { - "epoch": 0.5885264627490798, - "grad_norm": 0.6465273499488831, - "learning_rate": 1.607649024833947e-05, - "loss": 0.0856, - "step": 23185 - }, - { - "epoch": 0.5886533824089352, - "grad_norm": 2.6066131591796875, - "learning_rate": 1.6075644117273768e-05, - "loss": 0.0822, - "step": 23190 - }, - { - "epoch": 0.5887803020687905, - "grad_norm": 1.1567188501358032, - "learning_rate": 1.6074797986208063e-05, - "loss": 0.0763, - "step": 23195 - }, - { - "epoch": 0.5889072217286457, - "grad_norm": 0.5389106869697571, - "learning_rate": 1.607395185514236e-05, - "loss": 0.0804, - "step": 23200 - }, - { - "epoch": 0.5890341413885011, - "grad_norm": 1.3067126274108887, - "learning_rate": 1.607310572407666e-05, - "loss": 0.0892, - "step": 23205 - }, - { - "epoch": 0.5891610610483564, - "grad_norm": 0.6480451822280884, - "learning_rate": 1.6072259593010958e-05, - "loss": 0.1022, - "step": 23210 - }, - { - "epoch": 0.5892879807082118, - "grad_norm": 0.6400402784347534, - "learning_rate": 1.6071413461945256e-05, - "loss": 0.0956, - "step": 23215 - }, - { - "epoch": 0.589414900368067, - "grad_norm": 1.3103232383728027, - "learning_rate": 1.6070567330879555e-05, - "loss": 0.0815, - "step": 23220 - }, - { - "epoch": 0.5895418200279223, - "grad_norm": 0.6123467683792114, - "learning_rate": 1.6069721199813853e-05, - "loss": 0.099, - "step": 23225 - }, - { - "epoch": 0.5896687396877777, - "grad_norm": 0.509892463684082, - "learning_rate": 1.606887506874815e-05, - "loss": 0.0782, - "step": 23230 - }, - { - "epoch": 0.5897956593476329, - "grad_norm": 0.48236170411109924, - "learning_rate": 1.606802893768245e-05, - "loss": 0.0695, - "step": 23235 - }, - { - "epoch": 0.5899225790074882, - "grad_norm": 0.6597408056259155, - "learning_rate": 1.6067182806616745e-05, - "loss": 0.0823, - "step": 23240 - }, - { - "epoch": 0.5900494986673436, - "grad_norm": 0.5529798269271851, - "learning_rate": 1.6066336675551043e-05, - "loss": 0.079, - "step": 23245 - }, - { - "epoch": 0.5901764183271989, - "grad_norm": 0.28958213329315186, - "learning_rate": 1.606549054448534e-05, - "loss": 0.0901, - "step": 23250 - }, - { - "epoch": 0.5903033379870541, - "grad_norm": 0.4272881746292114, - "learning_rate": 1.606464441341964e-05, - "loss": 0.0713, - "step": 23255 - }, - { - "epoch": 0.5904302576469095, - "grad_norm": 0.4263005256652832, - "learning_rate": 1.606379828235394e-05, - "loss": 0.0757, - "step": 23260 - }, - { - "epoch": 0.5905571773067648, - "grad_norm": 0.44379186630249023, - "learning_rate": 1.6062952151288237e-05, - "loss": 0.0829, - "step": 23265 - }, - { - "epoch": 0.5906840969666202, - "grad_norm": 1.467645287513733, - "learning_rate": 1.6062106020222535e-05, - "loss": 0.0842, - "step": 23270 - }, - { - "epoch": 0.5908110166264754, - "grad_norm": 0.7395593523979187, - "learning_rate": 1.6061259889156834e-05, - "loss": 0.0932, - "step": 23275 - }, - { - "epoch": 0.5909379362863307, - "grad_norm": 0.6183434128761292, - "learning_rate": 1.606041375809113e-05, - "loss": 0.0684, - "step": 23280 - }, - { - "epoch": 0.5910648559461861, - "grad_norm": 0.5293750166893005, - "learning_rate": 1.6059567627025427e-05, - "loss": 0.105, - "step": 23285 - }, - { - "epoch": 0.5911917756060414, - "grad_norm": 1.0042459964752197, - "learning_rate": 1.6058721495959725e-05, - "loss": 0.0862, - "step": 23290 - }, - { - "epoch": 0.5913186952658966, - "grad_norm": 3.5209484100341797, - "learning_rate": 1.6057875364894024e-05, - "loss": 0.0755, - "step": 23295 - }, - { - "epoch": 0.591445614925752, - "grad_norm": 0.4491889476776123, - "learning_rate": 1.6057029233828322e-05, - "loss": 0.0674, - "step": 23300 - }, - { - "epoch": 0.5915725345856073, - "grad_norm": 0.7636537551879883, - "learning_rate": 1.605618310276262e-05, - "loss": 0.1219, - "step": 23305 - }, - { - "epoch": 0.5916994542454627, - "grad_norm": 0.6084511280059814, - "learning_rate": 1.605533697169692e-05, - "loss": 0.0922, - "step": 23310 - }, - { - "epoch": 0.5918263739053179, - "grad_norm": 1.1049736738204956, - "learning_rate": 1.6054490840631217e-05, - "loss": 0.0969, - "step": 23315 - }, - { - "epoch": 0.5919532935651732, - "grad_norm": 0.598724365234375, - "learning_rate": 1.6053644709565512e-05, - "loss": 0.0864, - "step": 23320 - }, - { - "epoch": 0.5920802132250286, - "grad_norm": 0.5001533627510071, - "learning_rate": 1.605279857849981e-05, - "loss": 0.0589, - "step": 23325 - }, - { - "epoch": 0.5922071328848839, - "grad_norm": 0.36072853207588196, - "learning_rate": 1.605195244743411e-05, - "loss": 0.0854, - "step": 23330 - }, - { - "epoch": 0.5923340525447391, - "grad_norm": 0.6538400650024414, - "learning_rate": 1.6051106316368408e-05, - "loss": 0.0817, - "step": 23335 - }, - { - "epoch": 0.5924609722045945, - "grad_norm": 0.79857337474823, - "learning_rate": 1.6050260185302703e-05, - "loss": 0.0805, - "step": 23340 - }, - { - "epoch": 0.5925878918644498, - "grad_norm": 0.45374882221221924, - "learning_rate": 1.6049414054237e-05, - "loss": 0.0888, - "step": 23345 - }, - { - "epoch": 0.5927148115243052, - "grad_norm": 0.36473870277404785, - "learning_rate": 1.60485679231713e-05, - "loss": 0.0678, - "step": 23350 - }, - { - "epoch": 0.5928417311841604, - "grad_norm": 0.44165652990341187, - "learning_rate": 1.6047721792105598e-05, - "loss": 0.08, - "step": 23355 - }, - { - "epoch": 0.5929686508440157, - "grad_norm": 0.523646354675293, - "learning_rate": 1.6046875661039896e-05, - "loss": 0.0897, - "step": 23360 - }, - { - "epoch": 0.5930955705038711, - "grad_norm": 0.9425628185272217, - "learning_rate": 1.6046029529974195e-05, - "loss": 0.0933, - "step": 23365 - }, - { - "epoch": 0.5932224901637264, - "grad_norm": 1.7013756036758423, - "learning_rate": 1.6045183398908493e-05, - "loss": 0.1118, - "step": 23370 - }, - { - "epoch": 0.5933494098235816, - "grad_norm": 0.46064719557762146, - "learning_rate": 1.604433726784279e-05, - "loss": 0.088, - "step": 23375 - }, - { - "epoch": 0.593476329483437, - "grad_norm": 0.5432181358337402, - "learning_rate": 1.6043491136777086e-05, - "loss": 0.0822, - "step": 23380 - }, - { - "epoch": 0.5936032491432923, - "grad_norm": 0.7337126731872559, - "learning_rate": 1.6042645005711385e-05, - "loss": 0.0989, - "step": 23385 - }, - { - "epoch": 0.5937301688031477, - "grad_norm": 0.9595813751220703, - "learning_rate": 1.6041798874645683e-05, - "loss": 0.08, - "step": 23390 - }, - { - "epoch": 0.5938570884630029, - "grad_norm": 0.5318570733070374, - "learning_rate": 1.604095274357998e-05, - "loss": 0.0822, - "step": 23395 - }, - { - "epoch": 0.5939840081228582, - "grad_norm": 0.5107890963554382, - "learning_rate": 1.604010661251428e-05, - "loss": 0.0994, - "step": 23400 - }, - { - "epoch": 0.5941109277827136, - "grad_norm": 0.56360924243927, - "learning_rate": 1.603926048144858e-05, - "loss": 0.0758, - "step": 23405 - }, - { - "epoch": 0.5942378474425688, - "grad_norm": 0.6554743647575378, - "learning_rate": 1.6038414350382877e-05, - "loss": 0.0698, - "step": 23410 - }, - { - "epoch": 0.5943647671024241, - "grad_norm": 0.44236454367637634, - "learning_rate": 1.6037568219317175e-05, - "loss": 0.0911, - "step": 23415 - }, - { - "epoch": 0.5944916867622795, - "grad_norm": 0.37322503328323364, - "learning_rate": 1.603672208825147e-05, - "loss": 0.0833, - "step": 23420 - }, - { - "epoch": 0.5946186064221348, - "grad_norm": 0.477827250957489, - "learning_rate": 1.603587595718577e-05, - "loss": 0.0543, - "step": 23425 - }, - { - "epoch": 0.59474552608199, - "grad_norm": 1.221618413925171, - "learning_rate": 1.6035029826120067e-05, - "loss": 0.0846, - "step": 23430 - }, - { - "epoch": 0.5948724457418454, - "grad_norm": 0.7519368529319763, - "learning_rate": 1.6034183695054366e-05, - "loss": 0.0858, - "step": 23435 - }, - { - "epoch": 0.5949993654017007, - "grad_norm": 0.8774020671844482, - "learning_rate": 1.6033337563988664e-05, - "loss": 0.0721, - "step": 23440 - }, - { - "epoch": 0.5951262850615561, - "grad_norm": 0.6847954988479614, - "learning_rate": 1.6032491432922962e-05, - "loss": 0.0858, - "step": 23445 - }, - { - "epoch": 0.5952532047214113, - "grad_norm": 0.6238142251968384, - "learning_rate": 1.603164530185726e-05, - "loss": 0.0908, - "step": 23450 - }, - { - "epoch": 0.5953801243812666, - "grad_norm": 0.52669358253479, - "learning_rate": 1.603079917079156e-05, - "loss": 0.0565, - "step": 23455 - }, - { - "epoch": 0.595507044041122, - "grad_norm": 0.6860011219978333, - "learning_rate": 1.6029953039725854e-05, - "loss": 0.0751, - "step": 23460 - }, - { - "epoch": 0.5956339637009773, - "grad_norm": 0.7870742082595825, - "learning_rate": 1.6029106908660153e-05, - "loss": 0.0936, - "step": 23465 - }, - { - "epoch": 0.5957608833608325, - "grad_norm": 0.884994626045227, - "learning_rate": 1.602826077759445e-05, - "loss": 0.0888, - "step": 23470 - }, - { - "epoch": 0.5958878030206879, - "grad_norm": 0.7706344127655029, - "learning_rate": 1.602741464652875e-05, - "loss": 0.0896, - "step": 23475 - }, - { - "epoch": 0.5960147226805432, - "grad_norm": 0.391023725271225, - "learning_rate": 1.6026568515463044e-05, - "loss": 0.0902, - "step": 23480 - }, - { - "epoch": 0.5961416423403986, - "grad_norm": 0.6132872104644775, - "learning_rate": 1.6025722384397343e-05, - "loss": 0.0698, - "step": 23485 - }, - { - "epoch": 0.5962685620002538, - "grad_norm": 0.6483832597732544, - "learning_rate": 1.602487625333164e-05, - "loss": 0.0955, - "step": 23490 - }, - { - "epoch": 0.5963954816601091, - "grad_norm": 0.40600281953811646, - "learning_rate": 1.602403012226594e-05, - "loss": 0.0838, - "step": 23495 - }, - { - "epoch": 0.5965224013199645, - "grad_norm": 0.5633193850517273, - "learning_rate": 1.6023183991200238e-05, - "loss": 0.0758, - "step": 23500 - }, - { - "epoch": 0.5966493209798198, - "grad_norm": 0.9579320549964905, - "learning_rate": 1.6022337860134536e-05, - "loss": 0.0669, - "step": 23505 - }, - { - "epoch": 0.596776240639675, - "grad_norm": 0.5756638050079346, - "learning_rate": 1.6021491729068835e-05, - "loss": 0.1042, - "step": 23510 - }, - { - "epoch": 0.5969031602995304, - "grad_norm": 0.5470210313796997, - "learning_rate": 1.6020645598003133e-05, - "loss": 0.1049, - "step": 23515 - }, - { - "epoch": 0.5970300799593857, - "grad_norm": 1.2627500295639038, - "learning_rate": 1.6019799466937428e-05, - "loss": 0.086, - "step": 23520 - }, - { - "epoch": 0.5971569996192411, - "grad_norm": 0.4489853084087372, - "learning_rate": 1.6018953335871727e-05, - "loss": 0.0936, - "step": 23525 - }, - { - "epoch": 0.5972839192790963, - "grad_norm": 0.9055612683296204, - "learning_rate": 1.6018107204806025e-05, - "loss": 0.0762, - "step": 23530 - }, - { - "epoch": 0.5974108389389516, - "grad_norm": 0.764091432094574, - "learning_rate": 1.6017261073740323e-05, - "loss": 0.0779, - "step": 23535 - }, - { - "epoch": 0.597537758598807, - "grad_norm": 0.3206979036331177, - "learning_rate": 1.6016414942674622e-05, - "loss": 0.0837, - "step": 23540 - }, - { - "epoch": 0.5976646782586623, - "grad_norm": 0.8662707209587097, - "learning_rate": 1.601556881160892e-05, - "loss": 0.0862, - "step": 23545 - }, - { - "epoch": 0.5977915979185175, - "grad_norm": 0.4560585021972656, - "learning_rate": 1.601472268054322e-05, - "loss": 0.0635, - "step": 23550 - }, - { - "epoch": 0.5979185175783729, - "grad_norm": 0.4547397792339325, - "learning_rate": 1.6013876549477517e-05, - "loss": 0.0856, - "step": 23555 - }, - { - "epoch": 0.5980454372382282, - "grad_norm": 0.5555941462516785, - "learning_rate": 1.6013030418411812e-05, - "loss": 0.0762, - "step": 23560 - }, - { - "epoch": 0.5981723568980836, - "grad_norm": 0.4352744519710541, - "learning_rate": 1.601218428734611e-05, - "loss": 0.0918, - "step": 23565 - }, - { - "epoch": 0.5982992765579388, - "grad_norm": 0.5621304512023926, - "learning_rate": 1.601133815628041e-05, - "loss": 0.0695, - "step": 23570 - }, - { - "epoch": 0.5984261962177941, - "grad_norm": 0.6805803775787354, - "learning_rate": 1.6010492025214707e-05, - "loss": 0.0757, - "step": 23575 - }, - { - "epoch": 0.5985531158776495, - "grad_norm": 0.474968820810318, - "learning_rate": 1.6009645894149006e-05, - "loss": 0.09, - "step": 23580 - }, - { - "epoch": 0.5986800355375047, - "grad_norm": 0.6384949684143066, - "learning_rate": 1.6008799763083304e-05, - "loss": 0.0787, - "step": 23585 - }, - { - "epoch": 0.59880695519736, - "grad_norm": 0.45498260855674744, - "learning_rate": 1.6007953632017602e-05, - "loss": 0.1005, - "step": 23590 - }, - { - "epoch": 0.5989338748572154, - "grad_norm": 0.42776307463645935, - "learning_rate": 1.60071075009519e-05, - "loss": 0.0857, - "step": 23595 - }, - { - "epoch": 0.5990607945170707, - "grad_norm": 0.485735148191452, - "learning_rate": 1.6006261369886196e-05, - "loss": 0.0793, - "step": 23600 - }, - { - "epoch": 0.599187714176926, - "grad_norm": 0.6367374062538147, - "learning_rate": 1.6005415238820494e-05, - "loss": 0.0708, - "step": 23605 - }, - { - "epoch": 0.5993146338367813, - "grad_norm": 0.6591060161590576, - "learning_rate": 1.6004569107754793e-05, - "loss": 0.0905, - "step": 23610 - }, - { - "epoch": 0.5994415534966366, - "grad_norm": 0.6677157282829285, - "learning_rate": 1.600372297668909e-05, - "loss": 0.0801, - "step": 23615 - }, - { - "epoch": 0.599568473156492, - "grad_norm": 0.4154126048088074, - "learning_rate": 1.6002876845623386e-05, - "loss": 0.0795, - "step": 23620 - }, - { - "epoch": 0.5996953928163472, - "grad_norm": 0.6807536482810974, - "learning_rate": 1.6002030714557684e-05, - "loss": 0.0861, - "step": 23625 - }, - { - "epoch": 0.5998223124762025, - "grad_norm": 0.3436277508735657, - "learning_rate": 1.6001184583491983e-05, - "loss": 0.072, - "step": 23630 - }, - { - "epoch": 0.5999492321360579, - "grad_norm": 0.37607312202453613, - "learning_rate": 1.600033845242628e-05, - "loss": 0.0661, - "step": 23635 - }, - { - "epoch": 0.6000761517959132, - "grad_norm": 0.5774019360542297, - "learning_rate": 1.599949232136058e-05, - "loss": 0.0878, - "step": 23640 - }, - { - "epoch": 0.6002030714557685, - "grad_norm": 0.9495500922203064, - "learning_rate": 1.5998646190294878e-05, - "loss": 0.0907, - "step": 23645 - }, - { - "epoch": 0.6003299911156238, - "grad_norm": 0.4929032623767853, - "learning_rate": 1.5997800059229176e-05, - "loss": 0.0949, - "step": 23650 - }, - { - "epoch": 0.6004569107754791, - "grad_norm": 1.10200834274292, - "learning_rate": 1.5996953928163475e-05, - "loss": 0.077, - "step": 23655 - }, - { - "epoch": 0.6005838304353345, - "grad_norm": 0.6601969003677368, - "learning_rate": 1.599610779709777e-05, - "loss": 0.0952, - "step": 23660 - }, - { - "epoch": 0.6007107500951897, - "grad_norm": 0.5036243796348572, - "learning_rate": 1.5995261666032068e-05, - "loss": 0.0836, - "step": 23665 - }, - { - "epoch": 0.600837669755045, - "grad_norm": 0.3999279737472534, - "learning_rate": 1.5994415534966367e-05, - "loss": 0.0816, - "step": 23670 - }, - { - "epoch": 0.6009645894149004, - "grad_norm": 0.9573605060577393, - "learning_rate": 1.5993569403900665e-05, - "loss": 0.0825, - "step": 23675 - }, - { - "epoch": 0.6010915090747557, - "grad_norm": 0.6953319311141968, - "learning_rate": 1.5992723272834964e-05, - "loss": 0.0841, - "step": 23680 - }, - { - "epoch": 0.601218428734611, - "grad_norm": 0.5276809930801392, - "learning_rate": 1.5991877141769262e-05, - "loss": 0.0906, - "step": 23685 - }, - { - "epoch": 0.6013453483944663, - "grad_norm": 0.4043518900871277, - "learning_rate": 1.599103101070356e-05, - "loss": 0.0708, - "step": 23690 - }, - { - "epoch": 0.6014722680543216, - "grad_norm": 0.8877809643745422, - "learning_rate": 1.599018487963786e-05, - "loss": 0.096, - "step": 23695 - }, - { - "epoch": 0.601599187714177, - "grad_norm": 0.3521532416343689, - "learning_rate": 1.5989338748572154e-05, - "loss": 0.0807, - "step": 23700 - }, - { - "epoch": 0.6017261073740322, - "grad_norm": 0.48366567492485046, - "learning_rate": 1.5988492617506452e-05, - "loss": 0.0929, - "step": 23705 - }, - { - "epoch": 0.6018530270338875, - "grad_norm": 0.4041817784309387, - "learning_rate": 1.598764648644075e-05, - "loss": 0.0659, - "step": 23710 - }, - { - "epoch": 0.6019799466937429, - "grad_norm": 0.726164698600769, - "learning_rate": 1.598680035537505e-05, - "loss": 0.0822, - "step": 23715 - }, - { - "epoch": 0.6021068663535982, - "grad_norm": 0.458091676235199, - "learning_rate": 1.5985954224309347e-05, - "loss": 0.0822, - "step": 23720 - }, - { - "epoch": 0.6022337860134535, - "grad_norm": 0.5492805242538452, - "learning_rate": 1.5985108093243646e-05, - "loss": 0.079, - "step": 23725 - }, - { - "epoch": 0.6023607056733088, - "grad_norm": 0.355085551738739, - "learning_rate": 1.5984261962177944e-05, - "loss": 0.064, - "step": 23730 - }, - { - "epoch": 0.6024876253331641, - "grad_norm": 0.6264757513999939, - "learning_rate": 1.5983415831112243e-05, - "loss": 0.0933, - "step": 23735 - }, - { - "epoch": 0.6026145449930195, - "grad_norm": 0.45168566703796387, - "learning_rate": 1.5982569700046538e-05, - "loss": 0.0628, - "step": 23740 - }, - { - "epoch": 0.6027414646528747, - "grad_norm": 0.5066260099411011, - "learning_rate": 1.5981723568980836e-05, - "loss": 0.1, - "step": 23745 - }, - { - "epoch": 0.60286838431273, - "grad_norm": 0.9544098973274231, - "learning_rate": 1.5980877437915134e-05, - "loss": 0.0939, - "step": 23750 - }, - { - "epoch": 0.6029953039725854, - "grad_norm": 0.5095725059509277, - "learning_rate": 1.5980031306849433e-05, - "loss": 0.0873, - "step": 23755 - }, - { - "epoch": 0.6031222236324406, - "grad_norm": 0.6042221188545227, - "learning_rate": 1.597918517578373e-05, - "loss": 0.0751, - "step": 23760 - }, - { - "epoch": 0.603249143292296, - "grad_norm": 0.5004549026489258, - "learning_rate": 1.5978339044718026e-05, - "loss": 0.0744, - "step": 23765 - }, - { - "epoch": 0.6033760629521513, - "grad_norm": 0.6921148896217346, - "learning_rate": 1.5977492913652325e-05, - "loss": 0.0836, - "step": 23770 - }, - { - "epoch": 0.6035029826120066, - "grad_norm": 0.569765031337738, - "learning_rate": 1.5976646782586623e-05, - "loss": 0.0676, - "step": 23775 - }, - { - "epoch": 0.6036299022718619, - "grad_norm": 0.6473386883735657, - "learning_rate": 1.597580065152092e-05, - "loss": 0.0801, - "step": 23780 - }, - { - "epoch": 0.6037568219317172, - "grad_norm": 0.599888026714325, - "learning_rate": 1.597495452045522e-05, - "loss": 0.0815, - "step": 23785 - }, - { - "epoch": 0.6038837415915725, - "grad_norm": 0.6545892953872681, - "learning_rate": 1.5974108389389518e-05, - "loss": 0.0899, - "step": 23790 - }, - { - "epoch": 0.6040106612514279, - "grad_norm": 0.6592133045196533, - "learning_rate": 1.5973262258323817e-05, - "loss": 0.0631, - "step": 23795 - }, - { - "epoch": 0.6041375809112831, - "grad_norm": 0.42520982027053833, - "learning_rate": 1.5972416127258115e-05, - "loss": 0.0869, - "step": 23800 - }, - { - "epoch": 0.6042645005711385, - "grad_norm": 0.44906380772590637, - "learning_rate": 1.597156999619241e-05, - "loss": 0.0787, - "step": 23805 - }, - { - "epoch": 0.6043914202309938, - "grad_norm": 0.38189512491226196, - "learning_rate": 1.597072386512671e-05, - "loss": 0.0733, - "step": 23810 - }, - { - "epoch": 0.6045183398908491, - "grad_norm": 1.3097407817840576, - "learning_rate": 1.5969877734061007e-05, - "loss": 0.0934, - "step": 23815 - }, - { - "epoch": 0.6046452595507044, - "grad_norm": 0.6504003405570984, - "learning_rate": 1.5969031602995305e-05, - "loss": 0.0724, - "step": 23820 - }, - { - "epoch": 0.6047721792105597, - "grad_norm": 0.5183903574943542, - "learning_rate": 1.5968185471929604e-05, - "loss": 0.1205, - "step": 23825 - }, - { - "epoch": 0.604899098870415, - "grad_norm": 0.5202925801277161, - "learning_rate": 1.5967339340863902e-05, - "loss": 0.0888, - "step": 23830 - }, - { - "epoch": 0.6050260185302704, - "grad_norm": 0.8438544869422913, - "learning_rate": 1.59664932097982e-05, - "loss": 0.0706, - "step": 23835 - }, - { - "epoch": 0.6051529381901256, - "grad_norm": 0.5644053816795349, - "learning_rate": 1.59656470787325e-05, - "loss": 0.0871, - "step": 23840 - }, - { - "epoch": 0.605279857849981, - "grad_norm": 0.48020821809768677, - "learning_rate": 1.5964800947666794e-05, - "loss": 0.1026, - "step": 23845 - }, - { - "epoch": 0.6054067775098363, - "grad_norm": 0.41262638568878174, - "learning_rate": 1.5963954816601092e-05, - "loss": 0.0818, - "step": 23850 - }, - { - "epoch": 0.6055336971696916, - "grad_norm": 0.4543174207210541, - "learning_rate": 1.596310868553539e-05, - "loss": 0.0829, - "step": 23855 - }, - { - "epoch": 0.6056606168295469, - "grad_norm": 0.6299386024475098, - "learning_rate": 1.596226255446969e-05, - "loss": 0.1053, - "step": 23860 - }, - { - "epoch": 0.6057875364894022, - "grad_norm": 0.4665926992893219, - "learning_rate": 1.5961416423403987e-05, - "loss": 0.0719, - "step": 23865 - }, - { - "epoch": 0.6059144561492575, - "grad_norm": 0.6819168925285339, - "learning_rate": 1.5960570292338286e-05, - "loss": 0.0762, - "step": 23870 - }, - { - "epoch": 0.6060413758091129, - "grad_norm": 0.5550897121429443, - "learning_rate": 1.5959724161272584e-05, - "loss": 0.0842, - "step": 23875 - }, - { - "epoch": 0.6061682954689681, - "grad_norm": 0.6129116415977478, - "learning_rate": 1.5958878030206883e-05, - "loss": 0.074, - "step": 23880 - }, - { - "epoch": 0.6062952151288235, - "grad_norm": 0.4563463032245636, - "learning_rate": 1.5958031899141178e-05, - "loss": 0.0773, - "step": 23885 - }, - { - "epoch": 0.6064221347886788, - "grad_norm": 0.5315578579902649, - "learning_rate": 1.5957185768075476e-05, - "loss": 0.0801, - "step": 23890 - }, - { - "epoch": 0.6065490544485341, - "grad_norm": 0.541745662689209, - "learning_rate": 1.5956339637009774e-05, - "loss": 0.093, - "step": 23895 - }, - { - "epoch": 0.6066759741083894, - "grad_norm": 0.37740516662597656, - "learning_rate": 1.5955493505944073e-05, - "loss": 0.0713, - "step": 23900 - }, - { - "epoch": 0.6068028937682447, - "grad_norm": 0.5872690677642822, - "learning_rate": 1.5954647374878368e-05, - "loss": 0.0954, - "step": 23905 - }, - { - "epoch": 0.6069298134281, - "grad_norm": 0.7279961705207825, - "learning_rate": 1.5953801243812666e-05, - "loss": 0.0667, - "step": 23910 - }, - { - "epoch": 0.6070567330879553, - "grad_norm": 0.632905900478363, - "learning_rate": 1.5952955112746965e-05, - "loss": 0.074, - "step": 23915 - }, - { - "epoch": 0.6071836527478106, - "grad_norm": 0.3888963460922241, - "learning_rate": 1.5952108981681263e-05, - "loss": 0.0842, - "step": 23920 - }, - { - "epoch": 0.607310572407666, - "grad_norm": 0.4582400321960449, - "learning_rate": 1.595126285061556e-05, - "loss": 0.0851, - "step": 23925 - }, - { - "epoch": 0.6074374920675213, - "grad_norm": 0.4225403368473053, - "learning_rate": 1.595041671954986e-05, - "loss": 0.081, - "step": 23930 - }, - { - "epoch": 0.6075644117273765, - "grad_norm": 0.4716184735298157, - "learning_rate": 1.5949570588484158e-05, - "loss": 0.0731, - "step": 23935 - }, - { - "epoch": 0.6076913313872319, - "grad_norm": 0.3883778154850006, - "learning_rate": 1.5948724457418457e-05, - "loss": 0.0757, - "step": 23940 - }, - { - "epoch": 0.6078182510470872, - "grad_norm": 0.7500013709068298, - "learning_rate": 1.5947878326352752e-05, - "loss": 0.0923, - "step": 23945 - }, - { - "epoch": 0.6079451707069425, - "grad_norm": 0.5881956219673157, - "learning_rate": 1.594703219528705e-05, - "loss": 0.0678, - "step": 23950 - }, - { - "epoch": 0.6080720903667978, - "grad_norm": 0.6003598570823669, - "learning_rate": 1.594618606422135e-05, - "loss": 0.0925, - "step": 23955 - }, - { - "epoch": 0.6081990100266531, - "grad_norm": 0.633449912071228, - "learning_rate": 1.5945339933155647e-05, - "loss": 0.1011, - "step": 23960 - }, - { - "epoch": 0.6083259296865084, - "grad_norm": 0.7102411389350891, - "learning_rate": 1.5944493802089945e-05, - "loss": 0.1077, - "step": 23965 - }, - { - "epoch": 0.6084528493463638, - "grad_norm": 0.638373613357544, - "learning_rate": 1.5943647671024244e-05, - "loss": 0.1217, - "step": 23970 - }, - { - "epoch": 0.608579769006219, - "grad_norm": 0.5783666968345642, - "learning_rate": 1.5942801539958542e-05, - "loss": 0.096, - "step": 23975 - }, - { - "epoch": 0.6087066886660744, - "grad_norm": 0.5881286859512329, - "learning_rate": 1.594195540889284e-05, - "loss": 0.0592, - "step": 23980 - }, - { - "epoch": 0.6088336083259297, - "grad_norm": 0.4907335042953491, - "learning_rate": 1.5941109277827136e-05, - "loss": 0.0925, - "step": 23985 - }, - { - "epoch": 0.608960527985785, - "grad_norm": 0.8788831233978271, - "learning_rate": 1.5940263146761434e-05, - "loss": 0.0755, - "step": 23990 - }, - { - "epoch": 0.6090874476456403, - "grad_norm": 0.5535174608230591, - "learning_rate": 1.5939417015695732e-05, - "loss": 0.0876, - "step": 23995 - }, - { - "epoch": 0.6092143673054956, - "grad_norm": 0.4087245464324951, - "learning_rate": 1.593857088463003e-05, - "loss": 0.0969, - "step": 24000 - }, - { - "epoch": 0.609341286965351, - "grad_norm": 0.6086934208869934, - "learning_rate": 1.593772475356433e-05, - "loss": 0.0859, - "step": 24005 - }, - { - "epoch": 0.6094682066252063, - "grad_norm": 0.669948935508728, - "learning_rate": 1.5936878622498628e-05, - "loss": 0.0736, - "step": 24010 - }, - { - "epoch": 0.6095951262850615, - "grad_norm": 0.6012802124023438, - "learning_rate": 1.5936032491432926e-05, - "loss": 0.0827, - "step": 24015 - }, - { - "epoch": 0.6097220459449169, - "grad_norm": 0.6244839429855347, - "learning_rate": 1.5935186360367224e-05, - "loss": 0.0893, - "step": 24020 - }, - { - "epoch": 0.6098489656047722, - "grad_norm": 0.5970250368118286, - "learning_rate": 1.593434022930152e-05, - "loss": 0.104, - "step": 24025 - }, - { - "epoch": 0.6099758852646275, - "grad_norm": 1.0761605501174927, - "learning_rate": 1.5933494098235818e-05, - "loss": 0.0841, - "step": 24030 - }, - { - "epoch": 0.6101028049244828, - "grad_norm": 0.46256086230278015, - "learning_rate": 1.5932647967170116e-05, - "loss": 0.1143, - "step": 24035 - }, - { - "epoch": 0.6102297245843381, - "grad_norm": 0.5272823572158813, - "learning_rate": 1.5931801836104415e-05, - "loss": 0.1057, - "step": 24040 - }, - { - "epoch": 0.6103566442441934, - "grad_norm": 0.5101450085639954, - "learning_rate": 1.593095570503871e-05, - "loss": 0.0896, - "step": 24045 - }, - { - "epoch": 0.6104835639040488, - "grad_norm": 0.42602410912513733, - "learning_rate": 1.5930109573973008e-05, - "loss": 0.1067, - "step": 24050 - }, - { - "epoch": 0.610610483563904, - "grad_norm": 0.3849015235900879, - "learning_rate": 1.5929263442907306e-05, - "loss": 0.0799, - "step": 24055 - }, - { - "epoch": 0.6107374032237594, - "grad_norm": 0.45254653692245483, - "learning_rate": 1.5928417311841605e-05, - "loss": 0.0771, - "step": 24060 - }, - { - "epoch": 0.6108643228836147, - "grad_norm": 0.53164142370224, - "learning_rate": 1.5927571180775903e-05, - "loss": 0.0647, - "step": 24065 - }, - { - "epoch": 0.61099124254347, - "grad_norm": 0.49431154131889343, - "learning_rate": 1.59267250497102e-05, - "loss": 0.095, - "step": 24070 - }, - { - "epoch": 0.6111181622033253, - "grad_norm": 0.9183122515678406, - "learning_rate": 1.59258789186445e-05, - "loss": 0.0797, - "step": 24075 - }, - { - "epoch": 0.6112450818631806, - "grad_norm": 0.5122854113578796, - "learning_rate": 1.59250327875788e-05, - "loss": 0.0819, - "step": 24080 - }, - { - "epoch": 0.611372001523036, - "grad_norm": 0.5101209878921509, - "learning_rate": 1.5924186656513093e-05, - "loss": 0.0811, - "step": 24085 - }, - { - "epoch": 0.6114989211828912, - "grad_norm": 0.5018807649612427, - "learning_rate": 1.5923340525447392e-05, - "loss": 0.0661, - "step": 24090 - }, - { - "epoch": 0.6116258408427465, - "grad_norm": 0.6142767667770386, - "learning_rate": 1.592249439438169e-05, - "loss": 0.0846, - "step": 24095 - }, - { - "epoch": 0.6117527605026019, - "grad_norm": 0.622661828994751, - "learning_rate": 1.592164826331599e-05, - "loss": 0.082, - "step": 24100 - }, - { - "epoch": 0.6118796801624572, - "grad_norm": 0.7366836667060852, - "learning_rate": 1.5920802132250287e-05, - "loss": 0.0791, - "step": 24105 - }, - { - "epoch": 0.6120065998223124, - "grad_norm": 0.40291300415992737, - "learning_rate": 1.5919956001184585e-05, - "loss": 0.089, - "step": 24110 - }, - { - "epoch": 0.6121335194821678, - "grad_norm": 0.48520028591156006, - "learning_rate": 1.5919109870118884e-05, - "loss": 0.1043, - "step": 24115 - }, - { - "epoch": 0.6122604391420231, - "grad_norm": 0.6662589907646179, - "learning_rate": 1.5918263739053182e-05, - "loss": 0.1031, - "step": 24120 - }, - { - "epoch": 0.6123873588018784, - "grad_norm": 0.8260883688926697, - "learning_rate": 1.5917417607987477e-05, - "loss": 0.073, - "step": 24125 - }, - { - "epoch": 0.6125142784617337, - "grad_norm": 0.5220597982406616, - "learning_rate": 1.5916571476921776e-05, - "loss": 0.0969, - "step": 24130 - }, - { - "epoch": 0.612641198121589, - "grad_norm": 0.4808913767337799, - "learning_rate": 1.5915725345856074e-05, - "loss": 0.0806, - "step": 24135 - }, - { - "epoch": 0.6127681177814444, - "grad_norm": 0.45611876249313354, - "learning_rate": 1.5914879214790372e-05, - "loss": 0.0689, - "step": 24140 - }, - { - "epoch": 0.6128950374412997, - "grad_norm": 0.6769543290138245, - "learning_rate": 1.591403308372467e-05, - "loss": 0.0789, - "step": 24145 - }, - { - "epoch": 0.6130219571011549, - "grad_norm": 0.7015541791915894, - "learning_rate": 1.591318695265897e-05, - "loss": 0.0822, - "step": 24150 - }, - { - "epoch": 0.6131488767610103, - "grad_norm": 0.7999812960624695, - "learning_rate": 1.5912340821593268e-05, - "loss": 0.0952, - "step": 24155 - }, - { - "epoch": 0.6132757964208656, - "grad_norm": 0.3981418013572693, - "learning_rate": 1.5911494690527566e-05, - "loss": 0.0805, - "step": 24160 - }, - { - "epoch": 0.6134027160807209, - "grad_norm": 0.8143892884254456, - "learning_rate": 1.591064855946186e-05, - "loss": 0.0836, - "step": 24165 - }, - { - "epoch": 0.6135296357405762, - "grad_norm": 0.6436524391174316, - "learning_rate": 1.590980242839616e-05, - "loss": 0.0822, - "step": 24170 - }, - { - "epoch": 0.6136565554004315, - "grad_norm": 0.5603113174438477, - "learning_rate": 1.5908956297330458e-05, - "loss": 0.0703, - "step": 24175 - }, - { - "epoch": 0.6137834750602869, - "grad_norm": 0.7269699573516846, - "learning_rate": 1.5908110166264756e-05, - "loss": 0.0826, - "step": 24180 - }, - { - "epoch": 0.6139103947201422, - "grad_norm": 0.3452830910682678, - "learning_rate": 1.590726403519905e-05, - "loss": 0.0797, - "step": 24185 - }, - { - "epoch": 0.6140373143799974, - "grad_norm": 0.41289547085762024, - "learning_rate": 1.590641790413335e-05, - "loss": 0.0672, - "step": 24190 - }, - { - "epoch": 0.6141642340398528, - "grad_norm": 0.5285106897354126, - "learning_rate": 1.5905571773067648e-05, - "loss": 0.0878, - "step": 24195 - }, - { - "epoch": 0.6142911536997081, - "grad_norm": 0.624372661113739, - "learning_rate": 1.5904725642001946e-05, - "loss": 0.0851, - "step": 24200 - }, - { - "epoch": 0.6144180733595634, - "grad_norm": 0.7406287789344788, - "learning_rate": 1.5903879510936245e-05, - "loss": 0.0943, - "step": 24205 - }, - { - "epoch": 0.6145449930194187, - "grad_norm": 0.46395134925842285, - "learning_rate": 1.5903033379870543e-05, - "loss": 0.0903, - "step": 24210 - }, - { - "epoch": 0.614671912679274, - "grad_norm": 0.43800562620162964, - "learning_rate": 1.590218724880484e-05, - "loss": 0.0996, - "step": 24215 - }, - { - "epoch": 0.6147988323391294, - "grad_norm": 0.5071261525154114, - "learning_rate": 1.590134111773914e-05, - "loss": 0.0974, - "step": 24220 - }, - { - "epoch": 0.6149257519989847, - "grad_norm": 0.8782238960266113, - "learning_rate": 1.5900494986673435e-05, - "loss": 0.0837, - "step": 24225 - }, - { - "epoch": 0.6150526716588399, - "grad_norm": 0.6726325154304504, - "learning_rate": 1.5899648855607734e-05, - "loss": 0.0762, - "step": 24230 - }, - { - "epoch": 0.6151795913186953, - "grad_norm": 0.5945216417312622, - "learning_rate": 1.5898802724542032e-05, - "loss": 0.0879, - "step": 24235 - }, - { - "epoch": 0.6153065109785506, - "grad_norm": 0.6471598148345947, - "learning_rate": 1.589795659347633e-05, - "loss": 0.0875, - "step": 24240 - }, - { - "epoch": 0.6154334306384059, - "grad_norm": 0.7237224578857422, - "learning_rate": 1.589711046241063e-05, - "loss": 0.0984, - "step": 24245 - }, - { - "epoch": 0.6155603502982612, - "grad_norm": 0.7347675561904907, - "learning_rate": 1.5896264331344927e-05, - "loss": 0.0816, - "step": 24250 - }, - { - "epoch": 0.6156872699581165, - "grad_norm": 0.5792115330696106, - "learning_rate": 1.5895418200279226e-05, - "loss": 0.0819, - "step": 24255 - }, - { - "epoch": 0.6158141896179719, - "grad_norm": 0.43627870082855225, - "learning_rate": 1.5894572069213524e-05, - "loss": 0.0742, - "step": 24260 - }, - { - "epoch": 0.6159411092778271, - "grad_norm": 0.532949686050415, - "learning_rate": 1.5893725938147822e-05, - "loss": 0.0622, - "step": 24265 - }, - { - "epoch": 0.6160680289376824, - "grad_norm": 0.4859938621520996, - "learning_rate": 1.5892879807082117e-05, - "loss": 0.0743, - "step": 24270 - }, - { - "epoch": 0.6161949485975378, - "grad_norm": 0.9110642075538635, - "learning_rate": 1.5892033676016416e-05, - "loss": 0.0886, - "step": 24275 - }, - { - "epoch": 0.6163218682573931, - "grad_norm": 1.4426074028015137, - "learning_rate": 1.5891187544950714e-05, - "loss": 0.088, - "step": 24280 - }, - { - "epoch": 0.6164487879172483, - "grad_norm": 0.5257732272148132, - "learning_rate": 1.5890341413885013e-05, - "loss": 0.0706, - "step": 24285 - }, - { - "epoch": 0.6165757075771037, - "grad_norm": 1.3795831203460693, - "learning_rate": 1.588949528281931e-05, - "loss": 0.0795, - "step": 24290 - }, - { - "epoch": 0.616702627236959, - "grad_norm": 0.5005890130996704, - "learning_rate": 1.588864915175361e-05, - "loss": 0.0851, - "step": 24295 - }, - { - "epoch": 0.6168295468968144, - "grad_norm": 0.7331749200820923, - "learning_rate": 1.5887803020687908e-05, - "loss": 0.077, - "step": 24300 - }, - { - "epoch": 0.6169564665566696, - "grad_norm": 0.9254713654518127, - "learning_rate": 1.5886956889622206e-05, - "loss": 0.0775, - "step": 24305 - }, - { - "epoch": 0.6170833862165249, - "grad_norm": 0.6209977269172668, - "learning_rate": 1.58861107585565e-05, - "loss": 0.0805, - "step": 24310 - }, - { - "epoch": 0.6172103058763803, - "grad_norm": 0.9463977217674255, - "learning_rate": 1.58852646274908e-05, - "loss": 0.0842, - "step": 24315 - }, - { - "epoch": 0.6173372255362356, - "grad_norm": 0.39627334475517273, - "learning_rate": 1.5884418496425098e-05, - "loss": 0.0824, - "step": 24320 - }, - { - "epoch": 0.6174641451960908, - "grad_norm": 0.5214909911155701, - "learning_rate": 1.5883572365359396e-05, - "loss": 0.0738, - "step": 24325 - }, - { - "epoch": 0.6175910648559462, - "grad_norm": 1.785869836807251, - "learning_rate": 1.588272623429369e-05, - "loss": 0.0885, - "step": 24330 - }, - { - "epoch": 0.6177179845158015, - "grad_norm": 0.47995489835739136, - "learning_rate": 1.588188010322799e-05, - "loss": 0.087, - "step": 24335 - }, - { - "epoch": 0.6178449041756569, - "grad_norm": 0.5746110677719116, - "learning_rate": 1.5881033972162288e-05, - "loss": 0.0846, - "step": 24340 - }, - { - "epoch": 0.6179718238355121, - "grad_norm": 0.6164671778678894, - "learning_rate": 1.5880187841096587e-05, - "loss": 0.0738, - "step": 24345 - }, - { - "epoch": 0.6180987434953674, - "grad_norm": 0.4316805899143219, - "learning_rate": 1.5879341710030885e-05, - "loss": 0.0628, - "step": 24350 - }, - { - "epoch": 0.6182256631552228, - "grad_norm": 0.8916974663734436, - "learning_rate": 1.5878495578965183e-05, - "loss": 0.0807, - "step": 24355 - }, - { - "epoch": 0.6183525828150781, - "grad_norm": 0.5242815017700195, - "learning_rate": 1.5877649447899482e-05, - "loss": 0.0778, - "step": 24360 - }, - { - "epoch": 0.6184795024749333, - "grad_norm": 0.5374359488487244, - "learning_rate": 1.587680331683378e-05, - "loss": 0.0896, - "step": 24365 - }, - { - "epoch": 0.6186064221347887, - "grad_norm": 0.7005773186683655, - "learning_rate": 1.5875957185768075e-05, - "loss": 0.0714, - "step": 24370 - }, - { - "epoch": 0.618733341794644, - "grad_norm": 0.7706083655357361, - "learning_rate": 1.5875111054702374e-05, - "loss": 0.0714, - "step": 24375 - }, - { - "epoch": 0.6188602614544993, - "grad_norm": 0.4338093101978302, - "learning_rate": 1.5874264923636672e-05, - "loss": 0.0788, - "step": 24380 - }, - { - "epoch": 0.6189871811143546, - "grad_norm": 0.4611413776874542, - "learning_rate": 1.587341879257097e-05, - "loss": 0.0799, - "step": 24385 - }, - { - "epoch": 0.6191141007742099, - "grad_norm": 0.5862218141555786, - "learning_rate": 1.587257266150527e-05, - "loss": 0.083, - "step": 24390 - }, - { - "epoch": 0.6192410204340653, - "grad_norm": 0.42613300681114197, - "learning_rate": 1.5871726530439567e-05, - "loss": 0.069, - "step": 24395 - }, - { - "epoch": 0.6193679400939206, - "grad_norm": 0.49834221601486206, - "learning_rate": 1.5870880399373866e-05, - "loss": 0.0949, - "step": 24400 - }, - { - "epoch": 0.6194948597537758, - "grad_norm": 0.6678621768951416, - "learning_rate": 1.5870034268308164e-05, - "loss": 0.095, - "step": 24405 - }, - { - "epoch": 0.6196217794136312, - "grad_norm": 0.40410828590393066, - "learning_rate": 1.586918813724246e-05, - "loss": 0.09, - "step": 24410 - }, - { - "epoch": 0.6197486990734865, - "grad_norm": 0.7039076685905457, - "learning_rate": 1.5868342006176757e-05, - "loss": 0.0841, - "step": 24415 - }, - { - "epoch": 0.6198756187333418, - "grad_norm": 0.5756010413169861, - "learning_rate": 1.5867495875111056e-05, - "loss": 0.0667, - "step": 24420 - }, - { - "epoch": 0.6200025383931971, - "grad_norm": 0.6351935863494873, - "learning_rate": 1.5866649744045354e-05, - "loss": 0.084, - "step": 24425 - }, - { - "epoch": 0.6201294580530524, - "grad_norm": 0.429601788520813, - "learning_rate": 1.5865803612979653e-05, - "loss": 0.0756, - "step": 24430 - }, - { - "epoch": 0.6202563777129078, - "grad_norm": 0.42356738448143005, - "learning_rate": 1.586495748191395e-05, - "loss": 0.0838, - "step": 24435 - }, - { - "epoch": 0.620383297372763, - "grad_norm": 0.5472513437271118, - "learning_rate": 1.586411135084825e-05, - "loss": 0.1045, - "step": 24440 - }, - { - "epoch": 0.6205102170326183, - "grad_norm": 0.42921897768974304, - "learning_rate": 1.5863265219782548e-05, - "loss": 0.0731, - "step": 24445 - }, - { - "epoch": 0.6206371366924737, - "grad_norm": 1.076877236366272, - "learning_rate": 1.5862419088716843e-05, - "loss": 0.0736, - "step": 24450 - }, - { - "epoch": 0.620764056352329, - "grad_norm": 0.411905437707901, - "learning_rate": 1.586157295765114e-05, - "loss": 0.0876, - "step": 24455 - }, - { - "epoch": 0.6208909760121842, - "grad_norm": 0.870644211769104, - "learning_rate": 1.586072682658544e-05, - "loss": 0.0697, - "step": 24460 - }, - { - "epoch": 0.6210178956720396, - "grad_norm": 0.5089964270591736, - "learning_rate": 1.5859880695519738e-05, - "loss": 0.0934, - "step": 24465 - }, - { - "epoch": 0.6211448153318949, - "grad_norm": 0.35810407996177673, - "learning_rate": 1.5859034564454033e-05, - "loss": 0.0677, - "step": 24470 - }, - { - "epoch": 0.6212717349917503, - "grad_norm": 0.6504899263381958, - "learning_rate": 1.585818843338833e-05, - "loss": 0.0929, - "step": 24475 - }, - { - "epoch": 0.6213986546516055, - "grad_norm": 0.491267591714859, - "learning_rate": 1.585734230232263e-05, - "loss": 0.0637, - "step": 24480 - }, - { - "epoch": 0.6215255743114608, - "grad_norm": 0.3099006712436676, - "learning_rate": 1.5856496171256928e-05, - "loss": 0.0693, - "step": 24485 - }, - { - "epoch": 0.6216524939713162, - "grad_norm": 0.5096613764762878, - "learning_rate": 1.5855650040191227e-05, - "loss": 0.1013, - "step": 24490 - }, - { - "epoch": 0.6217794136311715, - "grad_norm": 1.10066819190979, - "learning_rate": 1.5854803909125525e-05, - "loss": 0.0779, - "step": 24495 - }, - { - "epoch": 0.6219063332910267, - "grad_norm": 0.49033620953559875, - "learning_rate": 1.5853957778059824e-05, - "loss": 0.0832, - "step": 24500 - }, - { - "epoch": 0.6220332529508821, - "grad_norm": 0.5653120875358582, - "learning_rate": 1.5853111646994122e-05, - "loss": 0.0682, - "step": 24505 - }, - { - "epoch": 0.6221601726107374, - "grad_norm": 0.6363314986228943, - "learning_rate": 1.5852265515928417e-05, - "loss": 0.0967, - "step": 24510 - }, - { - "epoch": 0.6222870922705928, - "grad_norm": 0.5178027153015137, - "learning_rate": 1.5851419384862715e-05, - "loss": 0.0954, - "step": 24515 - }, - { - "epoch": 0.622414011930448, - "grad_norm": 0.4617108702659607, - "learning_rate": 1.5850573253797014e-05, - "loss": 0.0838, - "step": 24520 - }, - { - "epoch": 0.6225409315903033, - "grad_norm": 1.108429193496704, - "learning_rate": 1.5849727122731312e-05, - "loss": 0.1126, - "step": 24525 - }, - { - "epoch": 0.6226678512501587, - "grad_norm": 0.5080247521400452, - "learning_rate": 1.584888099166561e-05, - "loss": 0.0867, - "step": 24530 - }, - { - "epoch": 0.622794770910014, - "grad_norm": 0.4822142720222473, - "learning_rate": 1.584803486059991e-05, - "loss": 0.077, - "step": 24535 - }, - { - "epoch": 0.6229216905698692, - "grad_norm": 0.5217092633247375, - "learning_rate": 1.5847188729534207e-05, - "loss": 0.0875, - "step": 24540 - }, - { - "epoch": 0.6230486102297246, - "grad_norm": 0.5316239595413208, - "learning_rate": 1.5846342598468506e-05, - "loss": 0.0846, - "step": 24545 - }, - { - "epoch": 0.6231755298895799, - "grad_norm": 0.4411020874977112, - "learning_rate": 1.58454964674028e-05, - "loss": 0.0886, - "step": 24550 - }, - { - "epoch": 0.6233024495494353, - "grad_norm": 0.5857901573181152, - "learning_rate": 1.58446503363371e-05, - "loss": 0.1001, - "step": 24555 - }, - { - "epoch": 0.6234293692092905, - "grad_norm": 0.8708264827728271, - "learning_rate": 1.5843804205271398e-05, - "loss": 0.0654, - "step": 24560 - }, - { - "epoch": 0.6235562888691458, - "grad_norm": 0.6041045784950256, - "learning_rate": 1.5842958074205696e-05, - "loss": 0.0718, - "step": 24565 - }, - { - "epoch": 0.6236832085290012, - "grad_norm": 0.49768301844596863, - "learning_rate": 1.5842111943139994e-05, - "loss": 0.0648, - "step": 24570 - }, - { - "epoch": 0.6238101281888565, - "grad_norm": 0.7778043150901794, - "learning_rate": 1.5841265812074293e-05, - "loss": 0.0774, - "step": 24575 - }, - { - "epoch": 0.6239370478487117, - "grad_norm": 0.5501663088798523, - "learning_rate": 1.584041968100859e-05, - "loss": 0.0769, - "step": 24580 - }, - { - "epoch": 0.6240639675085671, - "grad_norm": 0.6336368322372437, - "learning_rate": 1.583957354994289e-05, - "loss": 0.0828, - "step": 24585 - }, - { - "epoch": 0.6241908871684224, - "grad_norm": 0.5474095940589905, - "learning_rate": 1.5838727418877185e-05, - "loss": 0.0778, - "step": 24590 - }, - { - "epoch": 0.6243178068282778, - "grad_norm": 0.35438036918640137, - "learning_rate": 1.5837881287811483e-05, - "loss": 0.0571, - "step": 24595 - }, - { - "epoch": 0.624444726488133, - "grad_norm": 0.6588054895401001, - "learning_rate": 1.583703515674578e-05, - "loss": 0.0922, - "step": 24600 - }, - { - "epoch": 0.6245716461479883, - "grad_norm": 0.4324241876602173, - "learning_rate": 1.583618902568008e-05, - "loss": 0.0841, - "step": 24605 - }, - { - "epoch": 0.6246985658078437, - "grad_norm": 0.542403519153595, - "learning_rate": 1.5835342894614375e-05, - "loss": 0.0804, - "step": 24610 - }, - { - "epoch": 0.6248254854676989, - "grad_norm": 0.34521570801734924, - "learning_rate": 1.5834496763548673e-05, - "loss": 0.0572, - "step": 24615 - }, - { - "epoch": 0.6249524051275542, - "grad_norm": 1.1576576232910156, - "learning_rate": 1.583365063248297e-05, - "loss": 0.0919, - "step": 24620 - }, - { - "epoch": 0.6250793247874096, - "grad_norm": 0.640070378780365, - "learning_rate": 1.583280450141727e-05, - "loss": 0.0801, - "step": 24625 - }, - { - "epoch": 0.6252062444472649, - "grad_norm": 0.9572058916091919, - "learning_rate": 1.583195837035157e-05, - "loss": 0.0683, - "step": 24630 - }, - { - "epoch": 0.6253331641071201, - "grad_norm": 0.8202519416809082, - "learning_rate": 1.5831112239285867e-05, - "loss": 0.0632, - "step": 24635 - }, - { - "epoch": 0.6254600837669755, - "grad_norm": 0.6814483404159546, - "learning_rate": 1.5830266108220165e-05, - "loss": 0.0958, - "step": 24640 - }, - { - "epoch": 0.6255870034268308, - "grad_norm": 0.7525166273117065, - "learning_rate": 1.5829419977154464e-05, - "loss": 0.1061, - "step": 24645 - }, - { - "epoch": 0.6257139230866862, - "grad_norm": 0.4943377375602722, - "learning_rate": 1.582857384608876e-05, - "loss": 0.1051, - "step": 24650 - }, - { - "epoch": 0.6258408427465414, - "grad_norm": 0.44829824566841125, - "learning_rate": 1.5827727715023057e-05, - "loss": 0.0895, - "step": 24655 - }, - { - "epoch": 0.6259677624063967, - "grad_norm": 0.48225080966949463, - "learning_rate": 1.5826881583957355e-05, - "loss": 0.0751, - "step": 24660 - }, - { - "epoch": 0.6260946820662521, - "grad_norm": 1.1525591611862183, - "learning_rate": 1.5826035452891654e-05, - "loss": 0.091, - "step": 24665 - }, - { - "epoch": 0.6262216017261074, - "grad_norm": 0.5572652220726013, - "learning_rate": 1.5825189321825952e-05, - "loss": 0.0737, - "step": 24670 - }, - { - "epoch": 0.6263485213859626, - "grad_norm": 0.37252986431121826, - "learning_rate": 1.582434319076025e-05, - "loss": 0.0758, - "step": 24675 - }, - { - "epoch": 0.626475441045818, - "grad_norm": 0.5802052617073059, - "learning_rate": 1.582349705969455e-05, - "loss": 0.0875, - "step": 24680 - }, - { - "epoch": 0.6266023607056733, - "grad_norm": 0.5310128927230835, - "learning_rate": 1.5822650928628847e-05, - "loss": 0.0815, - "step": 24685 - }, - { - "epoch": 0.6267292803655287, - "grad_norm": 0.568204939365387, - "learning_rate": 1.5821804797563142e-05, - "loss": 0.0915, - "step": 24690 - }, - { - "epoch": 0.6268562000253839, - "grad_norm": 0.6114065051078796, - "learning_rate": 1.582095866649744e-05, - "loss": 0.0902, - "step": 24695 - }, - { - "epoch": 0.6269831196852392, - "grad_norm": 0.8378125429153442, - "learning_rate": 1.582011253543174e-05, - "loss": 0.1054, - "step": 24700 - }, - { - "epoch": 0.6271100393450946, - "grad_norm": 0.6611642241477966, - "learning_rate": 1.5819266404366038e-05, - "loss": 0.0807, - "step": 24705 - }, - { - "epoch": 0.6272369590049499, - "grad_norm": 0.29750922322273254, - "learning_rate": 1.5818420273300336e-05, - "loss": 0.0737, - "step": 24710 - }, - { - "epoch": 0.6273638786648051, - "grad_norm": 0.5942132472991943, - "learning_rate": 1.5817574142234634e-05, - "loss": 0.0787, - "step": 24715 - }, - { - "epoch": 0.6274907983246605, - "grad_norm": 0.4327792227268219, - "learning_rate": 1.5816728011168933e-05, - "loss": 0.0798, - "step": 24720 - }, - { - "epoch": 0.6276177179845158, - "grad_norm": 0.5384342074394226, - "learning_rate": 1.581588188010323e-05, - "loss": 0.0895, - "step": 24725 - }, - { - "epoch": 0.6277446376443712, - "grad_norm": 0.6055826544761658, - "learning_rate": 1.5815035749037526e-05, - "loss": 0.0734, - "step": 24730 - }, - { - "epoch": 0.6278715573042264, - "grad_norm": 0.5289015769958496, - "learning_rate": 1.5814189617971825e-05, - "loss": 0.0967, - "step": 24735 - }, - { - "epoch": 0.6279984769640817, - "grad_norm": 0.47920817136764526, - "learning_rate": 1.5813343486906123e-05, - "loss": 0.0724, - "step": 24740 - }, - { - "epoch": 0.6281253966239371, - "grad_norm": 0.44396841526031494, - "learning_rate": 1.581249735584042e-05, - "loss": 0.0824, - "step": 24745 - }, - { - "epoch": 0.6282523162837924, - "grad_norm": 0.6049718260765076, - "learning_rate": 1.5811651224774716e-05, - "loss": 0.0756, - "step": 24750 - }, - { - "epoch": 0.6283792359436476, - "grad_norm": 0.7941556572914124, - "learning_rate": 1.5810805093709015e-05, - "loss": 0.08, - "step": 24755 - }, - { - "epoch": 0.628506155603503, - "grad_norm": 0.2887498140335083, - "learning_rate": 1.5809958962643313e-05, - "loss": 0.0811, - "step": 24760 - }, - { - "epoch": 0.6286330752633583, - "grad_norm": 0.6991428732872009, - "learning_rate": 1.5809112831577612e-05, - "loss": 0.0993, - "step": 24765 - }, - { - "epoch": 0.6287599949232137, - "grad_norm": 0.5032515525817871, - "learning_rate": 1.580826670051191e-05, - "loss": 0.1002, - "step": 24770 - }, - { - "epoch": 0.6288869145830689, - "grad_norm": 0.6173897385597229, - "learning_rate": 1.580742056944621e-05, - "loss": 0.1008, - "step": 24775 - }, - { - "epoch": 0.6290138342429242, - "grad_norm": 0.5455065369606018, - "learning_rate": 1.5806574438380507e-05, - "loss": 0.0874, - "step": 24780 - }, - { - "epoch": 0.6291407539027796, - "grad_norm": 0.6983262300491333, - "learning_rate": 1.5805728307314805e-05, - "loss": 0.0645, - "step": 24785 - }, - { - "epoch": 0.6292676735626348, - "grad_norm": 0.5968006253242493, - "learning_rate": 1.5804882176249104e-05, - "loss": 0.0894, - "step": 24790 - }, - { - "epoch": 0.6293945932224901, - "grad_norm": 0.8259603381156921, - "learning_rate": 1.58040360451834e-05, - "loss": 0.0655, - "step": 24795 - }, - { - "epoch": 0.6295215128823455, - "grad_norm": 0.5077802538871765, - "learning_rate": 1.5803189914117697e-05, - "loss": 0.0876, - "step": 24800 - }, - { - "epoch": 0.6296484325422008, - "grad_norm": 0.5532286763191223, - "learning_rate": 1.5802343783051996e-05, - "loss": 0.067, - "step": 24805 - }, - { - "epoch": 0.629775352202056, - "grad_norm": 0.550063967704773, - "learning_rate": 1.5801497651986294e-05, - "loss": 0.0826, - "step": 24810 - }, - { - "epoch": 0.6299022718619114, - "grad_norm": 0.9969014525413513, - "learning_rate": 1.5800651520920592e-05, - "loss": 0.0606, - "step": 24815 - }, - { - "epoch": 0.6300291915217667, - "grad_norm": 0.4082219898700714, - "learning_rate": 1.579980538985489e-05, - "loss": 0.0714, - "step": 24820 - }, - { - "epoch": 0.6301561111816221, - "grad_norm": 0.832500696182251, - "learning_rate": 1.579895925878919e-05, - "loss": 0.0867, - "step": 24825 - }, - { - "epoch": 0.6302830308414773, - "grad_norm": 0.790317177772522, - "learning_rate": 1.5798113127723488e-05, - "loss": 0.1014, - "step": 24830 - }, - { - "epoch": 0.6304099505013326, - "grad_norm": 0.5301023721694946, - "learning_rate": 1.5797266996657783e-05, - "loss": 0.0961, - "step": 24835 - }, - { - "epoch": 0.630536870161188, - "grad_norm": 0.6267194747924805, - "learning_rate": 1.579642086559208e-05, - "loss": 0.0961, - "step": 24840 - }, - { - "epoch": 0.6306637898210433, - "grad_norm": 1.0455292463302612, - "learning_rate": 1.579557473452638e-05, - "loss": 0.0913, - "step": 24845 - }, - { - "epoch": 0.6307907094808985, - "grad_norm": 0.5183723568916321, - "learning_rate": 1.5794728603460678e-05, - "loss": 0.0638, - "step": 24850 - }, - { - "epoch": 0.6309176291407539, - "grad_norm": 0.4003438353538513, - "learning_rate": 1.5793882472394976e-05, - "loss": 0.0769, - "step": 24855 - }, - { - "epoch": 0.6310445488006092, - "grad_norm": 0.5873885750770569, - "learning_rate": 1.5793036341329275e-05, - "loss": 0.0673, - "step": 24860 - }, - { - "epoch": 0.6311714684604646, - "grad_norm": 0.5806111693382263, - "learning_rate": 1.5792190210263573e-05, - "loss": 0.0799, - "step": 24865 - }, - { - "epoch": 0.6312983881203198, - "grad_norm": 0.453599750995636, - "learning_rate": 1.579134407919787e-05, - "loss": 0.0699, - "step": 24870 - }, - { - "epoch": 0.6314253077801751, - "grad_norm": 0.4540586471557617, - "learning_rate": 1.5790497948132166e-05, - "loss": 0.0838, - "step": 24875 - }, - { - "epoch": 0.6315522274400305, - "grad_norm": 0.6527369618415833, - "learning_rate": 1.5789651817066465e-05, - "loss": 0.0678, - "step": 24880 - }, - { - "epoch": 0.6316791470998858, - "grad_norm": 0.6662213802337646, - "learning_rate": 1.5788805686000763e-05, - "loss": 0.0779, - "step": 24885 - }, - { - "epoch": 0.631806066759741, - "grad_norm": 0.5039227604866028, - "learning_rate": 1.578795955493506e-05, - "loss": 0.0857, - "step": 24890 - }, - { - "epoch": 0.6319329864195964, - "grad_norm": 0.6238901615142822, - "learning_rate": 1.5787113423869357e-05, - "loss": 0.0958, - "step": 24895 - }, - { - "epoch": 0.6320599060794517, - "grad_norm": 1.1615737676620483, - "learning_rate": 1.5786267292803655e-05, - "loss": 0.1116, - "step": 24900 - }, - { - "epoch": 0.6321868257393071, - "grad_norm": 1.2597403526306152, - "learning_rate": 1.5785421161737953e-05, - "loss": 0.0855, - "step": 24905 - }, - { - "epoch": 0.6323137453991623, - "grad_norm": 0.9595597982406616, - "learning_rate": 1.5784575030672252e-05, - "loss": 0.0922, - "step": 24910 - }, - { - "epoch": 0.6324406650590176, - "grad_norm": 0.6671690940856934, - "learning_rate": 1.578372889960655e-05, - "loss": 0.0688, - "step": 24915 - }, - { - "epoch": 0.632567584718873, - "grad_norm": 0.6453531980514526, - "learning_rate": 1.578288276854085e-05, - "loss": 0.0853, - "step": 24920 - }, - { - "epoch": 0.6326945043787283, - "grad_norm": 0.457960307598114, - "learning_rate": 1.5782036637475147e-05, - "loss": 0.083, - "step": 24925 - }, - { - "epoch": 0.6328214240385835, - "grad_norm": 0.6794431805610657, - "learning_rate": 1.5781190506409445e-05, - "loss": 0.0968, - "step": 24930 - }, - { - "epoch": 0.6329483436984389, - "grad_norm": 0.37798288464546204, - "learning_rate": 1.578034437534374e-05, - "loss": 0.0744, - "step": 24935 - }, - { - "epoch": 0.6330752633582942, - "grad_norm": 0.49191829562187195, - "learning_rate": 1.577949824427804e-05, - "loss": 0.1019, - "step": 24940 - }, - { - "epoch": 0.6332021830181495, - "grad_norm": 0.4922636151313782, - "learning_rate": 1.5778652113212337e-05, - "loss": 0.0659, - "step": 24945 - }, - { - "epoch": 0.6333291026780048, - "grad_norm": 0.40640997886657715, - "learning_rate": 1.5777805982146636e-05, - "loss": 0.0861, - "step": 24950 - }, - { - "epoch": 0.6334560223378601, - "grad_norm": 0.4276249408721924, - "learning_rate": 1.5776959851080934e-05, - "loss": 0.0894, - "step": 24955 - }, - { - "epoch": 0.6335829419977155, - "grad_norm": 0.6054560542106628, - "learning_rate": 1.5776113720015232e-05, - "loss": 0.0986, - "step": 24960 - }, - { - "epoch": 0.6337098616575707, - "grad_norm": 0.6265735030174255, - "learning_rate": 1.577526758894953e-05, - "loss": 0.0902, - "step": 24965 - }, - { - "epoch": 0.633836781317426, - "grad_norm": 0.4479024112224579, - "learning_rate": 1.577442145788383e-05, - "loss": 0.1036, - "step": 24970 - }, - { - "epoch": 0.6339637009772814, - "grad_norm": 1.0107135772705078, - "learning_rate": 1.5773575326818124e-05, - "loss": 0.0883, - "step": 24975 - }, - { - "epoch": 0.6340906206371367, - "grad_norm": 0.5163013935089111, - "learning_rate": 1.5772729195752423e-05, - "loss": 0.0945, - "step": 24980 - }, - { - "epoch": 0.634217540296992, - "grad_norm": 0.5766080617904663, - "learning_rate": 1.577188306468672e-05, - "loss": 0.0872, - "step": 24985 - }, - { - "epoch": 0.6343444599568473, - "grad_norm": 0.49331027269363403, - "learning_rate": 1.577103693362102e-05, - "loss": 0.0777, - "step": 24990 - }, - { - "epoch": 0.6344713796167026, - "grad_norm": 0.43355441093444824, - "learning_rate": 1.5770190802555318e-05, - "loss": 0.0771, - "step": 24995 - }, - { - "epoch": 0.634598299276558, - "grad_norm": 0.4749916195869446, - "learning_rate": 1.5769344671489616e-05, - "loss": 0.1076, - "step": 25000 - }, - { - "epoch": 0.6347252189364132, - "grad_norm": 0.46755969524383545, - "learning_rate": 1.5768498540423915e-05, - "loss": 0.0804, - "step": 25005 - }, - { - "epoch": 0.6348521385962685, - "grad_norm": 0.5187056660652161, - "learning_rate": 1.5767652409358213e-05, - "loss": 0.0704, - "step": 25010 - }, - { - "epoch": 0.6349790582561239, - "grad_norm": 0.3963109254837036, - "learning_rate": 1.5766806278292508e-05, - "loss": 0.0721, - "step": 25015 - }, - { - "epoch": 0.6351059779159792, - "grad_norm": 0.7593163847923279, - "learning_rate": 1.5765960147226806e-05, - "loss": 0.1026, - "step": 25020 - }, - { - "epoch": 0.6352328975758345, - "grad_norm": 0.40160071849823, - "learning_rate": 1.5765114016161105e-05, - "loss": 0.0684, - "step": 25025 - }, - { - "epoch": 0.6353598172356898, - "grad_norm": 0.8341411352157593, - "learning_rate": 1.5764267885095403e-05, - "loss": 0.1122, - "step": 25030 - }, - { - "epoch": 0.6354867368955451, - "grad_norm": 0.44370952248573303, - "learning_rate": 1.57634217540297e-05, - "loss": 0.0718, - "step": 25035 - }, - { - "epoch": 0.6356136565554005, - "grad_norm": 0.3952770233154297, - "learning_rate": 1.5762575622963997e-05, - "loss": 0.0736, - "step": 25040 - }, - { - "epoch": 0.6357405762152557, - "grad_norm": 0.9716493487358093, - "learning_rate": 1.5761729491898295e-05, - "loss": 0.0875, - "step": 25045 - }, - { - "epoch": 0.635867495875111, - "grad_norm": 0.5776504874229431, - "learning_rate": 1.5760883360832594e-05, - "loss": 0.0807, - "step": 25050 - }, - { - "epoch": 0.6359944155349664, - "grad_norm": 0.7000341415405273, - "learning_rate": 1.5760037229766892e-05, - "loss": 0.0754, - "step": 25055 - }, - { - "epoch": 0.6361213351948217, - "grad_norm": 0.7491811513900757, - "learning_rate": 1.575919109870119e-05, - "loss": 0.0694, - "step": 25060 - }, - { - "epoch": 0.636248254854677, - "grad_norm": 0.8230956792831421, - "learning_rate": 1.575834496763549e-05, - "loss": 0.0734, - "step": 25065 - }, - { - "epoch": 0.6363751745145323, - "grad_norm": 0.4198894500732422, - "learning_rate": 1.5757498836569787e-05, - "loss": 0.0513, - "step": 25070 - }, - { - "epoch": 0.6365020941743876, - "grad_norm": 0.5113964080810547, - "learning_rate": 1.5756652705504082e-05, - "loss": 0.0943, - "step": 25075 - }, - { - "epoch": 0.636629013834243, - "grad_norm": 0.5407677888870239, - "learning_rate": 1.575580657443838e-05, - "loss": 0.0719, - "step": 25080 - }, - { - "epoch": 0.6367559334940982, - "grad_norm": 0.5791190266609192, - "learning_rate": 1.575496044337268e-05, - "loss": 0.0886, - "step": 25085 - }, - { - "epoch": 0.6368828531539535, - "grad_norm": 0.9357761144638062, - "learning_rate": 1.5754114312306977e-05, - "loss": 0.1058, - "step": 25090 - }, - { - "epoch": 0.6370097728138089, - "grad_norm": 1.6785907745361328, - "learning_rate": 1.5753268181241276e-05, - "loss": 0.0785, - "step": 25095 - }, - { - "epoch": 0.6371366924736642, - "grad_norm": 0.48527228832244873, - "learning_rate": 1.5752422050175574e-05, - "loss": 0.0641, - "step": 25100 - }, - { - "epoch": 0.6372636121335195, - "grad_norm": 0.5825497508049011, - "learning_rate": 1.5751575919109873e-05, - "loss": 0.0895, - "step": 25105 - }, - { - "epoch": 0.6373905317933748, - "grad_norm": 0.5130526423454285, - "learning_rate": 1.575072978804417e-05, - "loss": 0.0985, - "step": 25110 - }, - { - "epoch": 0.6375174514532301, - "grad_norm": 0.4674387276172638, - "learning_rate": 1.5749883656978466e-05, - "loss": 0.0879, - "step": 25115 - }, - { - "epoch": 0.6376443711130854, - "grad_norm": 0.7437220215797424, - "learning_rate": 1.5749037525912764e-05, - "loss": 0.0917, - "step": 25120 - }, - { - "epoch": 0.6377712907729407, - "grad_norm": 0.4298150837421417, - "learning_rate": 1.5748191394847063e-05, - "loss": 0.076, - "step": 25125 - }, - { - "epoch": 0.637898210432796, - "grad_norm": 0.5214298963546753, - "learning_rate": 1.574734526378136e-05, - "loss": 0.0883, - "step": 25130 - }, - { - "epoch": 0.6380251300926514, - "grad_norm": 0.8442317247390747, - "learning_rate": 1.574649913271566e-05, - "loss": 0.0814, - "step": 25135 - }, - { - "epoch": 0.6381520497525066, - "grad_norm": 0.37801408767700195, - "learning_rate": 1.5745653001649958e-05, - "loss": 0.0811, - "step": 25140 - }, - { - "epoch": 0.638278969412362, - "grad_norm": 0.6077278852462769, - "learning_rate": 1.5744806870584256e-05, - "loss": 0.0846, - "step": 25145 - }, - { - "epoch": 0.6384058890722173, - "grad_norm": 0.5063503384590149, - "learning_rate": 1.5743960739518555e-05, - "loss": 0.0692, - "step": 25150 - }, - { - "epoch": 0.6385328087320726, - "grad_norm": 0.6449683308601379, - "learning_rate": 1.574311460845285e-05, - "loss": 0.0853, - "step": 25155 - }, - { - "epoch": 0.6386597283919279, - "grad_norm": 0.5882150530815125, - "learning_rate": 1.5742268477387148e-05, - "loss": 0.0899, - "step": 25160 - }, - { - "epoch": 0.6387866480517832, - "grad_norm": 0.5673905611038208, - "learning_rate": 1.5741422346321447e-05, - "loss": 0.0864, - "step": 25165 - }, - { - "epoch": 0.6389135677116385, - "grad_norm": 0.6435888409614563, - "learning_rate": 1.5740576215255745e-05, - "loss": 0.0815, - "step": 25170 - }, - { - "epoch": 0.6390404873714939, - "grad_norm": 1.178375482559204, - "learning_rate": 1.573973008419004e-05, - "loss": 0.0904, - "step": 25175 - }, - { - "epoch": 0.6391674070313491, - "grad_norm": 0.41983655095100403, - "learning_rate": 1.573888395312434e-05, - "loss": 0.0839, - "step": 25180 - }, - { - "epoch": 0.6392943266912045, - "grad_norm": 0.47975555062294006, - "learning_rate": 1.5738037822058637e-05, - "loss": 0.0793, - "step": 25185 - }, - { - "epoch": 0.6394212463510598, - "grad_norm": 0.5810163617134094, - "learning_rate": 1.5737191690992935e-05, - "loss": 0.077, - "step": 25190 - }, - { - "epoch": 0.6395481660109151, - "grad_norm": 0.5368776917457581, - "learning_rate": 1.5736345559927234e-05, - "loss": 0.0938, - "step": 25195 - }, - { - "epoch": 0.6396750856707704, - "grad_norm": 0.406087726354599, - "learning_rate": 1.5735499428861532e-05, - "loss": 0.0973, - "step": 25200 - }, - { - "epoch": 0.6398020053306257, - "grad_norm": 0.6757369637489319, - "learning_rate": 1.573465329779583e-05, - "loss": 0.0762, - "step": 25205 - }, - { - "epoch": 0.639928924990481, - "grad_norm": 0.8620173335075378, - "learning_rate": 1.573380716673013e-05, - "loss": 0.0977, - "step": 25210 - }, - { - "epoch": 0.6400558446503364, - "grad_norm": 0.5144402980804443, - "learning_rate": 1.5732961035664424e-05, - "loss": 0.0789, - "step": 25215 - }, - { - "epoch": 0.6401827643101916, - "grad_norm": 0.43198537826538086, - "learning_rate": 1.5732114904598722e-05, - "loss": 0.0819, - "step": 25220 - }, - { - "epoch": 0.640309683970047, - "grad_norm": 0.6034700870513916, - "learning_rate": 1.573126877353302e-05, - "loss": 0.0833, - "step": 25225 - }, - { - "epoch": 0.6404366036299023, - "grad_norm": 0.5658402442932129, - "learning_rate": 1.573042264246732e-05, - "loss": 0.0626, - "step": 25230 - }, - { - "epoch": 0.6405635232897576, - "grad_norm": 0.6033934950828552, - "learning_rate": 1.5729576511401617e-05, - "loss": 0.0934, - "step": 25235 - }, - { - "epoch": 0.6406904429496129, - "grad_norm": 0.41744908690452576, - "learning_rate": 1.5728730380335916e-05, - "loss": 0.0638, - "step": 25240 - }, - { - "epoch": 0.6408173626094682, - "grad_norm": 0.4544709324836731, - "learning_rate": 1.5727884249270214e-05, - "loss": 0.0675, - "step": 25245 - }, - { - "epoch": 0.6409442822693235, - "grad_norm": 0.5676567554473877, - "learning_rate": 1.5727038118204513e-05, - "loss": 0.0823, - "step": 25250 - }, - { - "epoch": 0.6410712019291789, - "grad_norm": 0.6486465930938721, - "learning_rate": 1.5726191987138808e-05, - "loss": 0.0811, - "step": 25255 - }, - { - "epoch": 0.6411981215890341, - "grad_norm": 0.692294716835022, - "learning_rate": 1.5725345856073106e-05, - "loss": 0.0853, - "step": 25260 - }, - { - "epoch": 0.6413250412488894, - "grad_norm": 0.6959081292152405, - "learning_rate": 1.5724499725007404e-05, - "loss": 0.0835, - "step": 25265 - }, - { - "epoch": 0.6414519609087448, - "grad_norm": 0.7971503734588623, - "learning_rate": 1.5723653593941703e-05, - "loss": 0.0859, - "step": 25270 - }, - { - "epoch": 0.6415788805686001, - "grad_norm": 0.5245769023895264, - "learning_rate": 1.5722807462876e-05, - "loss": 0.0884, - "step": 25275 - }, - { - "epoch": 0.6417058002284554, - "grad_norm": 0.615537703037262, - "learning_rate": 1.57219613318103e-05, - "loss": 0.0744, - "step": 25280 - }, - { - "epoch": 0.6418327198883107, - "grad_norm": 0.4356411099433899, - "learning_rate": 1.5721115200744598e-05, - "loss": 0.0684, - "step": 25285 - }, - { - "epoch": 0.641959639548166, - "grad_norm": 0.6902073621749878, - "learning_rate": 1.5720269069678896e-05, - "loss": 0.0719, - "step": 25290 - }, - { - "epoch": 0.6420865592080213, - "grad_norm": 0.5288101434707642, - "learning_rate": 1.5719422938613195e-05, - "loss": 0.0968, - "step": 25295 - }, - { - "epoch": 0.6422134788678766, - "grad_norm": 0.5840097665786743, - "learning_rate": 1.571857680754749e-05, - "loss": 0.0854, - "step": 25300 - }, - { - "epoch": 0.642340398527732, - "grad_norm": 0.26768070459365845, - "learning_rate": 1.5717730676481788e-05, - "loss": 0.0716, - "step": 25305 - }, - { - "epoch": 0.6424673181875873, - "grad_norm": 0.6060917377471924, - "learning_rate": 1.5716884545416087e-05, - "loss": 0.1021, - "step": 25310 - }, - { - "epoch": 0.6425942378474425, - "grad_norm": 0.4351825714111328, - "learning_rate": 1.5716038414350385e-05, - "loss": 0.0814, - "step": 25315 - }, - { - "epoch": 0.6427211575072979, - "grad_norm": 0.5831695795059204, - "learning_rate": 1.571519228328468e-05, - "loss": 0.0701, - "step": 25320 - }, - { - "epoch": 0.6428480771671532, - "grad_norm": 0.4682687222957611, - "learning_rate": 1.571434615221898e-05, - "loss": 0.0774, - "step": 25325 - }, - { - "epoch": 0.6429749968270085, - "grad_norm": 0.4125877916812897, - "learning_rate": 1.5713500021153277e-05, - "loss": 0.0737, - "step": 25330 - }, - { - "epoch": 0.6431019164868638, - "grad_norm": 0.6307210922241211, - "learning_rate": 1.5712653890087575e-05, - "loss": 0.0881, - "step": 25335 - }, - { - "epoch": 0.6432288361467191, - "grad_norm": 0.6235036849975586, - "learning_rate": 1.5711807759021874e-05, - "loss": 0.0867, - "step": 25340 - }, - { - "epoch": 0.6433557558065744, - "grad_norm": 0.4621311128139496, - "learning_rate": 1.5710961627956172e-05, - "loss": 0.0778, - "step": 25345 - }, - { - "epoch": 0.6434826754664298, - "grad_norm": 0.5798943638801575, - "learning_rate": 1.571011549689047e-05, - "loss": 0.0955, - "step": 25350 - }, - { - "epoch": 0.643609595126285, - "grad_norm": 0.6170827746391296, - "learning_rate": 1.570926936582477e-05, - "loss": 0.0901, - "step": 25355 - }, - { - "epoch": 0.6437365147861404, - "grad_norm": 0.5072792768478394, - "learning_rate": 1.5708423234759064e-05, - "loss": 0.0753, - "step": 25360 - }, - { - "epoch": 0.6438634344459957, - "grad_norm": 0.5759771466255188, - "learning_rate": 1.5707577103693362e-05, - "loss": 0.0966, - "step": 25365 - }, - { - "epoch": 0.643990354105851, - "grad_norm": 0.4683034121990204, - "learning_rate": 1.570673097262766e-05, - "loss": 0.0893, - "step": 25370 - }, - { - "epoch": 0.6441172737657063, - "grad_norm": 0.6328133344650269, - "learning_rate": 1.570588484156196e-05, - "loss": 0.1022, - "step": 25375 - }, - { - "epoch": 0.6442441934255616, - "grad_norm": 0.41160765290260315, - "learning_rate": 1.5705038710496258e-05, - "loss": 0.0567, - "step": 25380 - }, - { - "epoch": 0.644371113085417, - "grad_norm": 0.48699265718460083, - "learning_rate": 1.5704192579430556e-05, - "loss": 0.0726, - "step": 25385 - }, - { - "epoch": 0.6444980327452723, - "grad_norm": 0.34343841671943665, - "learning_rate": 1.5703346448364854e-05, - "loss": 0.0655, - "step": 25390 - }, - { - "epoch": 0.6446249524051275, - "grad_norm": 0.3940892517566681, - "learning_rate": 1.5702500317299153e-05, - "loss": 0.0699, - "step": 25395 - }, - { - "epoch": 0.6447518720649829, - "grad_norm": 0.35162121057510376, - "learning_rate": 1.5701654186233448e-05, - "loss": 0.0594, - "step": 25400 - }, - { - "epoch": 0.6448787917248382, - "grad_norm": 0.5204195380210876, - "learning_rate": 1.5700808055167746e-05, - "loss": 0.0727, - "step": 25405 - }, - { - "epoch": 0.6450057113846935, - "grad_norm": 0.5388683080673218, - "learning_rate": 1.5699961924102045e-05, - "loss": 0.0862, - "step": 25410 - }, - { - "epoch": 0.6451326310445488, - "grad_norm": 0.43350496888160706, - "learning_rate": 1.5699115793036343e-05, - "loss": 0.0741, - "step": 25415 - }, - { - "epoch": 0.6452595507044041, - "grad_norm": 0.7648800611495972, - "learning_rate": 1.569826966197064e-05, - "loss": 0.0887, - "step": 25420 - }, - { - "epoch": 0.6453864703642594, - "grad_norm": 0.6005515456199646, - "learning_rate": 1.569742353090494e-05, - "loss": 0.0749, - "step": 25425 - }, - { - "epoch": 0.6455133900241148, - "grad_norm": 0.4773634672164917, - "learning_rate": 1.5696577399839238e-05, - "loss": 0.0832, - "step": 25430 - }, - { - "epoch": 0.64564030968397, - "grad_norm": 0.6347850561141968, - "learning_rate": 1.5695731268773537e-05, - "loss": 0.0833, - "step": 25435 - }, - { - "epoch": 0.6457672293438254, - "grad_norm": 0.6636502146720886, - "learning_rate": 1.569488513770783e-05, - "loss": 0.0641, - "step": 25440 - }, - { - "epoch": 0.6458941490036807, - "grad_norm": 0.6915820837020874, - "learning_rate": 1.569403900664213e-05, - "loss": 0.1077, - "step": 25445 - }, - { - "epoch": 0.646021068663536, - "grad_norm": 0.5507266521453857, - "learning_rate": 1.569319287557643e-05, - "loss": 0.0991, - "step": 25450 - }, - { - "epoch": 0.6461479883233913, - "grad_norm": 0.3157038390636444, - "learning_rate": 1.5692346744510727e-05, - "loss": 0.0877, - "step": 25455 - }, - { - "epoch": 0.6462749079832466, - "grad_norm": 0.7550495862960815, - "learning_rate": 1.5691500613445022e-05, - "loss": 0.1023, - "step": 25460 - }, - { - "epoch": 0.646401827643102, - "grad_norm": 0.7171215415000916, - "learning_rate": 1.569065448237932e-05, - "loss": 0.0858, - "step": 25465 - }, - { - "epoch": 0.6465287473029572, - "grad_norm": 0.5552189350128174, - "learning_rate": 1.568980835131362e-05, - "loss": 0.0744, - "step": 25470 - }, - { - "epoch": 0.6466556669628125, - "grad_norm": 2.82741117477417, - "learning_rate": 1.5688962220247917e-05, - "loss": 0.0855, - "step": 25475 - }, - { - "epoch": 0.6467825866226679, - "grad_norm": 0.6552743315696716, - "learning_rate": 1.5688116089182215e-05, - "loss": 0.091, - "step": 25480 - }, - { - "epoch": 0.6469095062825232, - "grad_norm": 0.4932712912559509, - "learning_rate": 1.5687269958116514e-05, - "loss": 0.0833, - "step": 25485 - }, - { - "epoch": 0.6470364259423784, - "grad_norm": 0.7069079875946045, - "learning_rate": 1.5686423827050812e-05, - "loss": 0.084, - "step": 25490 - }, - { - "epoch": 0.6471633456022338, - "grad_norm": 0.5660663843154907, - "learning_rate": 1.568557769598511e-05, - "loss": 0.0804, - "step": 25495 - }, - { - "epoch": 0.6472902652620891, - "grad_norm": 0.6304050087928772, - "learning_rate": 1.5684731564919406e-05, - "loss": 0.0698, - "step": 25500 - }, - { - "epoch": 0.6474171849219444, - "grad_norm": 0.37053629755973816, - "learning_rate": 1.5683885433853704e-05, - "loss": 0.0845, - "step": 25505 - }, - { - "epoch": 0.6475441045817997, - "grad_norm": 0.7222714424133301, - "learning_rate": 1.5683039302788002e-05, - "loss": 0.0695, - "step": 25510 - }, - { - "epoch": 0.647671024241655, - "grad_norm": 0.5463433265686035, - "learning_rate": 1.56821931717223e-05, - "loss": 0.1029, - "step": 25515 - }, - { - "epoch": 0.6477979439015104, - "grad_norm": 0.4478244483470917, - "learning_rate": 1.56813470406566e-05, - "loss": 0.0726, - "step": 25520 - }, - { - "epoch": 0.6479248635613657, - "grad_norm": 0.8553683161735535, - "learning_rate": 1.5680500909590898e-05, - "loss": 0.0771, - "step": 25525 - }, - { - "epoch": 0.6480517832212209, - "grad_norm": 0.34222641587257385, - "learning_rate": 1.5679654778525196e-05, - "loss": 0.0717, - "step": 25530 - }, - { - "epoch": 0.6481787028810763, - "grad_norm": 0.4215271770954132, - "learning_rate": 1.5678808647459494e-05, - "loss": 0.0797, - "step": 25535 - }, - { - "epoch": 0.6483056225409316, - "grad_norm": 0.5592857003211975, - "learning_rate": 1.567796251639379e-05, - "loss": 0.0578, - "step": 25540 - }, - { - "epoch": 0.6484325422007869, - "grad_norm": 0.7047847509384155, - "learning_rate": 1.5677116385328088e-05, - "loss": 0.0876, - "step": 25545 - }, - { - "epoch": 0.6485594618606422, - "grad_norm": 0.7352387309074402, - "learning_rate": 1.5676270254262386e-05, - "loss": 0.0659, - "step": 25550 - }, - { - "epoch": 0.6486863815204975, - "grad_norm": 0.5921996831893921, - "learning_rate": 1.5675424123196685e-05, - "loss": 0.0953, - "step": 25555 - }, - { - "epoch": 0.6488133011803529, - "grad_norm": 0.4314962327480316, - "learning_rate": 1.5674577992130983e-05, - "loss": 0.0706, - "step": 25560 - }, - { - "epoch": 0.6489402208402082, - "grad_norm": 0.883608877658844, - "learning_rate": 1.567373186106528e-05, - "loss": 0.0696, - "step": 25565 - }, - { - "epoch": 0.6490671405000634, - "grad_norm": 0.623775064945221, - "learning_rate": 1.567288572999958e-05, - "loss": 0.0987, - "step": 25570 - }, - { - "epoch": 0.6491940601599188, - "grad_norm": 0.5738108158111572, - "learning_rate": 1.5672039598933878e-05, - "loss": 0.0641, - "step": 25575 - }, - { - "epoch": 0.6493209798197741, - "grad_norm": 0.6410079002380371, - "learning_rate": 1.5671193467868173e-05, - "loss": 0.0874, - "step": 25580 - }, - { - "epoch": 0.6494478994796294, - "grad_norm": 0.9937822222709656, - "learning_rate": 1.5670347336802472e-05, - "loss": 0.0578, - "step": 25585 - }, - { - "epoch": 0.6495748191394847, - "grad_norm": 0.638034462928772, - "learning_rate": 1.566950120573677e-05, - "loss": 0.0741, - "step": 25590 - }, - { - "epoch": 0.64970173879934, - "grad_norm": 0.5879042148590088, - "learning_rate": 1.566865507467107e-05, - "loss": 0.08, - "step": 25595 - }, - { - "epoch": 0.6498286584591954, - "grad_norm": 0.9159595966339111, - "learning_rate": 1.5667808943605364e-05, - "loss": 0.066, - "step": 25600 - }, - { - "epoch": 0.6499555781190507, - "grad_norm": 1.1072641611099243, - "learning_rate": 1.5666962812539662e-05, - "loss": 0.06, - "step": 25605 - }, - { - "epoch": 0.6500824977789059, - "grad_norm": 0.8090154528617859, - "learning_rate": 1.566611668147396e-05, - "loss": 0.0728, - "step": 25610 - }, - { - "epoch": 0.6502094174387613, - "grad_norm": 0.567435622215271, - "learning_rate": 1.566527055040826e-05, - "loss": 0.0805, - "step": 25615 - }, - { - "epoch": 0.6503363370986166, - "grad_norm": 0.6096593737602234, - "learning_rate": 1.5664424419342557e-05, - "loss": 0.085, - "step": 25620 - }, - { - "epoch": 0.6504632567584719, - "grad_norm": 0.5817996859550476, - "learning_rate": 1.5663578288276856e-05, - "loss": 0.0897, - "step": 25625 - }, - { - "epoch": 0.6505901764183272, - "grad_norm": 0.6338269114494324, - "learning_rate": 1.5662732157211154e-05, - "loss": 0.0849, - "step": 25630 - }, - { - "epoch": 0.6507170960781825, - "grad_norm": 1.3228682279586792, - "learning_rate": 1.5661886026145452e-05, - "loss": 0.1026, - "step": 25635 - }, - { - "epoch": 0.6508440157380379, - "grad_norm": 0.4394277036190033, - "learning_rate": 1.5661039895079747e-05, - "loss": 0.0847, - "step": 25640 - }, - { - "epoch": 0.6509709353978931, - "grad_norm": 0.7408164739608765, - "learning_rate": 1.5660193764014046e-05, - "loss": 0.1019, - "step": 25645 - }, - { - "epoch": 0.6510978550577484, - "grad_norm": 0.6141287684440613, - "learning_rate": 1.5659347632948344e-05, - "loss": 0.0703, - "step": 25650 - }, - { - "epoch": 0.6512247747176038, - "grad_norm": 0.4555867314338684, - "learning_rate": 1.5658501501882643e-05, - "loss": 0.0607, - "step": 25655 - }, - { - "epoch": 0.6513516943774591, - "grad_norm": 0.51973956823349, - "learning_rate": 1.565765537081694e-05, - "loss": 0.1044, - "step": 25660 - }, - { - "epoch": 0.6514786140373143, - "grad_norm": 0.654887855052948, - "learning_rate": 1.565680923975124e-05, - "loss": 0.0859, - "step": 25665 - }, - { - "epoch": 0.6516055336971697, - "grad_norm": 0.43388181924819946, - "learning_rate": 1.5655963108685538e-05, - "loss": 0.0828, - "step": 25670 - }, - { - "epoch": 0.651732453357025, - "grad_norm": 0.5858169794082642, - "learning_rate": 1.5655116977619836e-05, - "loss": 0.0801, - "step": 25675 - }, - { - "epoch": 0.6518593730168804, - "grad_norm": 0.8622133731842041, - "learning_rate": 1.565427084655413e-05, - "loss": 0.0688, - "step": 25680 - }, - { - "epoch": 0.6519862926767356, - "grad_norm": 0.41488638520240784, - "learning_rate": 1.565342471548843e-05, - "loss": 0.0586, - "step": 25685 - }, - { - "epoch": 0.6521132123365909, - "grad_norm": 0.430266797542572, - "learning_rate": 1.5652578584422728e-05, - "loss": 0.0683, - "step": 25690 - }, - { - "epoch": 0.6522401319964463, - "grad_norm": 0.47830310463905334, - "learning_rate": 1.5651732453357026e-05, - "loss": 0.0809, - "step": 25695 - }, - { - "epoch": 0.6523670516563016, - "grad_norm": 0.5511640310287476, - "learning_rate": 1.5650886322291325e-05, - "loss": 0.0838, - "step": 25700 - }, - { - "epoch": 0.6524939713161568, - "grad_norm": 0.6120385527610779, - "learning_rate": 1.5650040191225623e-05, - "loss": 0.0722, - "step": 25705 - }, - { - "epoch": 0.6526208909760122, - "grad_norm": 1.1862961053848267, - "learning_rate": 1.564919406015992e-05, - "loss": 0.0883, - "step": 25710 - }, - { - "epoch": 0.6527478106358675, - "grad_norm": 0.46621525287628174, - "learning_rate": 1.564834792909422e-05, - "loss": 0.0783, - "step": 25715 - }, - { - "epoch": 0.6528747302957228, - "grad_norm": 0.5254164338111877, - "learning_rate": 1.5647501798028515e-05, - "loss": 0.0694, - "step": 25720 - }, - { - "epoch": 0.6530016499555781, - "grad_norm": 0.5067979097366333, - "learning_rate": 1.5646655666962813e-05, - "loss": 0.0671, - "step": 25725 - }, - { - "epoch": 0.6531285696154334, - "grad_norm": 0.5554617643356323, - "learning_rate": 1.5645809535897112e-05, - "loss": 0.0833, - "step": 25730 - }, - { - "epoch": 0.6532554892752888, - "grad_norm": 0.47998425364494324, - "learning_rate": 1.564496340483141e-05, - "loss": 0.0764, - "step": 25735 - }, - { - "epoch": 0.6533824089351441, - "grad_norm": 0.4807543456554413, - "learning_rate": 1.5644117273765705e-05, - "loss": 0.0884, - "step": 25740 - }, - { - "epoch": 0.6535093285949993, - "grad_norm": 0.38659149408340454, - "learning_rate": 1.5643271142700004e-05, - "loss": 0.0709, - "step": 25745 - }, - { - "epoch": 0.6536362482548547, - "grad_norm": 0.3257778286933899, - "learning_rate": 1.5642425011634302e-05, - "loss": 0.0662, - "step": 25750 - }, - { - "epoch": 0.65376316791471, - "grad_norm": 0.4637419879436493, - "learning_rate": 1.56415788805686e-05, - "loss": 0.0744, - "step": 25755 - }, - { - "epoch": 0.6538900875745653, - "grad_norm": 0.8940372467041016, - "learning_rate": 1.56407327495029e-05, - "loss": 0.0786, - "step": 25760 - }, - { - "epoch": 0.6540170072344206, - "grad_norm": 0.7305512428283691, - "learning_rate": 1.5639886618437197e-05, - "loss": 0.0805, - "step": 25765 - }, - { - "epoch": 0.6541439268942759, - "grad_norm": 0.7679525017738342, - "learning_rate": 1.5639040487371496e-05, - "loss": 0.0705, - "step": 25770 - }, - { - "epoch": 0.6542708465541313, - "grad_norm": 0.876660168170929, - "learning_rate": 1.5638194356305794e-05, - "loss": 0.0797, - "step": 25775 - }, - { - "epoch": 0.6543977662139866, - "grad_norm": 0.6702693700790405, - "learning_rate": 1.563734822524009e-05, - "loss": 0.0947, - "step": 25780 - }, - { - "epoch": 0.6545246858738418, - "grad_norm": 0.6035286784172058, - "learning_rate": 1.5636502094174387e-05, - "loss": 0.0754, - "step": 25785 - }, - { - "epoch": 0.6546516055336972, - "grad_norm": 0.5661017298698425, - "learning_rate": 1.5635655963108686e-05, - "loss": 0.0715, - "step": 25790 - }, - { - "epoch": 0.6547785251935525, - "grad_norm": 0.7710785865783691, - "learning_rate": 1.5634809832042984e-05, - "loss": 0.0869, - "step": 25795 - }, - { - "epoch": 0.6549054448534078, - "grad_norm": 0.5894028544425964, - "learning_rate": 1.5633963700977283e-05, - "loss": 0.0726, - "step": 25800 - }, - { - "epoch": 0.6550323645132631, - "grad_norm": 0.46138760447502136, - "learning_rate": 1.563311756991158e-05, - "loss": 0.0632, - "step": 25805 - }, - { - "epoch": 0.6551592841731184, - "grad_norm": 0.39392775297164917, - "learning_rate": 1.563227143884588e-05, - "loss": 0.0663, - "step": 25810 - }, - { - "epoch": 0.6552862038329738, - "grad_norm": 0.4840446412563324, - "learning_rate": 1.5631425307780178e-05, - "loss": 0.0809, - "step": 25815 - }, - { - "epoch": 0.655413123492829, - "grad_norm": 0.49254101514816284, - "learning_rate": 1.5630579176714476e-05, - "loss": 0.1033, - "step": 25820 - }, - { - "epoch": 0.6555400431526843, - "grad_norm": 0.5158733129501343, - "learning_rate": 1.562973304564877e-05, - "loss": 0.0816, - "step": 25825 - }, - { - "epoch": 0.6556669628125397, - "grad_norm": 0.3583490550518036, - "learning_rate": 1.562888691458307e-05, - "loss": 0.0595, - "step": 25830 - }, - { - "epoch": 0.655793882472395, - "grad_norm": 0.4930334687232971, - "learning_rate": 1.5628040783517368e-05, - "loss": 0.0834, - "step": 25835 - }, - { - "epoch": 0.6559208021322502, - "grad_norm": 0.36883729696273804, - "learning_rate": 1.5627194652451666e-05, - "loss": 0.0893, - "step": 25840 - }, - { - "epoch": 0.6560477217921056, - "grad_norm": 0.5169828534126282, - "learning_rate": 1.5626348521385965e-05, - "loss": 0.0699, - "step": 25845 - }, - { - "epoch": 0.6561746414519609, - "grad_norm": 0.34639284014701843, - "learning_rate": 1.5625502390320263e-05, - "loss": 0.071, - "step": 25850 - }, - { - "epoch": 0.6563015611118163, - "grad_norm": 0.6319788694381714, - "learning_rate": 1.5624656259254562e-05, - "loss": 0.0792, - "step": 25855 - }, - { - "epoch": 0.6564284807716715, - "grad_norm": 0.5280594229698181, - "learning_rate": 1.562381012818886e-05, - "loss": 0.0613, - "step": 25860 - }, - { - "epoch": 0.6565554004315268, - "grad_norm": 0.7193657755851746, - "learning_rate": 1.5622963997123155e-05, - "loss": 0.0802, - "step": 25865 - }, - { - "epoch": 0.6566823200913822, - "grad_norm": 0.7091192007064819, - "learning_rate": 1.5622117866057454e-05, - "loss": 0.0752, - "step": 25870 - }, - { - "epoch": 0.6568092397512375, - "grad_norm": 0.6762391328811646, - "learning_rate": 1.5621271734991752e-05, - "loss": 0.089, - "step": 25875 - }, - { - "epoch": 0.6569361594110927, - "grad_norm": 0.546559751033783, - "learning_rate": 1.562042560392605e-05, - "loss": 0.0825, - "step": 25880 - }, - { - "epoch": 0.6570630790709481, - "grad_norm": 0.5323053598403931, - "learning_rate": 1.5619579472860345e-05, - "loss": 0.0709, - "step": 25885 - }, - { - "epoch": 0.6571899987308034, - "grad_norm": 0.5276694297790527, - "learning_rate": 1.5618733341794644e-05, - "loss": 0.081, - "step": 25890 - }, - { - "epoch": 0.6573169183906588, - "grad_norm": 0.3345666527748108, - "learning_rate": 1.5617887210728942e-05, - "loss": 0.0544, - "step": 25895 - }, - { - "epoch": 0.657443838050514, - "grad_norm": 0.45762988924980164, - "learning_rate": 1.561704107966324e-05, - "loss": 0.0764, - "step": 25900 - }, - { - "epoch": 0.6575707577103693, - "grad_norm": 0.7287636995315552, - "learning_rate": 1.561619494859754e-05, - "loss": 0.0897, - "step": 25905 - }, - { - "epoch": 0.6576976773702247, - "grad_norm": 0.4380788505077362, - "learning_rate": 1.5615348817531837e-05, - "loss": 0.0902, - "step": 25910 - }, - { - "epoch": 0.65782459703008, - "grad_norm": 0.7656886577606201, - "learning_rate": 1.5614502686466136e-05, - "loss": 0.0931, - "step": 25915 - }, - { - "epoch": 0.6579515166899352, - "grad_norm": 0.6501161456108093, - "learning_rate": 1.5613656555400434e-05, - "loss": 0.0855, - "step": 25920 - }, - { - "epoch": 0.6580784363497906, - "grad_norm": 0.3374694287776947, - "learning_rate": 1.561281042433473e-05, - "loss": 0.0629, - "step": 25925 - }, - { - "epoch": 0.6582053560096459, - "grad_norm": 0.5942754745483398, - "learning_rate": 1.5611964293269028e-05, - "loss": 0.0572, - "step": 25930 - }, - { - "epoch": 0.6583322756695013, - "grad_norm": 0.4317777454853058, - "learning_rate": 1.5611118162203326e-05, - "loss": 0.0776, - "step": 25935 - }, - { - "epoch": 0.6584591953293565, - "grad_norm": 0.6078863739967346, - "learning_rate": 1.5610272031137624e-05, - "loss": 0.0799, - "step": 25940 - }, - { - "epoch": 0.6585861149892118, - "grad_norm": 0.6531168222427368, - "learning_rate": 1.5609425900071923e-05, - "loss": 0.1071, - "step": 25945 - }, - { - "epoch": 0.6587130346490672, - "grad_norm": 0.35571590065956116, - "learning_rate": 1.560857976900622e-05, - "loss": 0.0784, - "step": 25950 - }, - { - "epoch": 0.6588399543089225, - "grad_norm": 0.5368595123291016, - "learning_rate": 1.560773363794052e-05, - "loss": 0.0787, - "step": 25955 - }, - { - "epoch": 0.6589668739687777, - "grad_norm": 0.4517717659473419, - "learning_rate": 1.5606887506874818e-05, - "loss": 0.0629, - "step": 25960 - }, - { - "epoch": 0.6590937936286331, - "grad_norm": 0.559029757976532, - "learning_rate": 1.5606041375809113e-05, - "loss": 0.0771, - "step": 25965 - }, - { - "epoch": 0.6592207132884884, - "grad_norm": 0.5220367908477783, - "learning_rate": 1.560519524474341e-05, - "loss": 0.1044, - "step": 25970 - }, - { - "epoch": 0.6593476329483438, - "grad_norm": 0.9519271850585938, - "learning_rate": 1.560434911367771e-05, - "loss": 0.0641, - "step": 25975 - }, - { - "epoch": 0.659474552608199, - "grad_norm": 0.46222665905952454, - "learning_rate": 1.5603502982612008e-05, - "loss": 0.0725, - "step": 25980 - }, - { - "epoch": 0.6596014722680543, - "grad_norm": 0.7238847613334656, - "learning_rate": 1.5602656851546307e-05, - "loss": 0.0755, - "step": 25985 - }, - { - "epoch": 0.6597283919279097, - "grad_norm": 0.40288046002388, - "learning_rate": 1.5601810720480605e-05, - "loss": 0.0875, - "step": 25990 - }, - { - "epoch": 0.6598553115877649, - "grad_norm": 0.463578462600708, - "learning_rate": 1.5600964589414903e-05, - "loss": 0.078, - "step": 25995 - }, - { - "epoch": 0.6599822312476202, - "grad_norm": 0.5152695178985596, - "learning_rate": 1.5600118458349202e-05, - "loss": 0.083, - "step": 26000 - }, - { - "epoch": 0.6601091509074756, - "grad_norm": 0.7276461124420166, - "learning_rate": 1.5599272327283497e-05, - "loss": 0.0872, - "step": 26005 - }, - { - "epoch": 0.6602360705673309, - "grad_norm": 0.7118815183639526, - "learning_rate": 1.5598426196217795e-05, - "loss": 0.0797, - "step": 26010 - }, - { - "epoch": 0.6603629902271861, - "grad_norm": 0.5847872495651245, - "learning_rate": 1.5597580065152094e-05, - "loss": 0.0835, - "step": 26015 - }, - { - "epoch": 0.6604899098870415, - "grad_norm": 0.4147334396839142, - "learning_rate": 1.5596733934086392e-05, - "loss": 0.0733, - "step": 26020 - }, - { - "epoch": 0.6606168295468968, - "grad_norm": 0.6635116338729858, - "learning_rate": 1.5595887803020687e-05, - "loss": 0.0795, - "step": 26025 - }, - { - "epoch": 0.6607437492067522, - "grad_norm": 1.0098915100097656, - "learning_rate": 1.5595041671954985e-05, - "loss": 0.0673, - "step": 26030 - }, - { - "epoch": 0.6608706688666074, - "grad_norm": 0.5184196829795837, - "learning_rate": 1.5594195540889284e-05, - "loss": 0.0633, - "step": 26035 - }, - { - "epoch": 0.6609975885264627, - "grad_norm": 0.42824792861938477, - "learning_rate": 1.5593349409823582e-05, - "loss": 0.0958, - "step": 26040 - }, - { - "epoch": 0.6611245081863181, - "grad_norm": 0.6634860038757324, - "learning_rate": 1.559250327875788e-05, - "loss": 0.0759, - "step": 26045 - }, - { - "epoch": 0.6612514278461734, - "grad_norm": 0.5687294006347656, - "learning_rate": 1.559165714769218e-05, - "loss": 0.0909, - "step": 26050 - }, - { - "epoch": 0.6613783475060286, - "grad_norm": 0.6062563061714172, - "learning_rate": 1.5590811016626477e-05, - "loss": 0.0768, - "step": 26055 - }, - { - "epoch": 0.661505267165884, - "grad_norm": 0.4311898946762085, - "learning_rate": 1.5589964885560776e-05, - "loss": 0.0787, - "step": 26060 - }, - { - "epoch": 0.6616321868257393, - "grad_norm": 1.4445513486862183, - "learning_rate": 1.558911875449507e-05, - "loss": 0.0721, - "step": 26065 - }, - { - "epoch": 0.6617591064855947, - "grad_norm": 0.417083740234375, - "learning_rate": 1.558827262342937e-05, - "loss": 0.0756, - "step": 26070 - }, - { - "epoch": 0.6618860261454499, - "grad_norm": 0.48454102873802185, - "learning_rate": 1.5587426492363668e-05, - "loss": 0.0862, - "step": 26075 - }, - { - "epoch": 0.6620129458053052, - "grad_norm": 0.5329951047897339, - "learning_rate": 1.5586580361297966e-05, - "loss": 0.0781, - "step": 26080 - }, - { - "epoch": 0.6621398654651606, - "grad_norm": 0.5337794423103333, - "learning_rate": 1.5585734230232264e-05, - "loss": 0.0742, - "step": 26085 - }, - { - "epoch": 0.6622667851250159, - "grad_norm": 0.6127446889877319, - "learning_rate": 1.5584888099166563e-05, - "loss": 0.073, - "step": 26090 - }, - { - "epoch": 0.6623937047848711, - "grad_norm": 1.2488874197006226, - "learning_rate": 1.558404196810086e-05, - "loss": 0.0758, - "step": 26095 - }, - { - "epoch": 0.6625206244447265, - "grad_norm": 0.4965994358062744, - "learning_rate": 1.558319583703516e-05, - "loss": 0.0772, - "step": 26100 - }, - { - "epoch": 0.6626475441045818, - "grad_norm": 0.6680147647857666, - "learning_rate": 1.5582349705969455e-05, - "loss": 0.092, - "step": 26105 - }, - { - "epoch": 0.6627744637644372, - "grad_norm": 0.7254077196121216, - "learning_rate": 1.5581503574903753e-05, - "loss": 0.0804, - "step": 26110 - }, - { - "epoch": 0.6629013834242924, - "grad_norm": 0.741085410118103, - "learning_rate": 1.558065744383805e-05, - "loss": 0.0942, - "step": 26115 - }, - { - "epoch": 0.6630283030841477, - "grad_norm": 0.5153433680534363, - "learning_rate": 1.557981131277235e-05, - "loss": 0.0745, - "step": 26120 - }, - { - "epoch": 0.6631552227440031, - "grad_norm": 0.6251468658447266, - "learning_rate": 1.5578965181706648e-05, - "loss": 0.0947, - "step": 26125 - }, - { - "epoch": 0.6632821424038584, - "grad_norm": 0.5727699398994446, - "learning_rate": 1.5578119050640947e-05, - "loss": 0.0811, - "step": 26130 - }, - { - "epoch": 0.6634090620637136, - "grad_norm": 0.6726710200309753, - "learning_rate": 1.5577272919575245e-05, - "loss": 0.0864, - "step": 26135 - }, - { - "epoch": 0.663535981723569, - "grad_norm": 0.6347255110740662, - "learning_rate": 1.5576426788509544e-05, - "loss": 0.0834, - "step": 26140 - }, - { - "epoch": 0.6636629013834243, - "grad_norm": 0.7589071393013, - "learning_rate": 1.557558065744384e-05, - "loss": 0.0811, - "step": 26145 - }, - { - "epoch": 0.6637898210432795, - "grad_norm": 0.36933398246765137, - "learning_rate": 1.5574734526378137e-05, - "loss": 0.0584, - "step": 26150 - }, - { - "epoch": 0.6639167407031349, - "grad_norm": 1.5570992231369019, - "learning_rate": 1.5573888395312435e-05, - "loss": 0.0702, - "step": 26155 - }, - { - "epoch": 0.6640436603629902, - "grad_norm": 0.4937205910682678, - "learning_rate": 1.5573042264246734e-05, - "loss": 0.065, - "step": 26160 - }, - { - "epoch": 0.6641705800228456, - "grad_norm": 0.43732866644859314, - "learning_rate": 1.557219613318103e-05, - "loss": 0.0811, - "step": 26165 - }, - { - "epoch": 0.6642974996827008, - "grad_norm": 0.7285076975822449, - "learning_rate": 1.5571350002115327e-05, - "loss": 0.1088, - "step": 26170 - }, - { - "epoch": 0.6644244193425561, - "grad_norm": 0.7265585064888, - "learning_rate": 1.5570503871049626e-05, - "loss": 0.071, - "step": 26175 - }, - { - "epoch": 0.6645513390024115, - "grad_norm": 0.4160344898700714, - "learning_rate": 1.5569657739983924e-05, - "loss": 0.0769, - "step": 26180 - }, - { - "epoch": 0.6646782586622668, - "grad_norm": 1.1785178184509277, - "learning_rate": 1.5568811608918222e-05, - "loss": 0.0971, - "step": 26185 - }, - { - "epoch": 0.664805178322122, - "grad_norm": 1.0128756761550903, - "learning_rate": 1.556796547785252e-05, - "loss": 0.071, - "step": 26190 - }, - { - "epoch": 0.6649320979819774, - "grad_norm": 0.3282056748867035, - "learning_rate": 1.556711934678682e-05, - "loss": 0.0576, - "step": 26195 - }, - { - "epoch": 0.6650590176418327, - "grad_norm": 0.7686196565628052, - "learning_rate": 1.5566273215721118e-05, - "loss": 0.0885, - "step": 26200 - }, - { - "epoch": 0.6651859373016881, - "grad_norm": 0.6677250862121582, - "learning_rate": 1.5565427084655413e-05, - "loss": 0.0773, - "step": 26205 - }, - { - "epoch": 0.6653128569615433, - "grad_norm": 0.6378008127212524, - "learning_rate": 1.556458095358971e-05, - "loss": 0.0893, - "step": 26210 - }, - { - "epoch": 0.6654397766213986, - "grad_norm": 0.5647174715995789, - "learning_rate": 1.556373482252401e-05, - "loss": 0.0852, - "step": 26215 - }, - { - "epoch": 0.665566696281254, - "grad_norm": 0.7417848706245422, - "learning_rate": 1.5562888691458308e-05, - "loss": 0.0949, - "step": 26220 - }, - { - "epoch": 0.6656936159411093, - "grad_norm": 0.5930640697479248, - "learning_rate": 1.5562042560392606e-05, - "loss": 0.0871, - "step": 26225 - }, - { - "epoch": 0.6658205356009645, - "grad_norm": 0.583536684513092, - "learning_rate": 1.5561196429326905e-05, - "loss": 0.064, - "step": 26230 - }, - { - "epoch": 0.6659474552608199, - "grad_norm": 0.3597191572189331, - "learning_rate": 1.5560350298261203e-05, - "loss": 0.0669, - "step": 26235 - }, - { - "epoch": 0.6660743749206752, - "grad_norm": 0.6393194198608398, - "learning_rate": 1.55595041671955e-05, - "loss": 0.0704, - "step": 26240 - }, - { - "epoch": 0.6662012945805306, - "grad_norm": 0.9003416299819946, - "learning_rate": 1.5558658036129796e-05, - "loss": 0.0667, - "step": 26245 - }, - { - "epoch": 0.6663282142403858, - "grad_norm": 0.7774051427841187, - "learning_rate": 1.5557811905064095e-05, - "loss": 0.0716, - "step": 26250 - }, - { - "epoch": 0.6664551339002411, - "grad_norm": 0.6941038966178894, - "learning_rate": 1.5556965773998393e-05, - "loss": 0.0715, - "step": 26255 - }, - { - "epoch": 0.6665820535600965, - "grad_norm": 0.8869980573654175, - "learning_rate": 1.555611964293269e-05, - "loss": 0.092, - "step": 26260 - }, - { - "epoch": 0.6667089732199518, - "grad_norm": 0.3781154751777649, - "learning_rate": 1.555527351186699e-05, - "loss": 0.066, - "step": 26265 - }, - { - "epoch": 0.666835892879807, - "grad_norm": 0.8883450627326965, - "learning_rate": 1.555442738080129e-05, - "loss": 0.0699, - "step": 26270 - }, - { - "epoch": 0.6669628125396624, - "grad_norm": 0.5447213649749756, - "learning_rate": 1.5553581249735587e-05, - "loss": 0.0851, - "step": 26275 - }, - { - "epoch": 0.6670897321995177, - "grad_norm": 0.516818642616272, - "learning_rate": 1.5552735118669885e-05, - "loss": 0.0765, - "step": 26280 - }, - { - "epoch": 0.6672166518593731, - "grad_norm": 0.6588066220283508, - "learning_rate": 1.555188898760418e-05, - "loss": 0.0962, - "step": 26285 - }, - { - "epoch": 0.6673435715192283, - "grad_norm": 0.4447103440761566, - "learning_rate": 1.555104285653848e-05, - "loss": 0.0729, - "step": 26290 - }, - { - "epoch": 0.6674704911790836, - "grad_norm": 0.4206496477127075, - "learning_rate": 1.5550196725472777e-05, - "loss": 0.0693, - "step": 26295 - }, - { - "epoch": 0.667597410838939, - "grad_norm": 0.5764579772949219, - "learning_rate": 1.5549350594407075e-05, - "loss": 0.089, - "step": 26300 - }, - { - "epoch": 0.6677243304987943, - "grad_norm": 0.695303201675415, - "learning_rate": 1.554850446334137e-05, - "loss": 0.0983, - "step": 26305 - }, - { - "epoch": 0.6678512501586495, - "grad_norm": 1.2056975364685059, - "learning_rate": 1.554765833227567e-05, - "loss": 0.0961, - "step": 26310 - }, - { - "epoch": 0.6679781698185049, - "grad_norm": 0.31217125058174133, - "learning_rate": 1.5546812201209967e-05, - "loss": 0.0659, - "step": 26315 - }, - { - "epoch": 0.6681050894783602, - "grad_norm": 0.6678401231765747, - "learning_rate": 1.5545966070144266e-05, - "loss": 0.0923, - "step": 26320 - }, - { - "epoch": 0.6682320091382155, - "grad_norm": 2.605055570602417, - "learning_rate": 1.5545119939078564e-05, - "loss": 0.0783, - "step": 26325 - }, - { - "epoch": 0.6683589287980708, - "grad_norm": 0.5850388407707214, - "learning_rate": 1.5544273808012862e-05, - "loss": 0.0774, - "step": 26330 - }, - { - "epoch": 0.6684858484579261, - "grad_norm": 0.4695473611354828, - "learning_rate": 1.554342767694716e-05, - "loss": 0.063, - "step": 26335 - }, - { - "epoch": 0.6686127681177815, - "grad_norm": 0.7140963673591614, - "learning_rate": 1.554258154588146e-05, - "loss": 0.0726, - "step": 26340 - }, - { - "epoch": 0.6687396877776367, - "grad_norm": 0.5091566443443298, - "learning_rate": 1.5541735414815758e-05, - "loss": 0.0988, - "step": 26345 - }, - { - "epoch": 0.668866607437492, - "grad_norm": 0.4665166139602661, - "learning_rate": 1.5540889283750053e-05, - "loss": 0.0711, - "step": 26350 - }, - { - "epoch": 0.6689935270973474, - "grad_norm": 0.9313696622848511, - "learning_rate": 1.554004315268435e-05, - "loss": 0.0737, - "step": 26355 - }, - { - "epoch": 0.6691204467572027, - "grad_norm": 0.4985034167766571, - "learning_rate": 1.553919702161865e-05, - "loss": 0.0883, - "step": 26360 - }, - { - "epoch": 0.669247366417058, - "grad_norm": 0.3904946744441986, - "learning_rate": 1.5538350890552948e-05, - "loss": 0.0687, - "step": 26365 - }, - { - "epoch": 0.6693742860769133, - "grad_norm": 0.6810764670372009, - "learning_rate": 1.5537504759487246e-05, - "loss": 0.0894, - "step": 26370 - }, - { - "epoch": 0.6695012057367686, - "grad_norm": 0.8513662219047546, - "learning_rate": 1.5536658628421545e-05, - "loss": 0.07, - "step": 26375 - }, - { - "epoch": 0.669628125396624, - "grad_norm": 0.6819890141487122, - "learning_rate": 1.5535812497355843e-05, - "loss": 0.0842, - "step": 26380 - }, - { - "epoch": 0.6697550450564792, - "grad_norm": 0.6391957998275757, - "learning_rate": 1.553496636629014e-05, - "loss": 0.0702, - "step": 26385 - }, - { - "epoch": 0.6698819647163345, - "grad_norm": 0.540783166885376, - "learning_rate": 1.5534120235224437e-05, - "loss": 0.0716, - "step": 26390 - }, - { - "epoch": 0.6700088843761899, - "grad_norm": 0.7187281250953674, - "learning_rate": 1.5533274104158735e-05, - "loss": 0.0661, - "step": 26395 - }, - { - "epoch": 0.6701358040360452, - "grad_norm": 0.4995191693305969, - "learning_rate": 1.5532427973093033e-05, - "loss": 0.0806, - "step": 26400 - }, - { - "epoch": 0.6702627236959005, - "grad_norm": 0.6605052947998047, - "learning_rate": 1.5531581842027332e-05, - "loss": 0.097, - "step": 26405 - }, - { - "epoch": 0.6703896433557558, - "grad_norm": 0.5970720648765564, - "learning_rate": 1.553073571096163e-05, - "loss": 0.0595, - "step": 26410 - }, - { - "epoch": 0.6705165630156111, - "grad_norm": 0.6540791392326355, - "learning_rate": 1.552988957989593e-05, - "loss": 0.0891, - "step": 26415 - }, - { - "epoch": 0.6706434826754665, - "grad_norm": 0.6155006289482117, - "learning_rate": 1.5529043448830227e-05, - "loss": 0.0585, - "step": 26420 - }, - { - "epoch": 0.6707704023353217, - "grad_norm": 1.9594645500183105, - "learning_rate": 1.5528197317764525e-05, - "loss": 0.0823, - "step": 26425 - }, - { - "epoch": 0.670897321995177, - "grad_norm": 0.42009472846984863, - "learning_rate": 1.552735118669882e-05, - "loss": 0.0723, - "step": 26430 - }, - { - "epoch": 0.6710242416550324, - "grad_norm": 0.3082939684391022, - "learning_rate": 1.552650505563312e-05, - "loss": 0.077, - "step": 26435 - }, - { - "epoch": 0.6711511613148877, - "grad_norm": 0.4445505738258362, - "learning_rate": 1.5525658924567417e-05, - "loss": 0.0782, - "step": 26440 - }, - { - "epoch": 0.671278080974743, - "grad_norm": 0.6757209897041321, - "learning_rate": 1.5524812793501716e-05, - "loss": 0.0832, - "step": 26445 - }, - { - "epoch": 0.6714050006345983, - "grad_norm": 0.5179021954536438, - "learning_rate": 1.552396666243601e-05, - "loss": 0.0948, - "step": 26450 - }, - { - "epoch": 0.6715319202944536, - "grad_norm": 0.3126007616519928, - "learning_rate": 1.552312053137031e-05, - "loss": 0.0648, - "step": 26455 - }, - { - "epoch": 0.671658839954309, - "grad_norm": 0.6841244697570801, - "learning_rate": 1.5522274400304607e-05, - "loss": 0.0675, - "step": 26460 - }, - { - "epoch": 0.6717857596141642, - "grad_norm": 0.5707188844680786, - "learning_rate": 1.5521428269238906e-05, - "loss": 0.0805, - "step": 26465 - }, - { - "epoch": 0.6719126792740195, - "grad_norm": 0.7112056612968445, - "learning_rate": 1.5520582138173204e-05, - "loss": 0.0804, - "step": 26470 - }, - { - "epoch": 0.6720395989338749, - "grad_norm": 0.45432397723197937, - "learning_rate": 1.5519736007107503e-05, - "loss": 0.108, - "step": 26475 - }, - { - "epoch": 0.6721665185937302, - "grad_norm": 0.4612070620059967, - "learning_rate": 1.55188898760418e-05, - "loss": 0.087, - "step": 26480 - }, - { - "epoch": 0.6722934382535855, - "grad_norm": 0.46171289682388306, - "learning_rate": 1.55180437449761e-05, - "loss": 0.0871, - "step": 26485 - }, - { - "epoch": 0.6724203579134408, - "grad_norm": 0.5781970620155334, - "learning_rate": 1.5517197613910394e-05, - "loss": 0.0766, - "step": 26490 - }, - { - "epoch": 0.6725472775732961, - "grad_norm": 0.5076903700828552, - "learning_rate": 1.5516351482844693e-05, - "loss": 0.0723, - "step": 26495 - }, - { - "epoch": 0.6726741972331514, - "grad_norm": 0.5995897054672241, - "learning_rate": 1.551550535177899e-05, - "loss": 0.0625, - "step": 26500 - }, - { - "epoch": 0.6728011168930067, - "grad_norm": 0.6361657977104187, - "learning_rate": 1.551465922071329e-05, - "loss": 0.0892, - "step": 26505 - }, - { - "epoch": 0.672928036552862, - "grad_norm": 0.444416880607605, - "learning_rate": 1.5513813089647588e-05, - "loss": 0.0769, - "step": 26510 - }, - { - "epoch": 0.6730549562127174, - "grad_norm": 0.6307207345962524, - "learning_rate": 1.5512966958581886e-05, - "loss": 0.0659, - "step": 26515 - }, - { - "epoch": 0.6731818758725726, - "grad_norm": 0.6462589502334595, - "learning_rate": 1.5512120827516185e-05, - "loss": 0.0753, - "step": 26520 - }, - { - "epoch": 0.673308795532428, - "grad_norm": 0.5009861588478088, - "learning_rate": 1.5511274696450483e-05, - "loss": 0.0674, - "step": 26525 - }, - { - "epoch": 0.6734357151922833, - "grad_norm": 0.4739871323108673, - "learning_rate": 1.5510428565384778e-05, - "loss": 0.0817, - "step": 26530 - }, - { - "epoch": 0.6735626348521386, - "grad_norm": 0.8796969652175903, - "learning_rate": 1.5509582434319077e-05, - "loss": 0.0643, - "step": 26535 - }, - { - "epoch": 0.6736895545119939, - "grad_norm": 0.311044305562973, - "learning_rate": 1.5508736303253375e-05, - "loss": 0.063, - "step": 26540 - }, - { - "epoch": 0.6738164741718492, - "grad_norm": 1.6156561374664307, - "learning_rate": 1.5507890172187673e-05, - "loss": 0.0631, - "step": 26545 - }, - { - "epoch": 0.6739433938317045, - "grad_norm": 0.39277249574661255, - "learning_rate": 1.5507044041121972e-05, - "loss": 0.083, - "step": 26550 - }, - { - "epoch": 0.6740703134915599, - "grad_norm": 1.1565591096878052, - "learning_rate": 1.550619791005627e-05, - "loss": 0.0706, - "step": 26555 - }, - { - "epoch": 0.6741972331514151, - "grad_norm": 0.5493355393409729, - "learning_rate": 1.550535177899057e-05, - "loss": 0.0718, - "step": 26560 - }, - { - "epoch": 0.6743241528112704, - "grad_norm": 0.4138796329498291, - "learning_rate": 1.5504505647924867e-05, - "loss": 0.0582, - "step": 26565 - }, - { - "epoch": 0.6744510724711258, - "grad_norm": 0.5084617137908936, - "learning_rate": 1.5503659516859162e-05, - "loss": 0.0809, - "step": 26570 - }, - { - "epoch": 0.6745779921309811, - "grad_norm": 0.9753233194351196, - "learning_rate": 1.550281338579346e-05, - "loss": 0.0774, - "step": 26575 - }, - { - "epoch": 0.6747049117908364, - "grad_norm": 0.5777877569198608, - "learning_rate": 1.550196725472776e-05, - "loss": 0.0901, - "step": 26580 - }, - { - "epoch": 0.6748318314506917, - "grad_norm": 0.705600917339325, - "learning_rate": 1.5501121123662057e-05, - "loss": 0.0783, - "step": 26585 - }, - { - "epoch": 0.674958751110547, - "grad_norm": 0.6768164038658142, - "learning_rate": 1.5500274992596352e-05, - "loss": 0.0831, - "step": 26590 - }, - { - "epoch": 0.6750856707704024, - "grad_norm": 0.6251177191734314, - "learning_rate": 1.549942886153065e-05, - "loss": 0.1108, - "step": 26595 - }, - { - "epoch": 0.6752125904302576, - "grad_norm": 0.6247931122779846, - "learning_rate": 1.549858273046495e-05, - "loss": 0.1042, - "step": 26600 - }, - { - "epoch": 0.675339510090113, - "grad_norm": 0.4767790138721466, - "learning_rate": 1.5497736599399247e-05, - "loss": 0.0845, - "step": 26605 - }, - { - "epoch": 0.6754664297499683, - "grad_norm": 0.6847190260887146, - "learning_rate": 1.5496890468333546e-05, - "loss": 0.0897, - "step": 26610 - }, - { - "epoch": 0.6755933494098236, - "grad_norm": 0.4274722933769226, - "learning_rate": 1.5496044337267844e-05, - "loss": 0.0754, - "step": 26615 - }, - { - "epoch": 0.6757202690696789, - "grad_norm": 0.3119281530380249, - "learning_rate": 1.5495198206202143e-05, - "loss": 0.0561, - "step": 26620 - }, - { - "epoch": 0.6758471887295342, - "grad_norm": 0.660234272480011, - "learning_rate": 1.549435207513644e-05, - "loss": 0.0634, - "step": 26625 - }, - { - "epoch": 0.6759741083893895, - "grad_norm": 0.49825620651245117, - "learning_rate": 1.5493505944070736e-05, - "loss": 0.08, - "step": 26630 - }, - { - "epoch": 0.6761010280492449, - "grad_norm": 0.7401949763298035, - "learning_rate": 1.5492659813005034e-05, - "loss": 0.0694, - "step": 26635 - }, - { - "epoch": 0.6762279477091001, - "grad_norm": 0.49449804425239563, - "learning_rate": 1.5491813681939333e-05, - "loss": 0.0666, - "step": 26640 - }, - { - "epoch": 0.6763548673689554, - "grad_norm": 0.4290331304073334, - "learning_rate": 1.549096755087363e-05, - "loss": 0.07, - "step": 26645 - }, - { - "epoch": 0.6764817870288108, - "grad_norm": 1.1978868246078491, - "learning_rate": 1.549012141980793e-05, - "loss": 0.0914, - "step": 26650 - }, - { - "epoch": 0.6766087066886661, - "grad_norm": 0.42705070972442627, - "learning_rate": 1.5489275288742228e-05, - "loss": 0.0555, - "step": 26655 - }, - { - "epoch": 0.6767356263485214, - "grad_norm": 0.540539026260376, - "learning_rate": 1.5488429157676526e-05, - "loss": 0.0696, - "step": 26660 - }, - { - "epoch": 0.6768625460083767, - "grad_norm": 0.4604540169239044, - "learning_rate": 1.5487583026610825e-05, - "loss": 0.0781, - "step": 26665 - }, - { - "epoch": 0.676989465668232, - "grad_norm": 0.4780922830104828, - "learning_rate": 1.548673689554512e-05, - "loss": 0.0919, - "step": 26670 - }, - { - "epoch": 0.6771163853280873, - "grad_norm": 0.6420660018920898, - "learning_rate": 1.548589076447942e-05, - "loss": 0.0955, - "step": 26675 - }, - { - "epoch": 0.6772433049879426, - "grad_norm": 0.5097129940986633, - "learning_rate": 1.5485044633413717e-05, - "loss": 0.0873, - "step": 26680 - }, - { - "epoch": 0.677370224647798, - "grad_norm": 0.46168532967567444, - "learning_rate": 1.5484198502348015e-05, - "loss": 0.0746, - "step": 26685 - }, - { - "epoch": 0.6774971443076533, - "grad_norm": 0.5769943594932556, - "learning_rate": 1.5483352371282314e-05, - "loss": 0.0762, - "step": 26690 - }, - { - "epoch": 0.6776240639675085, - "grad_norm": 0.5819986462593079, - "learning_rate": 1.5482506240216612e-05, - "loss": 0.072, - "step": 26695 - }, - { - "epoch": 0.6777509836273639, - "grad_norm": 0.4734315574169159, - "learning_rate": 1.548166010915091e-05, - "loss": 0.0736, - "step": 26700 - }, - { - "epoch": 0.6778779032872192, - "grad_norm": 0.5533196330070496, - "learning_rate": 1.548081397808521e-05, - "loss": 0.0605, - "step": 26705 - }, - { - "epoch": 0.6780048229470745, - "grad_norm": 0.41125231981277466, - "learning_rate": 1.5479967847019504e-05, - "loss": 0.0878, - "step": 26710 - }, - { - "epoch": 0.6781317426069298, - "grad_norm": 0.4636082649230957, - "learning_rate": 1.5479121715953802e-05, - "loss": 0.0747, - "step": 26715 - }, - { - "epoch": 0.6782586622667851, - "grad_norm": 0.6554605960845947, - "learning_rate": 1.54782755848881e-05, - "loss": 0.0753, - "step": 26720 - }, - { - "epoch": 0.6783855819266404, - "grad_norm": 0.5270013809204102, - "learning_rate": 1.54774294538224e-05, - "loss": 0.0934, - "step": 26725 - }, - { - "epoch": 0.6785125015864958, - "grad_norm": 0.4893379807472229, - "learning_rate": 1.5476583322756694e-05, - "loss": 0.1003, - "step": 26730 - }, - { - "epoch": 0.678639421246351, - "grad_norm": 0.4997842609882355, - "learning_rate": 1.5475737191690992e-05, - "loss": 0.0834, - "step": 26735 - }, - { - "epoch": 0.6787663409062064, - "grad_norm": 0.487184077501297, - "learning_rate": 1.547489106062529e-05, - "loss": 0.0641, - "step": 26740 - }, - { - "epoch": 0.6788932605660617, - "grad_norm": 0.5886039733886719, - "learning_rate": 1.547404492955959e-05, - "loss": 0.0869, - "step": 26745 - }, - { - "epoch": 0.679020180225917, - "grad_norm": 0.46788182854652405, - "learning_rate": 1.5473198798493888e-05, - "loss": 0.0757, - "step": 26750 - }, - { - "epoch": 0.6791470998857723, - "grad_norm": 0.6349279284477234, - "learning_rate": 1.5472352667428186e-05, - "loss": 0.0799, - "step": 26755 - }, - { - "epoch": 0.6792740195456276, - "grad_norm": 0.8066713213920593, - "learning_rate": 1.5471506536362484e-05, - "loss": 0.0672, - "step": 26760 - }, - { - "epoch": 0.679400939205483, - "grad_norm": 0.5761719346046448, - "learning_rate": 1.5470660405296783e-05, - "loss": 0.0718, - "step": 26765 - }, - { - "epoch": 0.6795278588653383, - "grad_norm": 0.440176784992218, - "learning_rate": 1.5469814274231078e-05, - "loss": 0.0959, - "step": 26770 - }, - { - "epoch": 0.6796547785251935, - "grad_norm": 0.5699032545089722, - "learning_rate": 1.5468968143165376e-05, - "loss": 0.0714, - "step": 26775 - }, - { - "epoch": 0.6797816981850489, - "grad_norm": 0.5441529750823975, - "learning_rate": 1.5468122012099675e-05, - "loss": 0.0779, - "step": 26780 - }, - { - "epoch": 0.6799086178449042, - "grad_norm": 0.4850459396839142, - "learning_rate": 1.5467275881033973e-05, - "loss": 0.0789, - "step": 26785 - }, - { - "epoch": 0.6800355375047595, - "grad_norm": 0.6795727014541626, - "learning_rate": 1.546642974996827e-05, - "loss": 0.0822, - "step": 26790 - }, - { - "epoch": 0.6801624571646148, - "grad_norm": 0.43084222078323364, - "learning_rate": 1.546558361890257e-05, - "loss": 0.0703, - "step": 26795 - }, - { - "epoch": 0.6802893768244701, - "grad_norm": 0.6545692682266235, - "learning_rate": 1.5464737487836868e-05, - "loss": 0.0801, - "step": 26800 - }, - { - "epoch": 0.6804162964843254, - "grad_norm": 0.8720556497573853, - "learning_rate": 1.5463891356771167e-05, - "loss": 0.0941, - "step": 26805 - }, - { - "epoch": 0.6805432161441808, - "grad_norm": 0.5116707682609558, - "learning_rate": 1.546304522570546e-05, - "loss": 0.1003, - "step": 26810 - }, - { - "epoch": 0.680670135804036, - "grad_norm": 0.60627281665802, - "learning_rate": 1.546219909463976e-05, - "loss": 0.0961, - "step": 26815 - }, - { - "epoch": 0.6807970554638914, - "grad_norm": 0.43054473400115967, - "learning_rate": 1.546135296357406e-05, - "loss": 0.0725, - "step": 26820 - }, - { - "epoch": 0.6809239751237467, - "grad_norm": 1.1013838052749634, - "learning_rate": 1.5460506832508357e-05, - "loss": 0.096, - "step": 26825 - }, - { - "epoch": 0.681050894783602, - "grad_norm": 0.3682571053504944, - "learning_rate": 1.5459660701442655e-05, - "loss": 0.0578, - "step": 26830 - }, - { - "epoch": 0.6811778144434573, - "grad_norm": 0.5237048864364624, - "learning_rate": 1.5458814570376954e-05, - "loss": 0.0944, - "step": 26835 - }, - { - "epoch": 0.6813047341033126, - "grad_norm": 0.5823095440864563, - "learning_rate": 1.5457968439311252e-05, - "loss": 0.0862, - "step": 26840 - }, - { - "epoch": 0.6814316537631679, - "grad_norm": 0.5168060660362244, - "learning_rate": 1.545712230824555e-05, - "loss": 0.0782, - "step": 26845 - }, - { - "epoch": 0.6815585734230232, - "grad_norm": 0.3818340301513672, - "learning_rate": 1.545627617717985e-05, - "loss": 0.0694, - "step": 26850 - }, - { - "epoch": 0.6816854930828785, - "grad_norm": 0.46493110060691833, - "learning_rate": 1.5455430046114144e-05, - "loss": 0.0763, - "step": 26855 - }, - { - "epoch": 0.6818124127427339, - "grad_norm": 0.4303334653377533, - "learning_rate": 1.5454583915048442e-05, - "loss": 0.0852, - "step": 26860 - }, - { - "epoch": 0.6819393324025892, - "grad_norm": 0.4708954095840454, - "learning_rate": 1.545373778398274e-05, - "loss": 0.0916, - "step": 26865 - }, - { - "epoch": 0.6820662520624444, - "grad_norm": 0.4625706076622009, - "learning_rate": 1.545289165291704e-05, - "loss": 0.0848, - "step": 26870 - }, - { - "epoch": 0.6821931717222998, - "grad_norm": 0.6468889713287354, - "learning_rate": 1.5452045521851334e-05, - "loss": 0.0883, - "step": 26875 - }, - { - "epoch": 0.6823200913821551, - "grad_norm": 0.7951503396034241, - "learning_rate": 1.5451199390785632e-05, - "loss": 0.0751, - "step": 26880 - }, - { - "epoch": 0.6824470110420104, - "grad_norm": 0.47978833317756653, - "learning_rate": 1.545035325971993e-05, - "loss": 0.0724, - "step": 26885 - }, - { - "epoch": 0.6825739307018657, - "grad_norm": 1.0005786418914795, - "learning_rate": 1.544950712865423e-05, - "loss": 0.0624, - "step": 26890 - }, - { - "epoch": 0.682700850361721, - "grad_norm": 0.41501936316490173, - "learning_rate": 1.5448660997588528e-05, - "loss": 0.076, - "step": 26895 - }, - { - "epoch": 0.6828277700215764, - "grad_norm": 0.5102667808532715, - "learning_rate": 1.5447814866522826e-05, - "loss": 0.0784, - "step": 26900 - }, - { - "epoch": 0.6829546896814317, - "grad_norm": 0.5299312472343445, - "learning_rate": 1.5446968735457124e-05, - "loss": 0.0746, - "step": 26905 - }, - { - "epoch": 0.6830816093412869, - "grad_norm": 0.3904643654823303, - "learning_rate": 1.5446122604391423e-05, - "loss": 0.0875, - "step": 26910 - }, - { - "epoch": 0.6832085290011423, - "grad_norm": 0.4662639796733856, - "learning_rate": 1.5445276473325718e-05, - "loss": 0.0597, - "step": 26915 - }, - { - "epoch": 0.6833354486609976, - "grad_norm": 0.7181617617607117, - "learning_rate": 1.5444430342260016e-05, - "loss": 0.0526, - "step": 26920 - }, - { - "epoch": 0.6834623683208529, - "grad_norm": 1.007664442062378, - "learning_rate": 1.5443584211194315e-05, - "loss": 0.0869, - "step": 26925 - }, - { - "epoch": 0.6835892879807082, - "grad_norm": 0.5605146288871765, - "learning_rate": 1.5442738080128613e-05, - "loss": 0.0689, - "step": 26930 - }, - { - "epoch": 0.6837162076405635, - "grad_norm": 0.5746894478797913, - "learning_rate": 1.544189194906291e-05, - "loss": 0.0906, - "step": 26935 - }, - { - "epoch": 0.6838431273004189, - "grad_norm": 0.659233570098877, - "learning_rate": 1.544104581799721e-05, - "loss": 0.073, - "step": 26940 - }, - { - "epoch": 0.6839700469602742, - "grad_norm": 0.4516282379627228, - "learning_rate": 1.5440199686931508e-05, - "loss": 0.0735, - "step": 26945 - }, - { - "epoch": 0.6840969666201294, - "grad_norm": 0.6382076740264893, - "learning_rate": 1.5439353555865807e-05, - "loss": 0.076, - "step": 26950 - }, - { - "epoch": 0.6842238862799848, - "grad_norm": 1.1756584644317627, - "learning_rate": 1.5438507424800102e-05, - "loss": 0.088, - "step": 26955 - }, - { - "epoch": 0.6843508059398401, - "grad_norm": 0.6587080955505371, - "learning_rate": 1.54376612937344e-05, - "loss": 0.0733, - "step": 26960 - }, - { - "epoch": 0.6844777255996954, - "grad_norm": 1.1116844415664673, - "learning_rate": 1.54368151626687e-05, - "loss": 0.0702, - "step": 26965 - }, - { - "epoch": 0.6846046452595507, - "grad_norm": 0.41321566700935364, - "learning_rate": 1.5435969031602997e-05, - "loss": 0.0723, - "step": 26970 - }, - { - "epoch": 0.684731564919406, - "grad_norm": 1.0093282461166382, - "learning_rate": 1.5435122900537295e-05, - "loss": 0.0715, - "step": 26975 - }, - { - "epoch": 0.6848584845792614, - "grad_norm": 0.7936349511146545, - "learning_rate": 1.5434276769471594e-05, - "loss": 0.1036, - "step": 26980 - }, - { - "epoch": 0.6849854042391167, - "grad_norm": 0.6383551955223083, - "learning_rate": 1.5433430638405892e-05, - "loss": 0.0647, - "step": 26985 - }, - { - "epoch": 0.6851123238989719, - "grad_norm": 0.4351334571838379, - "learning_rate": 1.543258450734019e-05, - "loss": 0.104, - "step": 26990 - }, - { - "epoch": 0.6852392435588273, - "grad_norm": 0.5618158578872681, - "learning_rate": 1.5431738376274486e-05, - "loss": 0.0848, - "step": 26995 - }, - { - "epoch": 0.6853661632186826, - "grad_norm": 0.4769505560398102, - "learning_rate": 1.5430892245208784e-05, - "loss": 0.0686, - "step": 27000 - }, - { - "epoch": 0.6854930828785379, - "grad_norm": 0.5222181677818298, - "learning_rate": 1.5430046114143082e-05, - "loss": 0.0816, - "step": 27005 - }, - { - "epoch": 0.6856200025383932, - "grad_norm": 0.505913257598877, - "learning_rate": 1.542919998307738e-05, - "loss": 0.0762, - "step": 27010 - }, - { - "epoch": 0.6857469221982485, - "grad_norm": 0.8177339434623718, - "learning_rate": 1.5428353852011676e-05, - "loss": 0.1069, - "step": 27015 - }, - { - "epoch": 0.6858738418581038, - "grad_norm": 0.7317471504211426, - "learning_rate": 1.5427507720945974e-05, - "loss": 0.0614, - "step": 27020 - }, - { - "epoch": 0.6860007615179591, - "grad_norm": 0.5457929372787476, - "learning_rate": 1.5426661589880273e-05, - "loss": 0.0691, - "step": 27025 - }, - { - "epoch": 0.6861276811778144, - "grad_norm": 0.637144148349762, - "learning_rate": 1.542581545881457e-05, - "loss": 0.0608, - "step": 27030 - }, - { - "epoch": 0.6862546008376698, - "grad_norm": 0.6589449048042297, - "learning_rate": 1.542496932774887e-05, - "loss": 0.0848, - "step": 27035 - }, - { - "epoch": 0.6863815204975251, - "grad_norm": 0.5666413307189941, - "learning_rate": 1.5424123196683168e-05, - "loss": 0.0698, - "step": 27040 - }, - { - "epoch": 0.6865084401573803, - "grad_norm": 0.7854941487312317, - "learning_rate": 1.5423277065617466e-05, - "loss": 0.0945, - "step": 27045 - }, - { - "epoch": 0.6866353598172357, - "grad_norm": 1.1406168937683105, - "learning_rate": 1.5422430934551765e-05, - "loss": 0.0988, - "step": 27050 - }, - { - "epoch": 0.686762279477091, - "grad_norm": 0.4784201979637146, - "learning_rate": 1.542158480348606e-05, - "loss": 0.0843, - "step": 27055 - }, - { - "epoch": 0.6868891991369463, - "grad_norm": 1.050290822982788, - "learning_rate": 1.5420738672420358e-05, - "loss": 0.0827, - "step": 27060 - }, - { - "epoch": 0.6870161187968016, - "grad_norm": 0.554878294467926, - "learning_rate": 1.5419892541354656e-05, - "loss": 0.088, - "step": 27065 - }, - { - "epoch": 0.6871430384566569, - "grad_norm": 0.46095171570777893, - "learning_rate": 1.5419046410288955e-05, - "loss": 0.0968, - "step": 27070 - }, - { - "epoch": 0.6872699581165123, - "grad_norm": 1.5074362754821777, - "learning_rate": 1.5418200279223253e-05, - "loss": 0.0847, - "step": 27075 - }, - { - "epoch": 0.6873968777763676, - "grad_norm": 0.4605385959148407, - "learning_rate": 1.541735414815755e-05, - "loss": 0.0838, - "step": 27080 - }, - { - "epoch": 0.6875237974362228, - "grad_norm": 0.6429403424263, - "learning_rate": 1.541650801709185e-05, - "loss": 0.0803, - "step": 27085 - }, - { - "epoch": 0.6876507170960782, - "grad_norm": 0.5165254473686218, - "learning_rate": 1.541566188602615e-05, - "loss": 0.0843, - "step": 27090 - }, - { - "epoch": 0.6877776367559335, - "grad_norm": 1.546995997428894, - "learning_rate": 1.5414815754960443e-05, - "loss": 0.0763, - "step": 27095 - }, - { - "epoch": 0.6879045564157888, - "grad_norm": 0.3889789581298828, - "learning_rate": 1.5413969623894742e-05, - "loss": 0.063, - "step": 27100 - }, - { - "epoch": 0.6880314760756441, - "grad_norm": 0.9436810612678528, - "learning_rate": 1.541312349282904e-05, - "loss": 0.0954, - "step": 27105 - }, - { - "epoch": 0.6881583957354994, - "grad_norm": 0.9593027234077454, - "learning_rate": 1.541227736176334e-05, - "loss": 0.0676, - "step": 27110 - }, - { - "epoch": 0.6882853153953548, - "grad_norm": 0.5452870726585388, - "learning_rate": 1.5411431230697637e-05, - "loss": 0.0828, - "step": 27115 - }, - { - "epoch": 0.6884122350552101, - "grad_norm": 0.42732396721839905, - "learning_rate": 1.5410585099631935e-05, - "loss": 0.0783, - "step": 27120 - }, - { - "epoch": 0.6885391547150653, - "grad_norm": 0.3557831048965454, - "learning_rate": 1.5409738968566234e-05, - "loss": 0.085, - "step": 27125 - }, - { - "epoch": 0.6886660743749207, - "grad_norm": 0.6042054891586304, - "learning_rate": 1.5408892837500532e-05, - "loss": 0.1001, - "step": 27130 - }, - { - "epoch": 0.688792994034776, - "grad_norm": 0.5047823786735535, - "learning_rate": 1.5408046706434827e-05, - "loss": 0.0768, - "step": 27135 - }, - { - "epoch": 0.6889199136946313, - "grad_norm": 0.423196941614151, - "learning_rate": 1.5407200575369126e-05, - "loss": 0.0661, - "step": 27140 - }, - { - "epoch": 0.6890468333544866, - "grad_norm": 0.5752532482147217, - "learning_rate": 1.5406354444303424e-05, - "loss": 0.0832, - "step": 27145 - }, - { - "epoch": 0.6891737530143419, - "grad_norm": 0.4292982816696167, - "learning_rate": 1.5405508313237722e-05, - "loss": 0.0764, - "step": 27150 - }, - { - "epoch": 0.6893006726741973, - "grad_norm": 0.4962009787559509, - "learning_rate": 1.5404662182172017e-05, - "loss": 0.0785, - "step": 27155 - }, - { - "epoch": 0.6894275923340526, - "grad_norm": 0.43963858485221863, - "learning_rate": 1.5403816051106316e-05, - "loss": 0.0711, - "step": 27160 - }, - { - "epoch": 0.6895545119939078, - "grad_norm": 0.4443722367286682, - "learning_rate": 1.5402969920040614e-05, - "loss": 0.0769, - "step": 27165 - }, - { - "epoch": 0.6896814316537632, - "grad_norm": 0.7178105711936951, - "learning_rate": 1.5402123788974913e-05, - "loss": 0.0711, - "step": 27170 - }, - { - "epoch": 0.6898083513136185, - "grad_norm": 0.4952721893787384, - "learning_rate": 1.540127765790921e-05, - "loss": 0.0792, - "step": 27175 - }, - { - "epoch": 0.6899352709734738, - "grad_norm": 0.5811629295349121, - "learning_rate": 1.540043152684351e-05, - "loss": 0.0806, - "step": 27180 - }, - { - "epoch": 0.6900621906333291, - "grad_norm": 1.1052091121673584, - "learning_rate": 1.5399585395777808e-05, - "loss": 0.089, - "step": 27185 - }, - { - "epoch": 0.6901891102931844, - "grad_norm": 0.6296457052230835, - "learning_rate": 1.5398739264712106e-05, - "loss": 0.0675, - "step": 27190 - }, - { - "epoch": 0.6903160299530398, - "grad_norm": 0.46708402037620544, - "learning_rate": 1.53978931336464e-05, - "loss": 0.0662, - "step": 27195 - }, - { - "epoch": 0.690442949612895, - "grad_norm": 0.5535792112350464, - "learning_rate": 1.53970470025807e-05, - "loss": 0.1043, - "step": 27200 - }, - { - "epoch": 0.6905698692727503, - "grad_norm": 0.5646365284919739, - "learning_rate": 1.5396200871514998e-05, - "loss": 0.0683, - "step": 27205 - }, - { - "epoch": 0.6906967889326057, - "grad_norm": 0.3740396797657013, - "learning_rate": 1.5395354740449297e-05, - "loss": 0.0624, - "step": 27210 - }, - { - "epoch": 0.690823708592461, - "grad_norm": 0.524610161781311, - "learning_rate": 1.5394508609383595e-05, - "loss": 0.0473, - "step": 27215 - }, - { - "epoch": 0.6909506282523162, - "grad_norm": 0.6363856196403503, - "learning_rate": 1.5393662478317893e-05, - "loss": 0.0666, - "step": 27220 - }, - { - "epoch": 0.6910775479121716, - "grad_norm": 0.5802817940711975, - "learning_rate": 1.5392816347252192e-05, - "loss": 0.0804, - "step": 27225 - }, - { - "epoch": 0.6912044675720269, - "grad_norm": 1.0503548383712769, - "learning_rate": 1.539197021618649e-05, - "loss": 0.0656, - "step": 27230 - }, - { - "epoch": 0.6913313872318823, - "grad_norm": 0.39139628410339355, - "learning_rate": 1.5391124085120785e-05, - "loss": 0.0769, - "step": 27235 - }, - { - "epoch": 0.6914583068917375, - "grad_norm": 0.6149697303771973, - "learning_rate": 1.5390277954055084e-05, - "loss": 0.0691, - "step": 27240 - }, - { - "epoch": 0.6915852265515928, - "grad_norm": 1.04027259349823, - "learning_rate": 1.5389431822989382e-05, - "loss": 0.0672, - "step": 27245 - }, - { - "epoch": 0.6917121462114482, - "grad_norm": 0.49079740047454834, - "learning_rate": 1.538858569192368e-05, - "loss": 0.0688, - "step": 27250 - }, - { - "epoch": 0.6918390658713035, - "grad_norm": 1.2650761604309082, - "learning_rate": 1.538773956085798e-05, - "loss": 0.0519, - "step": 27255 - }, - { - "epoch": 0.6919659855311587, - "grad_norm": 0.5315563082695007, - "learning_rate": 1.5386893429792277e-05, - "loss": 0.0725, - "step": 27260 - }, - { - "epoch": 0.6920929051910141, - "grad_norm": 0.5459601283073425, - "learning_rate": 1.5386047298726576e-05, - "loss": 0.0704, - "step": 27265 - }, - { - "epoch": 0.6922198248508694, - "grad_norm": 0.508229672908783, - "learning_rate": 1.5385201167660874e-05, - "loss": 0.0581, - "step": 27270 - }, - { - "epoch": 0.6923467445107248, - "grad_norm": 0.4915468096733093, - "learning_rate": 1.538435503659517e-05, - "loss": 0.0915, - "step": 27275 - }, - { - "epoch": 0.69247366417058, - "grad_norm": 0.6359374523162842, - "learning_rate": 1.5383508905529467e-05, - "loss": 0.0808, - "step": 27280 - }, - { - "epoch": 0.6926005838304353, - "grad_norm": 0.4112793505191803, - "learning_rate": 1.5382662774463766e-05, - "loss": 0.091, - "step": 27285 - }, - { - "epoch": 0.6927275034902907, - "grad_norm": 0.5018948316574097, - "learning_rate": 1.5381816643398064e-05, - "loss": 0.1014, - "step": 27290 - }, - { - "epoch": 0.692854423150146, - "grad_norm": 0.48221123218536377, - "learning_rate": 1.538097051233236e-05, - "loss": 0.0723, - "step": 27295 - }, - { - "epoch": 0.6929813428100012, - "grad_norm": 0.5554606318473816, - "learning_rate": 1.5380124381266658e-05, - "loss": 0.0689, - "step": 27300 - }, - { - "epoch": 0.6931082624698566, - "grad_norm": 0.6132370233535767, - "learning_rate": 1.5379278250200956e-05, - "loss": 0.1111, - "step": 27305 - }, - { - "epoch": 0.6932351821297119, - "grad_norm": 0.5675580501556396, - "learning_rate": 1.5378432119135254e-05, - "loss": 0.0756, - "step": 27310 - }, - { - "epoch": 0.6933621017895673, - "grad_norm": 0.78398597240448, - "learning_rate": 1.5377585988069553e-05, - "loss": 0.0664, - "step": 27315 - }, - { - "epoch": 0.6934890214494225, - "grad_norm": 0.4223746657371521, - "learning_rate": 1.537673985700385e-05, - "loss": 0.0618, - "step": 27320 - }, - { - "epoch": 0.6936159411092778, - "grad_norm": 0.5792942047119141, - "learning_rate": 1.537589372593815e-05, - "loss": 0.0786, - "step": 27325 - }, - { - "epoch": 0.6937428607691332, - "grad_norm": 0.4044418931007385, - "learning_rate": 1.5375047594872448e-05, - "loss": 0.0665, - "step": 27330 - }, - { - "epoch": 0.6938697804289885, - "grad_norm": 0.640800416469574, - "learning_rate": 1.5374201463806743e-05, - "loss": 0.1031, - "step": 27335 - }, - { - "epoch": 0.6939967000888437, - "grad_norm": 0.49217018485069275, - "learning_rate": 1.537335533274104e-05, - "loss": 0.0762, - "step": 27340 - }, - { - "epoch": 0.6941236197486991, - "grad_norm": 0.557205319404602, - "learning_rate": 1.537250920167534e-05, - "loss": 0.083, - "step": 27345 - }, - { - "epoch": 0.6942505394085544, - "grad_norm": 0.8593919277191162, - "learning_rate": 1.5371663070609638e-05, - "loss": 0.0816, - "step": 27350 - }, - { - "epoch": 0.6943774590684096, - "grad_norm": 0.3901413083076477, - "learning_rate": 1.5370816939543937e-05, - "loss": 0.0725, - "step": 27355 - }, - { - "epoch": 0.694504378728265, - "grad_norm": 0.6139826774597168, - "learning_rate": 1.5369970808478235e-05, - "loss": 0.1, - "step": 27360 - }, - { - "epoch": 0.6946312983881203, - "grad_norm": 0.5992996692657471, - "learning_rate": 1.5369124677412533e-05, - "loss": 0.0703, - "step": 27365 - }, - { - "epoch": 0.6947582180479757, - "grad_norm": 0.6318697929382324, - "learning_rate": 1.5368278546346832e-05, - "loss": 0.0741, - "step": 27370 - }, - { - "epoch": 0.6948851377078309, - "grad_norm": 0.5115883350372314, - "learning_rate": 1.536743241528113e-05, - "loss": 0.085, - "step": 27375 - }, - { - "epoch": 0.6950120573676862, - "grad_norm": 0.4749890863895416, - "learning_rate": 1.5366586284215425e-05, - "loss": 0.0995, - "step": 27380 - }, - { - "epoch": 0.6951389770275416, - "grad_norm": 1.0869851112365723, - "learning_rate": 1.5365740153149724e-05, - "loss": 0.0936, - "step": 27385 - }, - { - "epoch": 0.6952658966873969, - "grad_norm": 0.4529355466365814, - "learning_rate": 1.5364894022084022e-05, - "loss": 0.0775, - "step": 27390 - }, - { - "epoch": 0.6953928163472521, - "grad_norm": 0.640028178691864, - "learning_rate": 1.536404789101832e-05, - "loss": 0.088, - "step": 27395 - }, - { - "epoch": 0.6955197360071075, - "grad_norm": 0.5919060707092285, - "learning_rate": 1.536320175995262e-05, - "loss": 0.084, - "step": 27400 - }, - { - "epoch": 0.6956466556669628, - "grad_norm": 0.5808092951774597, - "learning_rate": 1.5362355628886917e-05, - "loss": 0.0745, - "step": 27405 - }, - { - "epoch": 0.6957735753268182, - "grad_norm": 0.9744669198989868, - "learning_rate": 1.5361509497821216e-05, - "loss": 0.0906, - "step": 27410 - }, - { - "epoch": 0.6959004949866734, - "grad_norm": 0.5081299543380737, - "learning_rate": 1.5360663366755514e-05, - "loss": 0.0751, - "step": 27415 - }, - { - "epoch": 0.6960274146465287, - "grad_norm": 0.4710322320461273, - "learning_rate": 1.535981723568981e-05, - "loss": 0.075, - "step": 27420 - }, - { - "epoch": 0.6961543343063841, - "grad_norm": 0.794064462184906, - "learning_rate": 1.5358971104624107e-05, - "loss": 0.0878, - "step": 27425 - }, - { - "epoch": 0.6962812539662394, - "grad_norm": 0.4188995659351349, - "learning_rate": 1.5358124973558406e-05, - "loss": 0.0816, - "step": 27430 - }, - { - "epoch": 0.6964081736260946, - "grad_norm": 0.3926332890987396, - "learning_rate": 1.5357278842492704e-05, - "loss": 0.0628, - "step": 27435 - }, - { - "epoch": 0.69653509328595, - "grad_norm": 0.6087777018547058, - "learning_rate": 1.5356432711427e-05, - "loss": 0.0704, - "step": 27440 - }, - { - "epoch": 0.6966620129458053, - "grad_norm": 0.5038256645202637, - "learning_rate": 1.5355586580361298e-05, - "loss": 0.0611, - "step": 27445 - }, - { - "epoch": 0.6967889326056607, - "grad_norm": 0.4087475538253784, - "learning_rate": 1.5354740449295596e-05, - "loss": 0.0841, - "step": 27450 - }, - { - "epoch": 0.6969158522655159, - "grad_norm": 0.5893786549568176, - "learning_rate": 1.5353894318229894e-05, - "loss": 0.0803, - "step": 27455 - }, - { - "epoch": 0.6970427719253712, - "grad_norm": 0.561238169670105, - "learning_rate": 1.5353048187164193e-05, - "loss": 0.0804, - "step": 27460 - }, - { - "epoch": 0.6971696915852266, - "grad_norm": 0.6430284380912781, - "learning_rate": 1.535220205609849e-05, - "loss": 0.0802, - "step": 27465 - }, - { - "epoch": 0.6972966112450819, - "grad_norm": 0.8209748864173889, - "learning_rate": 1.535135592503279e-05, - "loss": 0.0727, - "step": 27470 - }, - { - "epoch": 0.6974235309049371, - "grad_norm": 0.6435092091560364, - "learning_rate": 1.5350509793967088e-05, - "loss": 0.0839, - "step": 27475 - }, - { - "epoch": 0.6975504505647925, - "grad_norm": 1.494761347770691, - "learning_rate": 1.5349663662901383e-05, - "loss": 0.0909, - "step": 27480 - }, - { - "epoch": 0.6976773702246478, - "grad_norm": 0.3407941162586212, - "learning_rate": 1.534881753183568e-05, - "loss": 0.0625, - "step": 27485 - }, - { - "epoch": 0.6978042898845032, - "grad_norm": 0.4978332221508026, - "learning_rate": 1.534797140076998e-05, - "loss": 0.0755, - "step": 27490 - }, - { - "epoch": 0.6979312095443584, - "grad_norm": 0.35904064774513245, - "learning_rate": 1.534712526970428e-05, - "loss": 0.0555, - "step": 27495 - }, - { - "epoch": 0.6980581292042137, - "grad_norm": 1.5096981525421143, - "learning_rate": 1.5346279138638577e-05, - "loss": 0.0784, - "step": 27500 - }, - { - "epoch": 0.6981850488640691, - "grad_norm": 0.4218047857284546, - "learning_rate": 1.5345433007572875e-05, - "loss": 0.0816, - "step": 27505 - }, - { - "epoch": 0.6983119685239244, - "grad_norm": 0.5196631550788879, - "learning_rate": 1.5344586876507174e-05, - "loss": 0.0747, - "step": 27510 - }, - { - "epoch": 0.6984388881837796, - "grad_norm": 0.9191346168518066, - "learning_rate": 1.5343740745441472e-05, - "loss": 0.0749, - "step": 27515 - }, - { - "epoch": 0.698565807843635, - "grad_norm": 0.38459739089012146, - "learning_rate": 1.5342894614375767e-05, - "loss": 0.0786, - "step": 27520 - }, - { - "epoch": 0.6986927275034903, - "grad_norm": 0.9205303192138672, - "learning_rate": 1.5342048483310065e-05, - "loss": 0.0884, - "step": 27525 - }, - { - "epoch": 0.6988196471633455, - "grad_norm": 0.5492190718650818, - "learning_rate": 1.5341202352244364e-05, - "loss": 0.0902, - "step": 27530 - }, - { - "epoch": 0.6989465668232009, - "grad_norm": 0.5391821265220642, - "learning_rate": 1.5340356221178662e-05, - "loss": 0.0724, - "step": 27535 - }, - { - "epoch": 0.6990734864830562, - "grad_norm": 0.48393115401268005, - "learning_rate": 1.533951009011296e-05, - "loss": 0.0825, - "step": 27540 - }, - { - "epoch": 0.6992004061429116, - "grad_norm": 0.5654968619346619, - "learning_rate": 1.533866395904726e-05, - "loss": 0.0822, - "step": 27545 - }, - { - "epoch": 0.6993273258027668, - "grad_norm": 0.541138231754303, - "learning_rate": 1.5337817827981557e-05, - "loss": 0.074, - "step": 27550 - }, - { - "epoch": 0.6994542454626221, - "grad_norm": 0.4121513366699219, - "learning_rate": 1.5336971696915856e-05, - "loss": 0.0784, - "step": 27555 - }, - { - "epoch": 0.6995811651224775, - "grad_norm": 0.4711996018886566, - "learning_rate": 1.533612556585015e-05, - "loss": 0.0656, - "step": 27560 - }, - { - "epoch": 0.6997080847823328, - "grad_norm": 0.7021657228469849, - "learning_rate": 1.533527943478445e-05, - "loss": 0.0805, - "step": 27565 - }, - { - "epoch": 0.699835004442188, - "grad_norm": 0.4633168876171112, - "learning_rate": 1.5334433303718748e-05, - "loss": 0.067, - "step": 27570 - }, - { - "epoch": 0.6999619241020434, - "grad_norm": 0.6200512051582336, - "learning_rate": 1.5333587172653046e-05, - "loss": 0.0626, - "step": 27575 - }, - { - "epoch": 0.7000888437618987, - "grad_norm": 0.46453073620796204, - "learning_rate": 1.533274104158734e-05, - "loss": 0.0693, - "step": 27580 - }, - { - "epoch": 0.7002157634217541, - "grad_norm": 0.7096994519233704, - "learning_rate": 1.533189491052164e-05, - "loss": 0.0615, - "step": 27585 - }, - { - "epoch": 0.7003426830816093, - "grad_norm": 0.6142633557319641, - "learning_rate": 1.5331048779455938e-05, - "loss": 0.0768, - "step": 27590 - }, - { - "epoch": 0.7004696027414646, - "grad_norm": 0.6474071741104126, - "learning_rate": 1.5330202648390236e-05, - "loss": 0.0713, - "step": 27595 - }, - { - "epoch": 0.70059652240132, - "grad_norm": 0.4260212779045105, - "learning_rate": 1.5329356517324535e-05, - "loss": 0.0596, - "step": 27600 - }, - { - "epoch": 0.7007234420611753, - "grad_norm": 0.5518221855163574, - "learning_rate": 1.5328510386258833e-05, - "loss": 0.0762, - "step": 27605 - }, - { - "epoch": 0.7008503617210305, - "grad_norm": 0.5463601350784302, - "learning_rate": 1.532766425519313e-05, - "loss": 0.073, - "step": 27610 - }, - { - "epoch": 0.7009772813808859, - "grad_norm": 0.7637608647346497, - "learning_rate": 1.532681812412743e-05, - "loss": 0.0942, - "step": 27615 - }, - { - "epoch": 0.7011042010407412, - "grad_norm": 0.9360052347183228, - "learning_rate": 1.5325971993061725e-05, - "loss": 0.0849, - "step": 27620 - }, - { - "epoch": 0.7012311207005966, - "grad_norm": 0.475504606962204, - "learning_rate": 1.5325125861996023e-05, - "loss": 0.0733, - "step": 27625 - }, - { - "epoch": 0.7013580403604518, - "grad_norm": 0.604937732219696, - "learning_rate": 1.532427973093032e-05, - "loss": 0.0953, - "step": 27630 - }, - { - "epoch": 0.7014849600203071, - "grad_norm": 0.4882931411266327, - "learning_rate": 1.532343359986462e-05, - "loss": 0.0916, - "step": 27635 - }, - { - "epoch": 0.7016118796801625, - "grad_norm": 0.5691571831703186, - "learning_rate": 1.532258746879892e-05, - "loss": 0.0683, - "step": 27640 - }, - { - "epoch": 0.7017387993400178, - "grad_norm": 0.4691522419452667, - "learning_rate": 1.5321741337733217e-05, - "loss": 0.0949, - "step": 27645 - }, - { - "epoch": 0.701865718999873, - "grad_norm": 0.6025875210762024, - "learning_rate": 1.5320895206667515e-05, - "loss": 0.0702, - "step": 27650 - }, - { - "epoch": 0.7019926386597284, - "grad_norm": 0.46353679895401, - "learning_rate": 1.5320049075601814e-05, - "loss": 0.0616, - "step": 27655 - }, - { - "epoch": 0.7021195583195837, - "grad_norm": 0.46556511521339417, - "learning_rate": 1.531920294453611e-05, - "loss": 0.0838, - "step": 27660 - }, - { - "epoch": 0.7022464779794391, - "grad_norm": 1.5853947401046753, - "learning_rate": 1.5318356813470407e-05, - "loss": 0.1022, - "step": 27665 - }, - { - "epoch": 0.7023733976392943, - "grad_norm": 0.47695836424827576, - "learning_rate": 1.5317510682404705e-05, - "loss": 0.0647, - "step": 27670 - }, - { - "epoch": 0.7025003172991496, - "grad_norm": 1.0827410221099854, - "learning_rate": 1.5316664551339004e-05, - "loss": 0.0872, - "step": 27675 - }, - { - "epoch": 0.702627236959005, - "grad_norm": 0.8112504482269287, - "learning_rate": 1.5315818420273302e-05, - "loss": 0.0867, - "step": 27680 - }, - { - "epoch": 0.7027541566188603, - "grad_norm": 0.5159047245979309, - "learning_rate": 1.53149722892076e-05, - "loss": 0.0802, - "step": 27685 - }, - { - "epoch": 0.7028810762787155, - "grad_norm": 0.5715064406394958, - "learning_rate": 1.53141261581419e-05, - "loss": 0.0719, - "step": 27690 - }, - { - "epoch": 0.7030079959385709, - "grad_norm": 0.9808611869812012, - "learning_rate": 1.5313280027076197e-05, - "loss": 0.0675, - "step": 27695 - }, - { - "epoch": 0.7031349155984262, - "grad_norm": 0.3741804361343384, - "learning_rate": 1.5312433896010492e-05, - "loss": 0.0603, - "step": 27700 - }, - { - "epoch": 0.7032618352582815, - "grad_norm": 0.4923637807369232, - "learning_rate": 1.531158776494479e-05, - "loss": 0.081, - "step": 27705 - }, - { - "epoch": 0.7033887549181368, - "grad_norm": 0.6602376699447632, - "learning_rate": 1.531074163387909e-05, - "loss": 0.0866, - "step": 27710 - }, - { - "epoch": 0.7035156745779921, - "grad_norm": 0.6769668459892273, - "learning_rate": 1.5309895502813388e-05, - "loss": 0.0779, - "step": 27715 - }, - { - "epoch": 0.7036425942378475, - "grad_norm": 0.6130188703536987, - "learning_rate": 1.5309049371747683e-05, - "loss": 0.0771, - "step": 27720 - }, - { - "epoch": 0.7037695138977027, - "grad_norm": 0.6500875353813171, - "learning_rate": 1.530820324068198e-05, - "loss": 0.0665, - "step": 27725 - }, - { - "epoch": 0.703896433557558, - "grad_norm": 1.3650981187820435, - "learning_rate": 1.530735710961628e-05, - "loss": 0.1062, - "step": 27730 - }, - { - "epoch": 0.7040233532174134, - "grad_norm": 0.4946935474872589, - "learning_rate": 1.5306510978550578e-05, - "loss": 0.0776, - "step": 27735 - }, - { - "epoch": 0.7041502728772687, - "grad_norm": 0.60744708776474, - "learning_rate": 1.5305664847484876e-05, - "loss": 0.1041, - "step": 27740 - }, - { - "epoch": 0.704277192537124, - "grad_norm": 0.53304123878479, - "learning_rate": 1.5304818716419175e-05, - "loss": 0.074, - "step": 27745 - }, - { - "epoch": 0.7044041121969793, - "grad_norm": 0.5886366963386536, - "learning_rate": 1.5303972585353473e-05, - "loss": 0.0607, - "step": 27750 - }, - { - "epoch": 0.7045310318568346, - "grad_norm": 1.68309485912323, - "learning_rate": 1.530312645428777e-05, - "loss": 0.0653, - "step": 27755 - }, - { - "epoch": 0.70465795151669, - "grad_norm": 1.1175413131713867, - "learning_rate": 1.5302280323222067e-05, - "loss": 0.0817, - "step": 27760 - }, - { - "epoch": 0.7047848711765452, - "grad_norm": 0.49793750047683716, - "learning_rate": 1.5301434192156365e-05, - "loss": 0.0826, - "step": 27765 - }, - { - "epoch": 0.7049117908364005, - "grad_norm": 0.48911744356155396, - "learning_rate": 1.5300588061090663e-05, - "loss": 0.0626, - "step": 27770 - }, - { - "epoch": 0.7050387104962559, - "grad_norm": 0.38664427399635315, - "learning_rate": 1.5299741930024962e-05, - "loss": 0.0681, - "step": 27775 - }, - { - "epoch": 0.7051656301561112, - "grad_norm": 0.45361289381980896, - "learning_rate": 1.529889579895926e-05, - "loss": 0.0752, - "step": 27780 - }, - { - "epoch": 0.7052925498159665, - "grad_norm": 0.4490770995616913, - "learning_rate": 1.529804966789356e-05, - "loss": 0.0695, - "step": 27785 - }, - { - "epoch": 0.7054194694758218, - "grad_norm": 0.6481912732124329, - "learning_rate": 1.5297203536827857e-05, - "loss": 0.0677, - "step": 27790 - }, - { - "epoch": 0.7055463891356771, - "grad_norm": 0.48350808024406433, - "learning_rate": 1.5296357405762155e-05, - "loss": 0.0728, - "step": 27795 - }, - { - "epoch": 0.7056733087955325, - "grad_norm": 0.8417527079582214, - "learning_rate": 1.529551127469645e-05, - "loss": 0.0723, - "step": 27800 - }, - { - "epoch": 0.7058002284553877, - "grad_norm": 0.5390920042991638, - "learning_rate": 1.529466514363075e-05, - "loss": 0.0813, - "step": 27805 - }, - { - "epoch": 0.705927148115243, - "grad_norm": 0.545564591884613, - "learning_rate": 1.5293819012565047e-05, - "loss": 0.0736, - "step": 27810 - }, - { - "epoch": 0.7060540677750984, - "grad_norm": 0.45539212226867676, - "learning_rate": 1.5292972881499346e-05, - "loss": 0.0792, - "step": 27815 - }, - { - "epoch": 0.7061809874349537, - "grad_norm": 0.34631767868995667, - "learning_rate": 1.5292126750433644e-05, - "loss": 0.0836, - "step": 27820 - }, - { - "epoch": 0.706307907094809, - "grad_norm": 0.820807158946991, - "learning_rate": 1.5291280619367942e-05, - "loss": 0.103, - "step": 27825 - }, - { - "epoch": 0.7064348267546643, - "grad_norm": 1.1574835777282715, - "learning_rate": 1.529043448830224e-05, - "loss": 0.0756, - "step": 27830 - }, - { - "epoch": 0.7065617464145196, - "grad_norm": 0.4214993417263031, - "learning_rate": 1.528958835723654e-05, - "loss": 0.0787, - "step": 27835 - }, - { - "epoch": 0.706688666074375, - "grad_norm": 1.0246186256408691, - "learning_rate": 1.5288742226170834e-05, - "loss": 0.0788, - "step": 27840 - }, - { - "epoch": 0.7068155857342302, - "grad_norm": 0.5138680934906006, - "learning_rate": 1.5287896095105133e-05, - "loss": 0.0868, - "step": 27845 - }, - { - "epoch": 0.7069425053940855, - "grad_norm": 0.5278159976005554, - "learning_rate": 1.528704996403943e-05, - "loss": 0.0679, - "step": 27850 - }, - { - "epoch": 0.7070694250539409, - "grad_norm": 0.47480061650276184, - "learning_rate": 1.528620383297373e-05, - "loss": 0.0881, - "step": 27855 - }, - { - "epoch": 0.7071963447137962, - "grad_norm": 1.7669185400009155, - "learning_rate": 1.5285357701908024e-05, - "loss": 0.0736, - "step": 27860 - }, - { - "epoch": 0.7073232643736515, - "grad_norm": 0.5160654187202454, - "learning_rate": 1.5284511570842323e-05, - "loss": 0.078, - "step": 27865 - }, - { - "epoch": 0.7074501840335068, - "grad_norm": 0.4577394127845764, - "learning_rate": 1.528366543977662e-05, - "loss": 0.0779, - "step": 27870 - }, - { - "epoch": 0.7075771036933621, - "grad_norm": 0.6169967651367188, - "learning_rate": 1.528281930871092e-05, - "loss": 0.0772, - "step": 27875 - }, - { - "epoch": 0.7077040233532174, - "grad_norm": 0.6163668036460876, - "learning_rate": 1.5281973177645218e-05, - "loss": 0.0796, - "step": 27880 - }, - { - "epoch": 0.7078309430130727, - "grad_norm": 0.7428560853004456, - "learning_rate": 1.5281127046579516e-05, - "loss": 0.0822, - "step": 27885 - }, - { - "epoch": 0.707957862672928, - "grad_norm": 0.6917024254798889, - "learning_rate": 1.5280280915513815e-05, - "loss": 0.0908, - "step": 27890 - }, - { - "epoch": 0.7080847823327834, - "grad_norm": 0.43397194147109985, - "learning_rate": 1.5279434784448113e-05, - "loss": 0.0547, - "step": 27895 - }, - { - "epoch": 0.7082117019926386, - "grad_norm": 3.008948564529419, - "learning_rate": 1.527858865338241e-05, - "loss": 0.0933, - "step": 27900 - }, - { - "epoch": 0.708338621652494, - "grad_norm": 0.4439755082130432, - "learning_rate": 1.5277742522316707e-05, - "loss": 0.0582, - "step": 27905 - }, - { - "epoch": 0.7084655413123493, - "grad_norm": 0.460104763507843, - "learning_rate": 1.5276896391251005e-05, - "loss": 0.0661, - "step": 27910 - }, - { - "epoch": 0.7085924609722046, - "grad_norm": 0.6237339377403259, - "learning_rate": 1.5276050260185303e-05, - "loss": 0.0659, - "step": 27915 - }, - { - "epoch": 0.7087193806320599, - "grad_norm": 0.4575790464878082, - "learning_rate": 1.5275204129119602e-05, - "loss": 0.0602, - "step": 27920 - }, - { - "epoch": 0.7088463002919152, - "grad_norm": 0.48309561610221863, - "learning_rate": 1.52743579980539e-05, - "loss": 0.0708, - "step": 27925 - }, - { - "epoch": 0.7089732199517705, - "grad_norm": 0.8328773379325867, - "learning_rate": 1.52735118669882e-05, - "loss": 0.0781, - "step": 27930 - }, - { - "epoch": 0.7091001396116259, - "grad_norm": 0.8798452019691467, - "learning_rate": 1.5272665735922497e-05, - "loss": 0.0761, - "step": 27935 - }, - { - "epoch": 0.7092270592714811, - "grad_norm": 1.031124234199524, - "learning_rate": 1.5271819604856795e-05, - "loss": 0.0654, - "step": 27940 - }, - { - "epoch": 0.7093539789313364, - "grad_norm": 0.5144909620285034, - "learning_rate": 1.527097347379109e-05, - "loss": 0.0827, - "step": 27945 - }, - { - "epoch": 0.7094808985911918, - "grad_norm": 0.5110494494438171, - "learning_rate": 1.527012734272539e-05, - "loss": 0.0673, - "step": 27950 - }, - { - "epoch": 0.7096078182510471, - "grad_norm": 0.5666137337684631, - "learning_rate": 1.5269281211659687e-05, - "loss": 0.0748, - "step": 27955 - }, - { - "epoch": 0.7097347379109024, - "grad_norm": 0.9650017023086548, - "learning_rate": 1.5268435080593986e-05, - "loss": 0.0668, - "step": 27960 - }, - { - "epoch": 0.7098616575707577, - "grad_norm": 0.4008505046367645, - "learning_rate": 1.5267588949528284e-05, - "loss": 0.0668, - "step": 27965 - }, - { - "epoch": 0.709988577230613, - "grad_norm": 0.5589998960494995, - "learning_rate": 1.5266742818462582e-05, - "loss": 0.0644, - "step": 27970 - }, - { - "epoch": 0.7101154968904684, - "grad_norm": 0.478901743888855, - "learning_rate": 1.526589668739688e-05, - "loss": 0.0688, - "step": 27975 - }, - { - "epoch": 0.7102424165503236, - "grad_norm": 0.768817663192749, - "learning_rate": 1.526505055633118e-05, - "loss": 0.0815, - "step": 27980 - }, - { - "epoch": 0.710369336210179, - "grad_norm": 0.4496302604675293, - "learning_rate": 1.5264204425265474e-05, - "loss": 0.0589, - "step": 27985 - }, - { - "epoch": 0.7104962558700343, - "grad_norm": 0.39842140674591064, - "learning_rate": 1.5263358294199773e-05, - "loss": 0.0707, - "step": 27990 - }, - { - "epoch": 0.7106231755298896, - "grad_norm": 0.42673251032829285, - "learning_rate": 1.526251216313407e-05, - "loss": 0.0734, - "step": 27995 - }, - { - "epoch": 0.7107500951897449, - "grad_norm": 0.33541738986968994, - "learning_rate": 1.526166603206837e-05, - "loss": 0.0677, - "step": 28000 - }, - { - "epoch": 0.7108770148496002, - "grad_norm": 0.4739742875099182, - "learning_rate": 1.5260819901002664e-05, - "loss": 0.0658, - "step": 28005 - }, - { - "epoch": 0.7110039345094555, - "grad_norm": 0.5941132307052612, - "learning_rate": 1.5259973769936963e-05, - "loss": 0.0853, - "step": 28010 - }, - { - "epoch": 0.7111308541693109, - "grad_norm": 0.4054299294948578, - "learning_rate": 1.525912763887126e-05, - "loss": 0.0684, - "step": 28015 - }, - { - "epoch": 0.7112577738291661, - "grad_norm": 0.5869559049606323, - "learning_rate": 1.5258281507805561e-05, - "loss": 0.0846, - "step": 28020 - }, - { - "epoch": 0.7113846934890214, - "grad_norm": 0.4678943157196045, - "learning_rate": 1.5257435376739858e-05, - "loss": 0.0687, - "step": 28025 - }, - { - "epoch": 0.7115116131488768, - "grad_norm": 0.39284002780914307, - "learning_rate": 1.5256589245674157e-05, - "loss": 0.0691, - "step": 28030 - }, - { - "epoch": 0.7116385328087321, - "grad_norm": 0.6832972764968872, - "learning_rate": 1.5255743114608455e-05, - "loss": 0.0812, - "step": 28035 - }, - { - "epoch": 0.7117654524685874, - "grad_norm": 0.6693100929260254, - "learning_rate": 1.5254896983542753e-05, - "loss": 0.0651, - "step": 28040 - }, - { - "epoch": 0.7118923721284427, - "grad_norm": 0.510979413986206, - "learning_rate": 1.525405085247705e-05, - "loss": 0.0734, - "step": 28045 - }, - { - "epoch": 0.712019291788298, - "grad_norm": 0.409019410610199, - "learning_rate": 1.5253204721411348e-05, - "loss": 0.0639, - "step": 28050 - }, - { - "epoch": 0.7121462114481533, - "grad_norm": 0.435287743806839, - "learning_rate": 1.5252358590345647e-05, - "loss": 0.0744, - "step": 28055 - }, - { - "epoch": 0.7122731311080086, - "grad_norm": 0.527946412563324, - "learning_rate": 1.5251512459279945e-05, - "loss": 0.0733, - "step": 28060 - }, - { - "epoch": 0.712400050767864, - "grad_norm": 0.9571462869644165, - "learning_rate": 1.525066632821424e-05, - "loss": 0.0902, - "step": 28065 - }, - { - "epoch": 0.7125269704277193, - "grad_norm": 0.48520857095718384, - "learning_rate": 1.5249820197148539e-05, - "loss": 0.0742, - "step": 28070 - }, - { - "epoch": 0.7126538900875745, - "grad_norm": 0.5645105242729187, - "learning_rate": 1.5248974066082837e-05, - "loss": 0.0744, - "step": 28075 - }, - { - "epoch": 0.7127808097474299, - "grad_norm": 0.6595269441604614, - "learning_rate": 1.5248127935017135e-05, - "loss": 0.0914, - "step": 28080 - }, - { - "epoch": 0.7129077294072852, - "grad_norm": 0.5216467380523682, - "learning_rate": 1.5247281803951432e-05, - "loss": 0.0937, - "step": 28085 - }, - { - "epoch": 0.7130346490671405, - "grad_norm": 0.549930989742279, - "learning_rate": 1.524643567288573e-05, - "loss": 0.0935, - "step": 28090 - }, - { - "epoch": 0.7131615687269958, - "grad_norm": 0.7253485918045044, - "learning_rate": 1.5245589541820029e-05, - "loss": 0.108, - "step": 28095 - }, - { - "epoch": 0.7132884883868511, - "grad_norm": 0.4891577661037445, - "learning_rate": 1.5244743410754327e-05, - "loss": 0.0639, - "step": 28100 - }, - { - "epoch": 0.7134154080467064, - "grad_norm": 0.6700971722602844, - "learning_rate": 1.5243897279688624e-05, - "loss": 0.0672, - "step": 28105 - }, - { - "epoch": 0.7135423277065618, - "grad_norm": 0.5404808521270752, - "learning_rate": 1.5243051148622922e-05, - "loss": 0.0725, - "step": 28110 - }, - { - "epoch": 0.713669247366417, - "grad_norm": 0.7661257386207581, - "learning_rate": 1.5242205017557221e-05, - "loss": 0.0758, - "step": 28115 - }, - { - "epoch": 0.7137961670262724, - "grad_norm": 0.6056308746337891, - "learning_rate": 1.524135888649152e-05, - "loss": 0.0724, - "step": 28120 - }, - { - "epoch": 0.7139230866861277, - "grad_norm": 0.5804558992385864, - "learning_rate": 1.5240512755425816e-05, - "loss": 0.0938, - "step": 28125 - }, - { - "epoch": 0.714050006345983, - "grad_norm": 0.4327389895915985, - "learning_rate": 1.5239666624360114e-05, - "loss": 0.0731, - "step": 28130 - }, - { - "epoch": 0.7141769260058383, - "grad_norm": 0.405392587184906, - "learning_rate": 1.5238820493294413e-05, - "loss": 0.0751, - "step": 28135 - }, - { - "epoch": 0.7143038456656936, - "grad_norm": 0.6504332423210144, - "learning_rate": 1.5237974362228711e-05, - "loss": 0.0682, - "step": 28140 - }, - { - "epoch": 0.7144307653255489, - "grad_norm": 0.5016568899154663, - "learning_rate": 1.5237128231163008e-05, - "loss": 0.0704, - "step": 28145 - }, - { - "epoch": 0.7145576849854043, - "grad_norm": 0.7112600207328796, - "learning_rate": 1.5236282100097306e-05, - "loss": 0.0536, - "step": 28150 - }, - { - "epoch": 0.7146846046452595, - "grad_norm": 0.38798466324806213, - "learning_rate": 1.5235435969031605e-05, - "loss": 0.0625, - "step": 28155 - }, - { - "epoch": 0.7148115243051149, - "grad_norm": 0.5247291326522827, - "learning_rate": 1.5234589837965903e-05, - "loss": 0.0909, - "step": 28160 - }, - { - "epoch": 0.7149384439649702, - "grad_norm": 0.437313973903656, - "learning_rate": 1.52337437069002e-05, - "loss": 0.0899, - "step": 28165 - }, - { - "epoch": 0.7150653636248255, - "grad_norm": 0.7365561127662659, - "learning_rate": 1.5232897575834498e-05, - "loss": 0.064, - "step": 28170 - }, - { - "epoch": 0.7151922832846808, - "grad_norm": 0.4586468040943146, - "learning_rate": 1.5232051444768797e-05, - "loss": 0.0567, - "step": 28175 - }, - { - "epoch": 0.7153192029445361, - "grad_norm": 0.9962919354438782, - "learning_rate": 1.5231205313703095e-05, - "loss": 0.0855, - "step": 28180 - }, - { - "epoch": 0.7154461226043914, - "grad_norm": 0.45911869406700134, - "learning_rate": 1.5230359182637392e-05, - "loss": 0.0735, - "step": 28185 - }, - { - "epoch": 0.7155730422642468, - "grad_norm": 0.7661426067352295, - "learning_rate": 1.522951305157169e-05, - "loss": 0.0925, - "step": 28190 - }, - { - "epoch": 0.715699961924102, - "grad_norm": 0.4748556911945343, - "learning_rate": 1.5228666920505989e-05, - "loss": 0.0766, - "step": 28195 - }, - { - "epoch": 0.7158268815839574, - "grad_norm": 0.4402677118778229, - "learning_rate": 1.5227820789440287e-05, - "loss": 0.0754, - "step": 28200 - }, - { - "epoch": 0.7159538012438127, - "grad_norm": 0.7236750721931458, - "learning_rate": 1.5226974658374582e-05, - "loss": 0.0629, - "step": 28205 - }, - { - "epoch": 0.716080720903668, - "grad_norm": 0.5704312920570374, - "learning_rate": 1.522612852730888e-05, - "loss": 0.065, - "step": 28210 - }, - { - "epoch": 0.7162076405635233, - "grad_norm": 0.66584712266922, - "learning_rate": 1.5225282396243179e-05, - "loss": 0.0693, - "step": 28215 - }, - { - "epoch": 0.7163345602233786, - "grad_norm": 0.45025792717933655, - "learning_rate": 1.5224436265177477e-05, - "loss": 0.0785, - "step": 28220 - }, - { - "epoch": 0.7164614798832339, - "grad_norm": 2.137105703353882, - "learning_rate": 1.5223590134111774e-05, - "loss": 0.0813, - "step": 28225 - }, - { - "epoch": 0.7165883995430892, - "grad_norm": 0.8944762945175171, - "learning_rate": 1.5222744003046072e-05, - "loss": 0.0837, - "step": 28230 - }, - { - "epoch": 0.7167153192029445, - "grad_norm": 0.48349329829216003, - "learning_rate": 1.522189787198037e-05, - "loss": 0.0646, - "step": 28235 - }, - { - "epoch": 0.7168422388627999, - "grad_norm": 0.29302075505256653, - "learning_rate": 1.5221051740914669e-05, - "loss": 0.0688, - "step": 28240 - }, - { - "epoch": 0.7169691585226552, - "grad_norm": 0.36715802550315857, - "learning_rate": 1.5220205609848966e-05, - "loss": 0.0845, - "step": 28245 - }, - { - "epoch": 0.7170960781825104, - "grad_norm": 1.5365729331970215, - "learning_rate": 1.5219359478783264e-05, - "loss": 0.0896, - "step": 28250 - }, - { - "epoch": 0.7172229978423658, - "grad_norm": 1.188496708869934, - "learning_rate": 1.5218513347717563e-05, - "loss": 0.0689, - "step": 28255 - }, - { - "epoch": 0.7173499175022211, - "grad_norm": 0.5500271320343018, - "learning_rate": 1.5217667216651861e-05, - "loss": 0.0799, - "step": 28260 - }, - { - "epoch": 0.7174768371620764, - "grad_norm": 0.4476902484893799, - "learning_rate": 1.5216821085586158e-05, - "loss": 0.0683, - "step": 28265 - }, - { - "epoch": 0.7176037568219317, - "grad_norm": 0.43855729699134827, - "learning_rate": 1.5215974954520456e-05, - "loss": 0.0635, - "step": 28270 - }, - { - "epoch": 0.717730676481787, - "grad_norm": 0.49547868967056274, - "learning_rate": 1.5215128823454754e-05, - "loss": 0.0805, - "step": 28275 - }, - { - "epoch": 0.7178575961416424, - "grad_norm": 0.5212785601615906, - "learning_rate": 1.5214282692389053e-05, - "loss": 0.0742, - "step": 28280 - }, - { - "epoch": 0.7179845158014977, - "grad_norm": 0.5327005386352539, - "learning_rate": 1.521343656132335e-05, - "loss": 0.0803, - "step": 28285 - }, - { - "epoch": 0.7181114354613529, - "grad_norm": 0.5577854514122009, - "learning_rate": 1.5212590430257648e-05, - "loss": 0.0541, - "step": 28290 - }, - { - "epoch": 0.7182383551212083, - "grad_norm": 0.3061642646789551, - "learning_rate": 1.5211744299191946e-05, - "loss": 0.075, - "step": 28295 - }, - { - "epoch": 0.7183652747810636, - "grad_norm": 0.4479852616786957, - "learning_rate": 1.5210898168126245e-05, - "loss": 0.0592, - "step": 28300 - }, - { - "epoch": 0.7184921944409189, - "grad_norm": 0.4907691776752472, - "learning_rate": 1.5210052037060542e-05, - "loss": 0.072, - "step": 28305 - }, - { - "epoch": 0.7186191141007742, - "grad_norm": 0.3830508887767792, - "learning_rate": 1.520920590599484e-05, - "loss": 0.0624, - "step": 28310 - }, - { - "epoch": 0.7187460337606295, - "grad_norm": 0.46005457639694214, - "learning_rate": 1.5208359774929138e-05, - "loss": 0.0731, - "step": 28315 - }, - { - "epoch": 0.7188729534204849, - "grad_norm": 0.46852537989616394, - "learning_rate": 1.5207513643863437e-05, - "loss": 0.0764, - "step": 28320 - }, - { - "epoch": 0.7189998730803402, - "grad_norm": 0.6932499408721924, - "learning_rate": 1.5206667512797732e-05, - "loss": 0.1021, - "step": 28325 - }, - { - "epoch": 0.7191267927401954, - "grad_norm": 0.7237448692321777, - "learning_rate": 1.5205821381732032e-05, - "loss": 0.0943, - "step": 28330 - }, - { - "epoch": 0.7192537124000508, - "grad_norm": 0.5981670022010803, - "learning_rate": 1.520497525066633e-05, - "loss": 0.0864, - "step": 28335 - }, - { - "epoch": 0.7193806320599061, - "grad_norm": 0.5098859071731567, - "learning_rate": 1.5204129119600629e-05, - "loss": 0.0713, - "step": 28340 - }, - { - "epoch": 0.7195075517197614, - "grad_norm": 0.5143765211105347, - "learning_rate": 1.5203282988534924e-05, - "loss": 0.069, - "step": 28345 - }, - { - "epoch": 0.7196344713796167, - "grad_norm": 0.5979509353637695, - "learning_rate": 1.5202436857469222e-05, - "loss": 0.0751, - "step": 28350 - }, - { - "epoch": 0.719761391039472, - "grad_norm": 0.48765069246292114, - "learning_rate": 1.520159072640352e-05, - "loss": 0.0671, - "step": 28355 - }, - { - "epoch": 0.7198883106993273, - "grad_norm": 0.5655904412269592, - "learning_rate": 1.5200744595337819e-05, - "loss": 0.0548, - "step": 28360 - }, - { - "epoch": 0.7200152303591827, - "grad_norm": 0.42686134576797485, - "learning_rate": 1.5199898464272116e-05, - "loss": 0.0797, - "step": 28365 - }, - { - "epoch": 0.7201421500190379, - "grad_norm": 0.7934390306472778, - "learning_rate": 1.5199052333206414e-05, - "loss": 0.08, - "step": 28370 - }, - { - "epoch": 0.7202690696788933, - "grad_norm": 0.5957829356193542, - "learning_rate": 1.5198206202140712e-05, - "loss": 0.0629, - "step": 28375 - }, - { - "epoch": 0.7203959893387486, - "grad_norm": 0.3604542315006256, - "learning_rate": 1.519736007107501e-05, - "loss": 0.0635, - "step": 28380 - }, - { - "epoch": 0.7205229089986039, - "grad_norm": 1.0504302978515625, - "learning_rate": 1.5196513940009307e-05, - "loss": 0.0943, - "step": 28385 - }, - { - "epoch": 0.7206498286584592, - "grad_norm": 0.8695054054260254, - "learning_rate": 1.5195667808943606e-05, - "loss": 0.0844, - "step": 28390 - }, - { - "epoch": 0.7207767483183145, - "grad_norm": 2.0043838024139404, - "learning_rate": 1.5194821677877904e-05, - "loss": 0.0944, - "step": 28395 - }, - { - "epoch": 0.7209036679781698, - "grad_norm": 0.5817368626594543, - "learning_rate": 1.5193975546812203e-05, - "loss": 0.0863, - "step": 28400 - }, - { - "epoch": 0.7210305876380251, - "grad_norm": 0.4209287166595459, - "learning_rate": 1.5193129415746501e-05, - "loss": 0.0733, - "step": 28405 - }, - { - "epoch": 0.7211575072978804, - "grad_norm": 0.98221755027771, - "learning_rate": 1.5192283284680798e-05, - "loss": 0.1057, - "step": 28410 - }, - { - "epoch": 0.7212844269577358, - "grad_norm": 1.1986887454986572, - "learning_rate": 1.5191437153615096e-05, - "loss": 0.0695, - "step": 28415 - }, - { - "epoch": 0.7214113466175911, - "grad_norm": 0.5827061533927917, - "learning_rate": 1.5190591022549395e-05, - "loss": 0.0809, - "step": 28420 - }, - { - "epoch": 0.7215382662774463, - "grad_norm": 0.6933357119560242, - "learning_rate": 1.5189744891483693e-05, - "loss": 0.074, - "step": 28425 - }, - { - "epoch": 0.7216651859373017, - "grad_norm": 0.782218873500824, - "learning_rate": 1.518889876041799e-05, - "loss": 0.0983, - "step": 28430 - }, - { - "epoch": 0.721792105597157, - "grad_norm": 0.5838494896888733, - "learning_rate": 1.5188052629352288e-05, - "loss": 0.0754, - "step": 28435 - }, - { - "epoch": 0.7219190252570123, - "grad_norm": 0.3990831673145294, - "learning_rate": 1.5187206498286587e-05, - "loss": 0.0763, - "step": 28440 - }, - { - "epoch": 0.7220459449168676, - "grad_norm": 0.642582893371582, - "learning_rate": 1.5186360367220885e-05, - "loss": 0.1078, - "step": 28445 - }, - { - "epoch": 0.7221728645767229, - "grad_norm": 0.4779188930988312, - "learning_rate": 1.5185514236155182e-05, - "loss": 0.0688, - "step": 28450 - }, - { - "epoch": 0.7222997842365783, - "grad_norm": 0.43423062562942505, - "learning_rate": 1.518466810508948e-05, - "loss": 0.0842, - "step": 28455 - }, - { - "epoch": 0.7224267038964336, - "grad_norm": 0.46826088428497314, - "learning_rate": 1.5183821974023778e-05, - "loss": 0.0718, - "step": 28460 - }, - { - "epoch": 0.7225536235562888, - "grad_norm": 0.4272255301475525, - "learning_rate": 1.5182975842958077e-05, - "loss": 0.066, - "step": 28465 - }, - { - "epoch": 0.7226805432161442, - "grad_norm": 1.059203863143921, - "learning_rate": 1.5182129711892374e-05, - "loss": 0.0863, - "step": 28470 - }, - { - "epoch": 0.7228074628759995, - "grad_norm": 0.4420754015445709, - "learning_rate": 1.5181283580826672e-05, - "loss": 0.0717, - "step": 28475 - }, - { - "epoch": 0.7229343825358548, - "grad_norm": 0.4609951972961426, - "learning_rate": 1.518043744976097e-05, - "loss": 0.0704, - "step": 28480 - }, - { - "epoch": 0.7230613021957101, - "grad_norm": 0.4938882291316986, - "learning_rate": 1.5179591318695269e-05, - "loss": 0.0739, - "step": 28485 - }, - { - "epoch": 0.7231882218555654, - "grad_norm": 0.6591818928718567, - "learning_rate": 1.5178745187629564e-05, - "loss": 0.0674, - "step": 28490 - }, - { - "epoch": 0.7233151415154208, - "grad_norm": 0.34518250823020935, - "learning_rate": 1.5177899056563862e-05, - "loss": 0.0628, - "step": 28495 - }, - { - "epoch": 0.7234420611752761, - "grad_norm": 0.6767856478691101, - "learning_rate": 1.517705292549816e-05, - "loss": 0.0656, - "step": 28500 - }, - { - "epoch": 0.7235689808351313, - "grad_norm": 0.5813471674919128, - "learning_rate": 1.517620679443246e-05, - "loss": 0.0579, - "step": 28505 - }, - { - "epoch": 0.7236959004949867, - "grad_norm": 0.5174162983894348, - "learning_rate": 1.5175360663366756e-05, - "loss": 0.0522, - "step": 28510 - }, - { - "epoch": 0.723822820154842, - "grad_norm": 4.905553817749023, - "learning_rate": 1.5174514532301054e-05, - "loss": 0.0772, - "step": 28515 - }, - { - "epoch": 0.7239497398146973, - "grad_norm": 0.3605773448944092, - "learning_rate": 1.5173668401235352e-05, - "loss": 0.0688, - "step": 28520 - }, - { - "epoch": 0.7240766594745526, - "grad_norm": 0.5422257781028748, - "learning_rate": 1.5172822270169651e-05, - "loss": 0.071, - "step": 28525 - }, - { - "epoch": 0.7242035791344079, - "grad_norm": 0.5356601476669312, - "learning_rate": 1.5171976139103948e-05, - "loss": 0.0636, - "step": 28530 - }, - { - "epoch": 0.7243304987942633, - "grad_norm": 0.4723335802555084, - "learning_rate": 1.5171130008038246e-05, - "loss": 0.0704, - "step": 28535 - }, - { - "epoch": 0.7244574184541186, - "grad_norm": 0.39510610699653625, - "learning_rate": 1.5170283876972544e-05, - "loss": 0.0792, - "step": 28540 - }, - { - "epoch": 0.7245843381139738, - "grad_norm": 0.43470272421836853, - "learning_rate": 1.5169437745906843e-05, - "loss": 0.0611, - "step": 28545 - }, - { - "epoch": 0.7247112577738292, - "grad_norm": 0.41736653447151184, - "learning_rate": 1.516859161484114e-05, - "loss": 0.0759, - "step": 28550 - }, - { - "epoch": 0.7248381774336845, - "grad_norm": 0.48236891627311707, - "learning_rate": 1.5167745483775438e-05, - "loss": 0.0788, - "step": 28555 - }, - { - "epoch": 0.7249650970935397, - "grad_norm": 0.3266431391239166, - "learning_rate": 1.5166899352709736e-05, - "loss": 0.0692, - "step": 28560 - }, - { - "epoch": 0.7250920167533951, - "grad_norm": 0.8057698011398315, - "learning_rate": 1.5166053221644035e-05, - "loss": 0.0682, - "step": 28565 - }, - { - "epoch": 0.7252189364132504, - "grad_norm": 0.4285777807235718, - "learning_rate": 1.5165207090578331e-05, - "loss": 0.0696, - "step": 28570 - }, - { - "epoch": 0.7253458560731058, - "grad_norm": 0.39764758944511414, - "learning_rate": 1.516436095951263e-05, - "loss": 0.0708, - "step": 28575 - }, - { - "epoch": 0.725472775732961, - "grad_norm": 0.862117350101471, - "learning_rate": 1.5163514828446928e-05, - "loss": 0.0666, - "step": 28580 - }, - { - "epoch": 0.7255996953928163, - "grad_norm": 0.53076171875, - "learning_rate": 1.5162668697381227e-05, - "loss": 0.096, - "step": 28585 - }, - { - "epoch": 0.7257266150526717, - "grad_norm": 0.4680763781070709, - "learning_rate": 1.5161822566315523e-05, - "loss": 0.0895, - "step": 28590 - }, - { - "epoch": 0.725853534712527, - "grad_norm": 0.5773413181304932, - "learning_rate": 1.5160976435249822e-05, - "loss": 0.08, - "step": 28595 - }, - { - "epoch": 0.7259804543723822, - "grad_norm": 0.8948719501495361, - "learning_rate": 1.516013030418412e-05, - "loss": 0.0976, - "step": 28600 - }, - { - "epoch": 0.7261073740322376, - "grad_norm": 0.9618192911148071, - "learning_rate": 1.5159284173118419e-05, - "loss": 0.0725, - "step": 28605 - }, - { - "epoch": 0.7262342936920929, - "grad_norm": 0.7943077087402344, - "learning_rate": 1.5158438042052715e-05, - "loss": 0.0725, - "step": 28610 - }, - { - "epoch": 0.7263612133519483, - "grad_norm": 0.42631497979164124, - "learning_rate": 1.5157591910987014e-05, - "loss": 0.0744, - "step": 28615 - }, - { - "epoch": 0.7264881330118035, - "grad_norm": 0.47363367676734924, - "learning_rate": 1.5156745779921312e-05, - "loss": 0.0697, - "step": 28620 - }, - { - "epoch": 0.7266150526716588, - "grad_norm": 0.9428401589393616, - "learning_rate": 1.515589964885561e-05, - "loss": 0.0741, - "step": 28625 - }, - { - "epoch": 0.7267419723315142, - "grad_norm": 0.5430381894111633, - "learning_rate": 1.5155053517789905e-05, - "loss": 0.076, - "step": 28630 - }, - { - "epoch": 0.7268688919913695, - "grad_norm": 0.5321327447891235, - "learning_rate": 1.5154207386724204e-05, - "loss": 0.0684, - "step": 28635 - }, - { - "epoch": 0.7269958116512247, - "grad_norm": 0.3877311646938324, - "learning_rate": 1.5153361255658502e-05, - "loss": 0.061, - "step": 28640 - }, - { - "epoch": 0.7271227313110801, - "grad_norm": 0.5841807126998901, - "learning_rate": 1.51525151245928e-05, - "loss": 0.0601, - "step": 28645 - }, - { - "epoch": 0.7272496509709354, - "grad_norm": 0.8257840275764465, - "learning_rate": 1.5151668993527097e-05, - "loss": 0.0991, - "step": 28650 - }, - { - "epoch": 0.7273765706307908, - "grad_norm": 0.5279754996299744, - "learning_rate": 1.5150822862461396e-05, - "loss": 0.1014, - "step": 28655 - }, - { - "epoch": 0.727503490290646, - "grad_norm": 0.49089863896369934, - "learning_rate": 1.5149976731395694e-05, - "loss": 0.0775, - "step": 28660 - }, - { - "epoch": 0.7276304099505013, - "grad_norm": 0.9288485050201416, - "learning_rate": 1.5149130600329993e-05, - "loss": 0.1119, - "step": 28665 - }, - { - "epoch": 0.7277573296103567, - "grad_norm": 0.49164873361587524, - "learning_rate": 1.514828446926429e-05, - "loss": 0.0534, - "step": 28670 - }, - { - "epoch": 0.727884249270212, - "grad_norm": 0.49266496300697327, - "learning_rate": 1.5147438338198588e-05, - "loss": 0.0831, - "step": 28675 - }, - { - "epoch": 0.7280111689300672, - "grad_norm": 0.5540217757225037, - "learning_rate": 1.5146592207132886e-05, - "loss": 0.0698, - "step": 28680 - }, - { - "epoch": 0.7281380885899226, - "grad_norm": 0.63337242603302, - "learning_rate": 1.5145746076067184e-05, - "loss": 0.066, - "step": 28685 - }, - { - "epoch": 0.7282650082497779, - "grad_norm": 0.5547446608543396, - "learning_rate": 1.5144899945001481e-05, - "loss": 0.0849, - "step": 28690 - }, - { - "epoch": 0.7283919279096333, - "grad_norm": 0.41927510499954224, - "learning_rate": 1.514405381393578e-05, - "loss": 0.0681, - "step": 28695 - }, - { - "epoch": 0.7285188475694885, - "grad_norm": 0.46466299891471863, - "learning_rate": 1.5143207682870078e-05, - "loss": 0.0784, - "step": 28700 - }, - { - "epoch": 0.7286457672293438, - "grad_norm": 0.5070329308509827, - "learning_rate": 1.5142361551804376e-05, - "loss": 0.086, - "step": 28705 - }, - { - "epoch": 0.7287726868891992, - "grad_norm": 1.1085124015808105, - "learning_rate": 1.5141515420738673e-05, - "loss": 0.0945, - "step": 28710 - }, - { - "epoch": 0.7288996065490545, - "grad_norm": 0.9310800433158875, - "learning_rate": 1.5140669289672972e-05, - "loss": 0.0889, - "step": 28715 - }, - { - "epoch": 0.7290265262089097, - "grad_norm": 0.6583683490753174, - "learning_rate": 1.513982315860727e-05, - "loss": 0.0609, - "step": 28720 - }, - { - "epoch": 0.7291534458687651, - "grad_norm": 0.4912565350532532, - "learning_rate": 1.5138977027541568e-05, - "loss": 0.068, - "step": 28725 - }, - { - "epoch": 0.7292803655286204, - "grad_norm": 0.8356752991676331, - "learning_rate": 1.5138130896475865e-05, - "loss": 0.0742, - "step": 28730 - }, - { - "epoch": 0.7294072851884756, - "grad_norm": 0.3372842073440552, - "learning_rate": 1.5137284765410163e-05, - "loss": 0.0612, - "step": 28735 - }, - { - "epoch": 0.729534204848331, - "grad_norm": 0.5554506778717041, - "learning_rate": 1.5136438634344462e-05, - "loss": 0.0701, - "step": 28740 - }, - { - "epoch": 0.7296611245081863, - "grad_norm": 2.7120072841644287, - "learning_rate": 1.513559250327876e-05, - "loss": 0.0779, - "step": 28745 - }, - { - "epoch": 0.7297880441680417, - "grad_norm": 0.5900833010673523, - "learning_rate": 1.5134746372213055e-05, - "loss": 0.066, - "step": 28750 - }, - { - "epoch": 0.7299149638278969, - "grad_norm": 0.3753034174442291, - "learning_rate": 1.5133900241147355e-05, - "loss": 0.0669, - "step": 28755 - }, - { - "epoch": 0.7300418834877522, - "grad_norm": 0.6239312291145325, - "learning_rate": 1.5133054110081654e-05, - "loss": 0.0891, - "step": 28760 - }, - { - "epoch": 0.7301688031476076, - "grad_norm": 0.5838092565536499, - "learning_rate": 1.5132207979015952e-05, - "loss": 0.073, - "step": 28765 - }, - { - "epoch": 0.7302957228074629, - "grad_norm": 0.7316375970840454, - "learning_rate": 1.5131361847950247e-05, - "loss": 0.0821, - "step": 28770 - }, - { - "epoch": 0.7304226424673181, - "grad_norm": 0.5373832583427429, - "learning_rate": 1.5130515716884546e-05, - "loss": 0.0826, - "step": 28775 - }, - { - "epoch": 0.7305495621271735, - "grad_norm": 0.4450385272502899, - "learning_rate": 1.5129669585818844e-05, - "loss": 0.0707, - "step": 28780 - }, - { - "epoch": 0.7306764817870288, - "grad_norm": 0.6162949800491333, - "learning_rate": 1.5128823454753142e-05, - "loss": 0.0777, - "step": 28785 - }, - { - "epoch": 0.7308034014468842, - "grad_norm": 0.560677170753479, - "learning_rate": 1.5127977323687439e-05, - "loss": 0.079, - "step": 28790 - }, - { - "epoch": 0.7309303211067394, - "grad_norm": 0.36127305030822754, - "learning_rate": 1.5127131192621737e-05, - "loss": 0.075, - "step": 28795 - }, - { - "epoch": 0.7310572407665947, - "grad_norm": 0.7839513421058655, - "learning_rate": 1.5126285061556036e-05, - "loss": 0.0868, - "step": 28800 - }, - { - "epoch": 0.7311841604264501, - "grad_norm": 0.6920422315597534, - "learning_rate": 1.5125438930490334e-05, - "loss": 0.08, - "step": 28805 - }, - { - "epoch": 0.7313110800863054, - "grad_norm": 0.6453902125358582, - "learning_rate": 1.5124592799424631e-05, - "loss": 0.0722, - "step": 28810 - }, - { - "epoch": 0.7314379997461606, - "grad_norm": 0.46376705169677734, - "learning_rate": 1.512374666835893e-05, - "loss": 0.0995, - "step": 28815 - }, - { - "epoch": 0.731564919406016, - "grad_norm": 0.6825188398361206, - "learning_rate": 1.5122900537293228e-05, - "loss": 0.06, - "step": 28820 - }, - { - "epoch": 0.7316918390658713, - "grad_norm": 0.42427316308021545, - "learning_rate": 1.5122054406227526e-05, - "loss": 0.0626, - "step": 28825 - }, - { - "epoch": 0.7318187587257267, - "grad_norm": 0.44706588983535767, - "learning_rate": 1.5121208275161823e-05, - "loss": 0.0627, - "step": 28830 - }, - { - "epoch": 0.7319456783855819, - "grad_norm": 0.586863100528717, - "learning_rate": 1.5120362144096121e-05, - "loss": 0.0741, - "step": 28835 - }, - { - "epoch": 0.7320725980454372, - "grad_norm": 0.5732556581497192, - "learning_rate": 1.511951601303042e-05, - "loss": 0.0851, - "step": 28840 - }, - { - "epoch": 0.7321995177052926, - "grad_norm": 0.42279067635536194, - "learning_rate": 1.5118669881964718e-05, - "loss": 0.0592, - "step": 28845 - }, - { - "epoch": 0.7323264373651479, - "grad_norm": 0.5101013779640198, - "learning_rate": 1.5117823750899015e-05, - "loss": 0.0771, - "step": 28850 - }, - { - "epoch": 0.7324533570250031, - "grad_norm": 0.4859732985496521, - "learning_rate": 1.5116977619833313e-05, - "loss": 0.0576, - "step": 28855 - }, - { - "epoch": 0.7325802766848585, - "grad_norm": 0.5723256468772888, - "learning_rate": 1.5116131488767612e-05, - "loss": 0.0641, - "step": 28860 - }, - { - "epoch": 0.7327071963447138, - "grad_norm": 0.34499892592430115, - "learning_rate": 1.511528535770191e-05, - "loss": 0.0807, - "step": 28865 - }, - { - "epoch": 0.7328341160045692, - "grad_norm": 0.6008086204528809, - "learning_rate": 1.5114439226636207e-05, - "loss": 0.0662, - "step": 28870 - }, - { - "epoch": 0.7329610356644244, - "grad_norm": 0.5718141198158264, - "learning_rate": 1.5113593095570505e-05, - "loss": 0.0723, - "step": 28875 - }, - { - "epoch": 0.7330879553242797, - "grad_norm": 0.4570436477661133, - "learning_rate": 1.5112746964504804e-05, - "loss": 0.0989, - "step": 28880 - }, - { - "epoch": 0.7332148749841351, - "grad_norm": 0.4451131820678711, - "learning_rate": 1.5111900833439102e-05, - "loss": 0.1006, - "step": 28885 - }, - { - "epoch": 0.7333417946439904, - "grad_norm": 0.5988807082176208, - "learning_rate": 1.5111054702373397e-05, - "loss": 0.0728, - "step": 28890 - }, - { - "epoch": 0.7334687143038456, - "grad_norm": 0.47748398780822754, - "learning_rate": 1.5110208571307697e-05, - "loss": 0.0696, - "step": 28895 - }, - { - "epoch": 0.733595633963701, - "grad_norm": 0.5476909279823303, - "learning_rate": 1.5109362440241995e-05, - "loss": 0.0714, - "step": 28900 - }, - { - "epoch": 0.7337225536235563, - "grad_norm": 0.5517981648445129, - "learning_rate": 1.5108516309176294e-05, - "loss": 0.0769, - "step": 28905 - }, - { - "epoch": 0.7338494732834115, - "grad_norm": 0.41952913999557495, - "learning_rate": 1.5107670178110592e-05, - "loss": 0.091, - "step": 28910 - }, - { - "epoch": 0.7339763929432669, - "grad_norm": 0.5800809860229492, - "learning_rate": 1.5106824047044887e-05, - "loss": 0.0862, - "step": 28915 - }, - { - "epoch": 0.7341033126031222, - "grad_norm": 0.6802849769592285, - "learning_rate": 1.5105977915979186e-05, - "loss": 0.0788, - "step": 28920 - }, - { - "epoch": 0.7342302322629776, - "grad_norm": 0.5612084865570068, - "learning_rate": 1.5105131784913484e-05, - "loss": 0.0646, - "step": 28925 - }, - { - "epoch": 0.7343571519228328, - "grad_norm": 0.3070714771747589, - "learning_rate": 1.5104285653847784e-05, - "loss": 0.0664, - "step": 28930 - }, - { - "epoch": 0.7344840715826881, - "grad_norm": 0.4327002167701721, - "learning_rate": 1.510343952278208e-05, - "loss": 0.0714, - "step": 28935 - }, - { - "epoch": 0.7346109912425435, - "grad_norm": 0.4711267948150635, - "learning_rate": 1.5102593391716378e-05, - "loss": 0.0629, - "step": 28940 - }, - { - "epoch": 0.7347379109023988, - "grad_norm": 0.7745065093040466, - "learning_rate": 1.5101747260650676e-05, - "loss": 0.0772, - "step": 28945 - }, - { - "epoch": 0.734864830562254, - "grad_norm": 0.43214666843414307, - "learning_rate": 1.5100901129584974e-05, - "loss": 0.0728, - "step": 28950 - }, - { - "epoch": 0.7349917502221094, - "grad_norm": 1.994478464126587, - "learning_rate": 1.5100054998519271e-05, - "loss": 0.0719, - "step": 28955 - }, - { - "epoch": 0.7351186698819647, - "grad_norm": 0.4371846318244934, - "learning_rate": 1.509920886745357e-05, - "loss": 0.0794, - "step": 28960 - }, - { - "epoch": 0.7352455895418201, - "grad_norm": 0.41734743118286133, - "learning_rate": 1.5098362736387868e-05, - "loss": 0.0773, - "step": 28965 - }, - { - "epoch": 0.7353725092016753, - "grad_norm": 0.7816877961158752, - "learning_rate": 1.5097516605322166e-05, - "loss": 0.0513, - "step": 28970 - }, - { - "epoch": 0.7354994288615306, - "grad_norm": 0.6229501366615295, - "learning_rate": 1.5096670474256463e-05, - "loss": 0.0693, - "step": 28975 - }, - { - "epoch": 0.735626348521386, - "grad_norm": 0.6308608651161194, - "learning_rate": 1.5095824343190761e-05, - "loss": 0.074, - "step": 28980 - }, - { - "epoch": 0.7357532681812413, - "grad_norm": 1.316396951675415, - "learning_rate": 1.509497821212506e-05, - "loss": 0.0677, - "step": 28985 - }, - { - "epoch": 0.7358801878410965, - "grad_norm": 0.7609584331512451, - "learning_rate": 1.5094132081059358e-05, - "loss": 0.0675, - "step": 28990 - }, - { - "epoch": 0.7360071075009519, - "grad_norm": 0.7097399234771729, - "learning_rate": 1.5093285949993655e-05, - "loss": 0.0795, - "step": 28995 - }, - { - "epoch": 0.7361340271608072, - "grad_norm": 1.0924921035766602, - "learning_rate": 1.5092439818927953e-05, - "loss": 0.0626, - "step": 29000 - }, - { - "epoch": 0.7362609468206626, - "grad_norm": 0.7018049955368042, - "learning_rate": 1.5091593687862252e-05, - "loss": 0.0782, - "step": 29005 - }, - { - "epoch": 0.7363878664805178, - "grad_norm": 0.6429917812347412, - "learning_rate": 1.509074755679655e-05, - "loss": 0.0755, - "step": 29010 - }, - { - "epoch": 0.7365147861403731, - "grad_norm": 0.4616933763027191, - "learning_rate": 1.5089901425730847e-05, - "loss": 0.0667, - "step": 29015 - }, - { - "epoch": 0.7366417058002285, - "grad_norm": 0.35857093334198, - "learning_rate": 1.5089055294665145e-05, - "loss": 0.0505, - "step": 29020 - }, - { - "epoch": 0.7367686254600838, - "grad_norm": 0.7778374552726746, - "learning_rate": 1.5088209163599444e-05, - "loss": 0.044, - "step": 29025 - }, - { - "epoch": 0.736895545119939, - "grad_norm": 0.3761694133281708, - "learning_rate": 1.5087363032533742e-05, - "loss": 0.0803, - "step": 29030 - }, - { - "epoch": 0.7370224647797944, - "grad_norm": 0.6231378316879272, - "learning_rate": 1.5086516901468039e-05, - "loss": 0.0919, - "step": 29035 - }, - { - "epoch": 0.7371493844396497, - "grad_norm": 0.4964863657951355, - "learning_rate": 1.5085670770402337e-05, - "loss": 0.06, - "step": 29040 - }, - { - "epoch": 0.7372763040995051, - "grad_norm": 0.6431192755699158, - "learning_rate": 1.5084824639336636e-05, - "loss": 0.0739, - "step": 29045 - }, - { - "epoch": 0.7374032237593603, - "grad_norm": 0.4180096685886383, - "learning_rate": 1.5083978508270934e-05, - "loss": 0.0638, - "step": 29050 - }, - { - "epoch": 0.7375301434192156, - "grad_norm": 0.4509308338165283, - "learning_rate": 1.5083132377205229e-05, - "loss": 0.0783, - "step": 29055 - }, - { - "epoch": 0.737657063079071, - "grad_norm": 0.49969759583473206, - "learning_rate": 1.5082286246139527e-05, - "loss": 0.0754, - "step": 29060 - }, - { - "epoch": 0.7377839827389263, - "grad_norm": 0.737237274646759, - "learning_rate": 1.5081440115073826e-05, - "loss": 0.0725, - "step": 29065 - }, - { - "epoch": 0.7379109023987815, - "grad_norm": 0.44397661089897156, - "learning_rate": 1.5080593984008124e-05, - "loss": 0.0767, - "step": 29070 - }, - { - "epoch": 0.7380378220586369, - "grad_norm": 0.50658118724823, - "learning_rate": 1.5079747852942421e-05, - "loss": 0.0816, - "step": 29075 - }, - { - "epoch": 0.7381647417184922, - "grad_norm": 0.5389224290847778, - "learning_rate": 1.507890172187672e-05, - "loss": 0.0817, - "step": 29080 - }, - { - "epoch": 0.7382916613783475, - "grad_norm": 0.3116966187953949, - "learning_rate": 1.5078055590811018e-05, - "loss": 0.0505, - "step": 29085 - }, - { - "epoch": 0.7384185810382028, - "grad_norm": 0.5881547331809998, - "learning_rate": 1.5077209459745316e-05, - "loss": 0.0611, - "step": 29090 - }, - { - "epoch": 0.7385455006980581, - "grad_norm": 0.7247850894927979, - "learning_rate": 1.5076363328679613e-05, - "loss": 0.0644, - "step": 29095 - }, - { - "epoch": 0.7386724203579135, - "grad_norm": 0.6687891483306885, - "learning_rate": 1.5075517197613911e-05, - "loss": 0.0622, - "step": 29100 - }, - { - "epoch": 0.7387993400177687, - "grad_norm": 0.6308155059814453, - "learning_rate": 1.507467106654821e-05, - "loss": 0.0849, - "step": 29105 - }, - { - "epoch": 0.738926259677624, - "grad_norm": 0.44135576486587524, - "learning_rate": 1.5073824935482508e-05, - "loss": 0.0549, - "step": 29110 - }, - { - "epoch": 0.7390531793374794, - "grad_norm": 0.9015704989433289, - "learning_rate": 1.5072978804416805e-05, - "loss": 0.0802, - "step": 29115 - }, - { - "epoch": 0.7391800989973347, - "grad_norm": 0.5573523044586182, - "learning_rate": 1.5072132673351103e-05, - "loss": 0.062, - "step": 29120 - }, - { - "epoch": 0.73930701865719, - "grad_norm": 0.7422395348548889, - "learning_rate": 1.5071286542285402e-05, - "loss": 0.0763, - "step": 29125 - }, - { - "epoch": 0.7394339383170453, - "grad_norm": 0.5013163089752197, - "learning_rate": 1.50704404112197e-05, - "loss": 0.0846, - "step": 29130 - }, - { - "epoch": 0.7395608579769006, - "grad_norm": 0.7509750127792358, - "learning_rate": 1.5069594280153997e-05, - "loss": 0.0674, - "step": 29135 - }, - { - "epoch": 0.739687777636756, - "grad_norm": 0.795621395111084, - "learning_rate": 1.5068748149088295e-05, - "loss": 0.088, - "step": 29140 - }, - { - "epoch": 0.7398146972966112, - "grad_norm": 0.6417064070701599, - "learning_rate": 1.5067902018022593e-05, - "loss": 0.0805, - "step": 29145 - }, - { - "epoch": 0.7399416169564665, - "grad_norm": 0.5869410037994385, - "learning_rate": 1.5067055886956892e-05, - "loss": 0.0825, - "step": 29150 - }, - { - "epoch": 0.7400685366163219, - "grad_norm": 0.3849017322063446, - "learning_rate": 1.5066209755891189e-05, - "loss": 0.0881, - "step": 29155 - }, - { - "epoch": 0.7401954562761772, - "grad_norm": 0.3494469225406647, - "learning_rate": 1.5065363624825487e-05, - "loss": 0.0573, - "step": 29160 - }, - { - "epoch": 0.7403223759360325, - "grad_norm": 0.4721563160419464, - "learning_rate": 1.5064517493759785e-05, - "loss": 0.0539, - "step": 29165 - }, - { - "epoch": 0.7404492955958878, - "grad_norm": 0.690151035785675, - "learning_rate": 1.5063671362694084e-05, - "loss": 0.0814, - "step": 29170 - }, - { - "epoch": 0.7405762152557431, - "grad_norm": 0.4491805136203766, - "learning_rate": 1.506282523162838e-05, - "loss": 0.0797, - "step": 29175 - }, - { - "epoch": 0.7407031349155985, - "grad_norm": 0.5341126918792725, - "learning_rate": 1.5061979100562679e-05, - "loss": 0.0678, - "step": 29180 - }, - { - "epoch": 0.7408300545754537, - "grad_norm": 0.5417337417602539, - "learning_rate": 1.5061132969496977e-05, - "loss": 0.0661, - "step": 29185 - }, - { - "epoch": 0.740956974235309, - "grad_norm": 0.6197111010551453, - "learning_rate": 1.5060286838431276e-05, - "loss": 0.0926, - "step": 29190 - }, - { - "epoch": 0.7410838938951644, - "grad_norm": 0.46154525876045227, - "learning_rate": 1.505944070736557e-05, - "loss": 0.0898, - "step": 29195 - }, - { - "epoch": 0.7412108135550197, - "grad_norm": 0.36451011896133423, - "learning_rate": 1.5058594576299869e-05, - "loss": 0.067, - "step": 29200 - }, - { - "epoch": 0.741337733214875, - "grad_norm": 0.4772532284259796, - "learning_rate": 1.5057748445234167e-05, - "loss": 0.068, - "step": 29205 - }, - { - "epoch": 0.7414646528747303, - "grad_norm": 0.34949177503585815, - "learning_rate": 1.5056902314168466e-05, - "loss": 0.0594, - "step": 29210 - }, - { - "epoch": 0.7415915725345856, - "grad_norm": 0.40196770429611206, - "learning_rate": 1.5056056183102763e-05, - "loss": 0.0738, - "step": 29215 - }, - { - "epoch": 0.741718492194441, - "grad_norm": 0.3938974142074585, - "learning_rate": 1.5055210052037061e-05, - "loss": 0.0735, - "step": 29220 - }, - { - "epoch": 0.7418454118542962, - "grad_norm": 0.5069459676742554, - "learning_rate": 1.505436392097136e-05, - "loss": 0.0756, - "step": 29225 - }, - { - "epoch": 0.7419723315141515, - "grad_norm": 0.715030312538147, - "learning_rate": 1.5053517789905658e-05, - "loss": 0.077, - "step": 29230 - }, - { - "epoch": 0.7420992511740069, - "grad_norm": 0.5647764205932617, - "learning_rate": 1.5052671658839955e-05, - "loss": 0.0786, - "step": 29235 - }, - { - "epoch": 0.7422261708338622, - "grad_norm": 0.5304135680198669, - "learning_rate": 1.5051825527774253e-05, - "loss": 0.0665, - "step": 29240 - }, - { - "epoch": 0.7423530904937174, - "grad_norm": 0.786213755607605, - "learning_rate": 1.5050979396708551e-05, - "loss": 0.0747, - "step": 29245 - }, - { - "epoch": 0.7424800101535728, - "grad_norm": 1.6710072755813599, - "learning_rate": 1.505013326564285e-05, - "loss": 0.0872, - "step": 29250 - }, - { - "epoch": 0.7426069298134281, - "grad_norm": 0.5587740540504456, - "learning_rate": 1.5049287134577146e-05, - "loss": 0.0806, - "step": 29255 - }, - { - "epoch": 0.7427338494732834, - "grad_norm": 0.4285151958465576, - "learning_rate": 1.5048441003511445e-05, - "loss": 0.0632, - "step": 29260 - }, - { - "epoch": 0.7428607691331387, - "grad_norm": 0.5226601362228394, - "learning_rate": 1.5047594872445743e-05, - "loss": 0.0521, - "step": 29265 - }, - { - "epoch": 0.742987688792994, - "grad_norm": 0.48641645908355713, - "learning_rate": 1.5046748741380042e-05, - "loss": 0.0668, - "step": 29270 - }, - { - "epoch": 0.7431146084528494, - "grad_norm": 0.45654237270355225, - "learning_rate": 1.5045902610314338e-05, - "loss": 0.077, - "step": 29275 - }, - { - "epoch": 0.7432415281127046, - "grad_norm": 0.5735882520675659, - "learning_rate": 1.5045056479248637e-05, - "loss": 0.0835, - "step": 29280 - }, - { - "epoch": 0.74336844777256, - "grad_norm": 0.5734319686889648, - "learning_rate": 1.5044210348182935e-05, - "loss": 0.0799, - "step": 29285 - }, - { - "epoch": 0.7434953674324153, - "grad_norm": 0.60293048620224, - "learning_rate": 1.5043364217117234e-05, - "loss": 0.0779, - "step": 29290 - }, - { - "epoch": 0.7436222870922706, - "grad_norm": 0.9662447571754456, - "learning_rate": 1.504251808605153e-05, - "loss": 0.1008, - "step": 29295 - }, - { - "epoch": 0.7437492067521259, - "grad_norm": 0.5744656920433044, - "learning_rate": 1.5041671954985829e-05, - "loss": 0.0545, - "step": 29300 - }, - { - "epoch": 0.7438761264119812, - "grad_norm": 0.8249129056930542, - "learning_rate": 1.5040825823920127e-05, - "loss": 0.0602, - "step": 29305 - }, - { - "epoch": 0.7440030460718365, - "grad_norm": 0.4817701280117035, - "learning_rate": 1.5039979692854425e-05, - "loss": 0.0837, - "step": 29310 - }, - { - "epoch": 0.7441299657316919, - "grad_norm": 1.3828024864196777, - "learning_rate": 1.503913356178872e-05, - "loss": 0.088, - "step": 29315 - }, - { - "epoch": 0.7442568853915471, - "grad_norm": 0.46283793449401855, - "learning_rate": 1.503828743072302e-05, - "loss": 0.0647, - "step": 29320 - }, - { - "epoch": 0.7443838050514024, - "grad_norm": 0.6355276703834534, - "learning_rate": 1.5037441299657319e-05, - "loss": 0.0778, - "step": 29325 - }, - { - "epoch": 0.7445107247112578, - "grad_norm": 0.7077255845069885, - "learning_rate": 1.5036595168591617e-05, - "loss": 0.0652, - "step": 29330 - }, - { - "epoch": 0.7446376443711131, - "grad_norm": 0.45861050486564636, - "learning_rate": 1.5035749037525912e-05, - "loss": 0.074, - "step": 29335 - }, - { - "epoch": 0.7447645640309684, - "grad_norm": 0.6531497240066528, - "learning_rate": 1.503490290646021e-05, - "loss": 0.0838, - "step": 29340 - }, - { - "epoch": 0.7448914836908237, - "grad_norm": 0.5625487565994263, - "learning_rate": 1.503405677539451e-05, - "loss": 0.077, - "step": 29345 - }, - { - "epoch": 0.745018403350679, - "grad_norm": 0.8987553715705872, - "learning_rate": 1.5033210644328808e-05, - "loss": 0.1118, - "step": 29350 - }, - { - "epoch": 0.7451453230105344, - "grad_norm": 0.599827229976654, - "learning_rate": 1.5032364513263104e-05, - "loss": 0.0637, - "step": 29355 - }, - { - "epoch": 0.7452722426703896, - "grad_norm": 0.5634419322013855, - "learning_rate": 1.5031518382197403e-05, - "loss": 0.1088, - "step": 29360 - }, - { - "epoch": 0.745399162330245, - "grad_norm": 0.540033221244812, - "learning_rate": 1.5030672251131701e-05, - "loss": 0.078, - "step": 29365 - }, - { - "epoch": 0.7455260819901003, - "grad_norm": 0.6860496997833252, - "learning_rate": 1.5029826120066e-05, - "loss": 0.0688, - "step": 29370 - }, - { - "epoch": 0.7456530016499556, - "grad_norm": 0.4995521306991577, - "learning_rate": 1.5028979989000296e-05, - "loss": 0.0746, - "step": 29375 - }, - { - "epoch": 0.7457799213098109, - "grad_norm": 0.5682209134101868, - "learning_rate": 1.5028133857934595e-05, - "loss": 0.0847, - "step": 29380 - }, - { - "epoch": 0.7459068409696662, - "grad_norm": 0.5011436939239502, - "learning_rate": 1.5027287726868893e-05, - "loss": 0.0862, - "step": 29385 - }, - { - "epoch": 0.7460337606295215, - "grad_norm": 0.353626012802124, - "learning_rate": 1.5026441595803191e-05, - "loss": 0.0719, - "step": 29390 - }, - { - "epoch": 0.7461606802893769, - "grad_norm": 0.483928918838501, - "learning_rate": 1.5025595464737488e-05, - "loss": 0.0708, - "step": 29395 - }, - { - "epoch": 0.7462875999492321, - "grad_norm": 0.43881699442863464, - "learning_rate": 1.5024749333671787e-05, - "loss": 0.082, - "step": 29400 - }, - { - "epoch": 0.7464145196090874, - "grad_norm": 0.47535064816474915, - "learning_rate": 1.5023903202606085e-05, - "loss": 0.0618, - "step": 29405 - }, - { - "epoch": 0.7465414392689428, - "grad_norm": 0.6462476849555969, - "learning_rate": 1.5023057071540383e-05, - "loss": 0.1024, - "step": 29410 - }, - { - "epoch": 0.7466683589287981, - "grad_norm": 0.38345471024513245, - "learning_rate": 1.502221094047468e-05, - "loss": 0.0659, - "step": 29415 - }, - { - "epoch": 0.7467952785886534, - "grad_norm": 0.36030250787734985, - "learning_rate": 1.5021364809408978e-05, - "loss": 0.086, - "step": 29420 - }, - { - "epoch": 0.7469221982485087, - "grad_norm": 0.852904736995697, - "learning_rate": 1.5020518678343277e-05, - "loss": 0.0699, - "step": 29425 - }, - { - "epoch": 0.747049117908364, - "grad_norm": 0.3887009620666504, - "learning_rate": 1.5019672547277575e-05, - "loss": 0.0528, - "step": 29430 - }, - { - "epoch": 0.7471760375682193, - "grad_norm": 0.6893263459205627, - "learning_rate": 1.5018826416211874e-05, - "loss": 0.0757, - "step": 29435 - }, - { - "epoch": 0.7473029572280746, - "grad_norm": 0.6226376295089722, - "learning_rate": 1.501798028514617e-05, - "loss": 0.0856, - "step": 29440 - }, - { - "epoch": 0.74742987688793, - "grad_norm": 0.8518234491348267, - "learning_rate": 1.5017134154080469e-05, - "loss": 0.0748, - "step": 29445 - }, - { - "epoch": 0.7475567965477853, - "grad_norm": 0.4656777083873749, - "learning_rate": 1.5016288023014767e-05, - "loss": 0.0829, - "step": 29450 - }, - { - "epoch": 0.7476837162076405, - "grad_norm": 0.5402419567108154, - "learning_rate": 1.5015441891949066e-05, - "loss": 0.0547, - "step": 29455 - }, - { - "epoch": 0.7478106358674959, - "grad_norm": 0.6234961152076721, - "learning_rate": 1.5014595760883362e-05, - "loss": 0.0692, - "step": 29460 - }, - { - "epoch": 0.7479375555273512, - "grad_norm": 0.6057019233703613, - "learning_rate": 1.501374962981766e-05, - "loss": 0.0676, - "step": 29465 - }, - { - "epoch": 0.7480644751872065, - "grad_norm": 0.5897005796432495, - "learning_rate": 1.5012903498751959e-05, - "loss": 0.075, - "step": 29470 - }, - { - "epoch": 0.7481913948470618, - "grad_norm": 0.41661930084228516, - "learning_rate": 1.5012057367686257e-05, - "loss": 0.0744, - "step": 29475 - }, - { - "epoch": 0.7483183145069171, - "grad_norm": 0.5922761559486389, - "learning_rate": 1.5011211236620552e-05, - "loss": 0.072, - "step": 29480 - }, - { - "epoch": 0.7484452341667724, - "grad_norm": 0.516040027141571, - "learning_rate": 1.5010365105554851e-05, - "loss": 0.0747, - "step": 29485 - }, - { - "epoch": 0.7485721538266278, - "grad_norm": 1.0038280487060547, - "learning_rate": 1.500951897448915e-05, - "loss": 0.0756, - "step": 29490 - }, - { - "epoch": 0.748699073486483, - "grad_norm": 0.6666210293769836, - "learning_rate": 1.500867284342345e-05, - "loss": 0.0888, - "step": 29495 - }, - { - "epoch": 0.7488259931463384, - "grad_norm": 0.25433292984962463, - "learning_rate": 1.5007826712357744e-05, - "loss": 0.0861, - "step": 29500 - }, - { - "epoch": 0.7489529128061937, - "grad_norm": 0.43251073360443115, - "learning_rate": 1.5006980581292043e-05, - "loss": 0.068, - "step": 29505 - }, - { - "epoch": 0.749079832466049, - "grad_norm": 0.2602269947528839, - "learning_rate": 1.5006134450226341e-05, - "loss": 0.0513, - "step": 29510 - }, - { - "epoch": 0.7492067521259043, - "grad_norm": 2.5019757747650146, - "learning_rate": 1.500528831916064e-05, - "loss": 0.0842, - "step": 29515 - }, - { - "epoch": 0.7493336717857596, - "grad_norm": 0.5245572328567505, - "learning_rate": 1.5004442188094936e-05, - "loss": 0.085, - "step": 29520 - }, - { - "epoch": 0.7494605914456149, - "grad_norm": 0.6160480380058289, - "learning_rate": 1.5003596057029235e-05, - "loss": 0.0758, - "step": 29525 - }, - { - "epoch": 0.7495875111054703, - "grad_norm": 0.5174956321716309, - "learning_rate": 1.5002749925963533e-05, - "loss": 0.0616, - "step": 29530 - }, - { - "epoch": 0.7497144307653255, - "grad_norm": 0.7363651394844055, - "learning_rate": 1.5001903794897832e-05, - "loss": 0.0671, - "step": 29535 - }, - { - "epoch": 0.7498413504251809, - "grad_norm": 0.5853056311607361, - "learning_rate": 1.5001057663832128e-05, - "loss": 0.0902, - "step": 29540 - }, - { - "epoch": 0.7499682700850362, - "grad_norm": 0.4836338758468628, - "learning_rate": 1.5000211532766427e-05, - "loss": 0.0679, - "step": 29545 - }, - { - "epoch": 0.7500951897448915, - "grad_norm": 0.4978012144565582, - "learning_rate": 1.4999365401700725e-05, - "loss": 0.0843, - "step": 29550 - }, - { - "epoch": 0.7502221094047468, - "grad_norm": 0.45237281918525696, - "learning_rate": 1.4998519270635023e-05, - "loss": 0.1056, - "step": 29555 - }, - { - "epoch": 0.7503490290646021, - "grad_norm": 0.55535888671875, - "learning_rate": 1.499767313956932e-05, - "loss": 0.0817, - "step": 29560 - }, - { - "epoch": 0.7504759487244574, - "grad_norm": 0.6387419104576111, - "learning_rate": 1.4996827008503619e-05, - "loss": 0.0926, - "step": 29565 - }, - { - "epoch": 0.7506028683843128, - "grad_norm": 0.5325235724449158, - "learning_rate": 1.4995980877437917e-05, - "loss": 0.0709, - "step": 29570 - }, - { - "epoch": 0.750729788044168, - "grad_norm": 0.5773869156837463, - "learning_rate": 1.4995134746372215e-05, - "loss": 0.0874, - "step": 29575 - }, - { - "epoch": 0.7508567077040234, - "grad_norm": 0.5291109085083008, - "learning_rate": 1.4994288615306512e-05, - "loss": 0.065, - "step": 29580 - }, - { - "epoch": 0.7509836273638787, - "grad_norm": 0.37285420298576355, - "learning_rate": 1.499344248424081e-05, - "loss": 0.0607, - "step": 29585 - }, - { - "epoch": 0.7511105470237339, - "grad_norm": 0.40408453345298767, - "learning_rate": 1.4992596353175109e-05, - "loss": 0.0683, - "step": 29590 - }, - { - "epoch": 0.7512374666835893, - "grad_norm": 0.524634063243866, - "learning_rate": 1.4991750222109407e-05, - "loss": 0.0522, - "step": 29595 - }, - { - "epoch": 0.7513643863434446, - "grad_norm": 0.43153446912765503, - "learning_rate": 1.4990904091043704e-05, - "loss": 0.0662, - "step": 29600 - }, - { - "epoch": 0.7514913060032999, - "grad_norm": 0.4864256978034973, - "learning_rate": 1.4990057959978002e-05, - "loss": 0.0647, - "step": 29605 - }, - { - "epoch": 0.7516182256631552, - "grad_norm": 0.43027666211128235, - "learning_rate": 1.49892118289123e-05, - "loss": 0.0847, - "step": 29610 - }, - { - "epoch": 0.7517451453230105, - "grad_norm": 0.5384827852249146, - "learning_rate": 1.49883656978466e-05, - "loss": 0.082, - "step": 29615 - }, - { - "epoch": 0.7518720649828659, - "grad_norm": 0.4932291805744171, - "learning_rate": 1.4987519566780894e-05, - "loss": 0.0753, - "step": 29620 - }, - { - "epoch": 0.7519989846427212, - "grad_norm": 0.766000509262085, - "learning_rate": 1.4986673435715193e-05, - "loss": 0.1068, - "step": 29625 - }, - { - "epoch": 0.7521259043025764, - "grad_norm": 1.094312310218811, - "learning_rate": 1.4985827304649491e-05, - "loss": 0.0613, - "step": 29630 - }, - { - "epoch": 0.7522528239624318, - "grad_norm": 0.6779441833496094, - "learning_rate": 1.498498117358379e-05, - "loss": 0.0526, - "step": 29635 - }, - { - "epoch": 0.7523797436222871, - "grad_norm": 0.5117449164390564, - "learning_rate": 1.4984135042518086e-05, - "loss": 0.0523, - "step": 29640 - }, - { - "epoch": 0.7525066632821424, - "grad_norm": 0.5868191719055176, - "learning_rate": 1.4983288911452385e-05, - "loss": 0.0744, - "step": 29645 - }, - { - "epoch": 0.7526335829419977, - "grad_norm": 0.6942634582519531, - "learning_rate": 1.4982442780386683e-05, - "loss": 0.0844, - "step": 29650 - }, - { - "epoch": 0.752760502601853, - "grad_norm": 0.5478904843330383, - "learning_rate": 1.4981596649320981e-05, - "loss": 0.0705, - "step": 29655 - }, - { - "epoch": 0.7528874222617084, - "grad_norm": 0.4551319181919098, - "learning_rate": 1.4980750518255278e-05, - "loss": 0.0743, - "step": 29660 - }, - { - "epoch": 0.7530143419215637, - "grad_norm": 0.4157784879207611, - "learning_rate": 1.4979904387189576e-05, - "loss": 0.0516, - "step": 29665 - }, - { - "epoch": 0.7531412615814189, - "grad_norm": 0.8041831851005554, - "learning_rate": 1.4979058256123875e-05, - "loss": 0.0851, - "step": 29670 - }, - { - "epoch": 0.7532681812412743, - "grad_norm": 0.5145654678344727, - "learning_rate": 1.4978212125058173e-05, - "loss": 0.0881, - "step": 29675 - }, - { - "epoch": 0.7533951009011296, - "grad_norm": 1.2028818130493164, - "learning_rate": 1.497736599399247e-05, - "loss": 0.0836, - "step": 29680 - }, - { - "epoch": 0.7535220205609849, - "grad_norm": 0.7105884552001953, - "learning_rate": 1.4976519862926768e-05, - "loss": 0.0534, - "step": 29685 - }, - { - "epoch": 0.7536489402208402, - "grad_norm": 0.7454654574394226, - "learning_rate": 1.4975673731861067e-05, - "loss": 0.0693, - "step": 29690 - }, - { - "epoch": 0.7537758598806955, - "grad_norm": 0.42729970812797546, - "learning_rate": 1.4974827600795365e-05, - "loss": 0.0692, - "step": 29695 - }, - { - "epoch": 0.7539027795405508, - "grad_norm": 0.5493413805961609, - "learning_rate": 1.4973981469729662e-05, - "loss": 0.0661, - "step": 29700 - }, - { - "epoch": 0.7540296992004062, - "grad_norm": 0.4867919385433197, - "learning_rate": 1.497313533866396e-05, - "loss": 0.071, - "step": 29705 - }, - { - "epoch": 0.7541566188602614, - "grad_norm": 0.5953279137611389, - "learning_rate": 1.4972289207598259e-05, - "loss": 0.0766, - "step": 29710 - }, - { - "epoch": 0.7542835385201168, - "grad_norm": 0.662907063961029, - "learning_rate": 1.4971443076532557e-05, - "loss": 0.0752, - "step": 29715 - }, - { - "epoch": 0.7544104581799721, - "grad_norm": 0.29934924840927124, - "learning_rate": 1.4970596945466854e-05, - "loss": 0.0528, - "step": 29720 - }, - { - "epoch": 0.7545373778398274, - "grad_norm": 0.8729093074798584, - "learning_rate": 1.4969750814401152e-05, - "loss": 0.0874, - "step": 29725 - }, - { - "epoch": 0.7546642974996827, - "grad_norm": 0.5261000394821167, - "learning_rate": 1.496890468333545e-05, - "loss": 0.0682, - "step": 29730 - }, - { - "epoch": 0.754791217159538, - "grad_norm": 0.5229023694992065, - "learning_rate": 1.4968058552269749e-05, - "loss": 0.0642, - "step": 29735 - }, - { - "epoch": 0.7549181368193933, - "grad_norm": 0.6798512935638428, - "learning_rate": 1.4967212421204046e-05, - "loss": 0.0882, - "step": 29740 - }, - { - "epoch": 0.7550450564792487, - "grad_norm": 1.297752857208252, - "learning_rate": 1.4966366290138344e-05, - "loss": 0.0919, - "step": 29745 - }, - { - "epoch": 0.7551719761391039, - "grad_norm": 0.6955862641334534, - "learning_rate": 1.4965520159072642e-05, - "loss": 0.0776, - "step": 29750 - }, - { - "epoch": 0.7552988957989593, - "grad_norm": 0.6500052213668823, - "learning_rate": 1.4964674028006941e-05, - "loss": 0.0711, - "step": 29755 - }, - { - "epoch": 0.7554258154588146, - "grad_norm": 0.5278045535087585, - "learning_rate": 1.4963827896941236e-05, - "loss": 0.0521, - "step": 29760 - }, - { - "epoch": 0.7555527351186698, - "grad_norm": 1.8142311573028564, - "learning_rate": 1.4962981765875534e-05, - "loss": 0.0743, - "step": 29765 - }, - { - "epoch": 0.7556796547785252, - "grad_norm": 0.5149450302124023, - "learning_rate": 1.4962135634809833e-05, - "loss": 0.071, - "step": 29770 - }, - { - "epoch": 0.7558065744383805, - "grad_norm": 0.3879036605358124, - "learning_rate": 1.4961289503744131e-05, - "loss": 0.0641, - "step": 29775 - }, - { - "epoch": 0.7559334940982358, - "grad_norm": 0.41836419701576233, - "learning_rate": 1.4960443372678428e-05, - "loss": 0.0785, - "step": 29780 - }, - { - "epoch": 0.7560604137580911, - "grad_norm": 0.47589805722236633, - "learning_rate": 1.4959597241612726e-05, - "loss": 0.0878, - "step": 29785 - }, - { - "epoch": 0.7561873334179464, - "grad_norm": 1.3729356527328491, - "learning_rate": 1.4958751110547025e-05, - "loss": 0.0667, - "step": 29790 - }, - { - "epoch": 0.7563142530778018, - "grad_norm": 0.38263261318206787, - "learning_rate": 1.4957904979481323e-05, - "loss": 0.0761, - "step": 29795 - }, - { - "epoch": 0.7564411727376571, - "grad_norm": 0.5796956419944763, - "learning_rate": 1.495705884841562e-05, - "loss": 0.0737, - "step": 29800 - }, - { - "epoch": 0.7565680923975123, - "grad_norm": 0.37834733724594116, - "learning_rate": 1.4956212717349918e-05, - "loss": 0.0669, - "step": 29805 - }, - { - "epoch": 0.7566950120573677, - "grad_norm": 0.46348610520362854, - "learning_rate": 1.4955366586284217e-05, - "loss": 0.0584, - "step": 29810 - }, - { - "epoch": 0.756821931717223, - "grad_norm": 0.6900023221969604, - "learning_rate": 1.4954520455218515e-05, - "loss": 0.0633, - "step": 29815 - }, - { - "epoch": 0.7569488513770783, - "grad_norm": 0.7331473231315613, - "learning_rate": 1.4953674324152812e-05, - "loss": 0.0756, - "step": 29820 - }, - { - "epoch": 0.7570757710369336, - "grad_norm": 0.4344514012336731, - "learning_rate": 1.495282819308711e-05, - "loss": 0.0545, - "step": 29825 - }, - { - "epoch": 0.7572026906967889, - "grad_norm": 1.109406590461731, - "learning_rate": 1.4951982062021408e-05, - "loss": 0.0846, - "step": 29830 - }, - { - "epoch": 0.7573296103566443, - "grad_norm": 0.7990314960479736, - "learning_rate": 1.4951135930955707e-05, - "loss": 0.0935, - "step": 29835 - }, - { - "epoch": 0.7574565300164996, - "grad_norm": 0.39833390712738037, - "learning_rate": 1.4950289799890004e-05, - "loss": 0.0643, - "step": 29840 - }, - { - "epoch": 0.7575834496763548, - "grad_norm": 0.6242804527282715, - "learning_rate": 1.4949443668824302e-05, - "loss": 0.0706, - "step": 29845 - }, - { - "epoch": 0.7577103693362102, - "grad_norm": 0.38884738087654114, - "learning_rate": 1.49485975377586e-05, - "loss": 0.0618, - "step": 29850 - }, - { - "epoch": 0.7578372889960655, - "grad_norm": 0.4600982069969177, - "learning_rate": 1.4947751406692899e-05, - "loss": 0.0592, - "step": 29855 - }, - { - "epoch": 0.7579642086559208, - "grad_norm": 0.44843173027038574, - "learning_rate": 1.4946905275627195e-05, - "loss": 0.065, - "step": 29860 - }, - { - "epoch": 0.7580911283157761, - "grad_norm": 0.6397937536239624, - "learning_rate": 1.4946059144561494e-05, - "loss": 0.0884, - "step": 29865 - }, - { - "epoch": 0.7582180479756314, - "grad_norm": 0.5592297315597534, - "learning_rate": 1.4945213013495792e-05, - "loss": 0.0875, - "step": 29870 - }, - { - "epoch": 0.7583449676354868, - "grad_norm": 0.575652003288269, - "learning_rate": 1.494436688243009e-05, - "loss": 0.0707, - "step": 29875 - }, - { - "epoch": 0.7584718872953421, - "grad_norm": 0.8700566291809082, - "learning_rate": 1.4943520751364386e-05, - "loss": 0.0745, - "step": 29880 - }, - { - "epoch": 0.7585988069551973, - "grad_norm": 0.45290303230285645, - "learning_rate": 1.4942674620298686e-05, - "loss": 0.0768, - "step": 29885 - }, - { - "epoch": 0.7587257266150527, - "grad_norm": 0.3171761631965637, - "learning_rate": 1.4941828489232984e-05, - "loss": 0.0603, - "step": 29890 - }, - { - "epoch": 0.758852646274908, - "grad_norm": 0.4780208468437195, - "learning_rate": 1.4940982358167283e-05, - "loss": 0.0731, - "step": 29895 - }, - { - "epoch": 0.7589795659347633, - "grad_norm": 0.6445929408073425, - "learning_rate": 1.4940136227101578e-05, - "loss": 0.0804, - "step": 29900 - }, - { - "epoch": 0.7591064855946186, - "grad_norm": 0.8894585967063904, - "learning_rate": 1.4939290096035876e-05, - "loss": 0.0901, - "step": 29905 - }, - { - "epoch": 0.7592334052544739, - "grad_norm": 0.37375855445861816, - "learning_rate": 1.4938443964970174e-05, - "loss": 0.072, - "step": 29910 - }, - { - "epoch": 0.7593603249143293, - "grad_norm": 0.3967142701148987, - "learning_rate": 1.4937597833904473e-05, - "loss": 0.0654, - "step": 29915 - }, - { - "epoch": 0.7594872445741846, - "grad_norm": 0.5756111145019531, - "learning_rate": 1.493675170283877e-05, - "loss": 0.0848, - "step": 29920 - }, - { - "epoch": 0.7596141642340398, - "grad_norm": 0.5455024838447571, - "learning_rate": 1.4935905571773068e-05, - "loss": 0.0578, - "step": 29925 - }, - { - "epoch": 0.7597410838938952, - "grad_norm": 0.5772226452827454, - "learning_rate": 1.4935059440707366e-05, - "loss": 0.065, - "step": 29930 - }, - { - "epoch": 0.7598680035537505, - "grad_norm": 0.5768173336982727, - "learning_rate": 1.4934213309641665e-05, - "loss": 0.0689, - "step": 29935 - }, - { - "epoch": 0.7599949232136057, - "grad_norm": 0.5401756167411804, - "learning_rate": 1.4933367178575963e-05, - "loss": 0.058, - "step": 29940 - }, - { - "epoch": 0.7601218428734611, - "grad_norm": 0.5979132056236267, - "learning_rate": 1.493252104751026e-05, - "loss": 0.0883, - "step": 29945 - }, - { - "epoch": 0.7602487625333164, - "grad_norm": 0.7020392417907715, - "learning_rate": 1.4931674916444558e-05, - "loss": 0.0663, - "step": 29950 - }, - { - "epoch": 0.7603756821931718, - "grad_norm": 0.5310001969337463, - "learning_rate": 1.4930828785378857e-05, - "loss": 0.0722, - "step": 29955 - }, - { - "epoch": 0.760502601853027, - "grad_norm": 0.7375436425209045, - "learning_rate": 1.4929982654313155e-05, - "loss": 0.071, - "step": 29960 - }, - { - "epoch": 0.7606295215128823, - "grad_norm": 0.5252154469490051, - "learning_rate": 1.4929136523247452e-05, - "loss": 0.0667, - "step": 29965 - }, - { - "epoch": 0.7607564411727377, - "grad_norm": 0.7287197113037109, - "learning_rate": 1.492829039218175e-05, - "loss": 0.0772, - "step": 29970 - }, - { - "epoch": 0.760883360832593, - "grad_norm": 1.311398983001709, - "learning_rate": 1.4927444261116049e-05, - "loss": 0.0549, - "step": 29975 - }, - { - "epoch": 0.7610102804924482, - "grad_norm": 0.6240014433860779, - "learning_rate": 1.4926598130050347e-05, - "loss": 0.0578, - "step": 29980 - }, - { - "epoch": 0.7611372001523036, - "grad_norm": 0.5280140042304993, - "learning_rate": 1.4925751998984644e-05, - "loss": 0.0664, - "step": 29985 - }, - { - "epoch": 0.7612641198121589, - "grad_norm": 0.4737898111343384, - "learning_rate": 1.4924905867918942e-05, - "loss": 0.0592, - "step": 29990 - }, - { - "epoch": 0.7613910394720143, - "grad_norm": 0.6120059490203857, - "learning_rate": 1.492405973685324e-05, - "loss": 0.0829, - "step": 29995 - }, - { - "epoch": 0.7615179591318695, - "grad_norm": 1.5761935710906982, - "learning_rate": 1.4923213605787539e-05, - "loss": 0.0782, - "step": 30000 - }, - { - "epoch": 0.7616448787917248, - "grad_norm": 0.4019160568714142, - "learning_rate": 1.4922367474721836e-05, - "loss": 0.0854, - "step": 30005 - }, - { - "epoch": 0.7617717984515802, - "grad_norm": 0.4776358902454376, - "learning_rate": 1.4921521343656134e-05, - "loss": 0.0661, - "step": 30010 - }, - { - "epoch": 0.7618987181114355, - "grad_norm": 0.5293766260147095, - "learning_rate": 1.4920675212590432e-05, - "loss": 0.0649, - "step": 30015 - }, - { - "epoch": 0.7620256377712907, - "grad_norm": 0.6001604795455933, - "learning_rate": 1.491982908152473e-05, - "loss": 0.0932, - "step": 30020 - }, - { - "epoch": 0.7621525574311461, - "grad_norm": 0.5147014856338501, - "learning_rate": 1.4918982950459027e-05, - "loss": 0.0779, - "step": 30025 - }, - { - "epoch": 0.7622794770910014, - "grad_norm": 0.4256642758846283, - "learning_rate": 1.4918136819393326e-05, - "loss": 0.0901, - "step": 30030 - }, - { - "epoch": 0.7624063967508568, - "grad_norm": 0.9620353579521179, - "learning_rate": 1.4917290688327624e-05, - "loss": 0.094, - "step": 30035 - }, - { - "epoch": 0.762533316410712, - "grad_norm": 0.5547955632209778, - "learning_rate": 1.4916444557261923e-05, - "loss": 0.0843, - "step": 30040 - }, - { - "epoch": 0.7626602360705673, - "grad_norm": 0.9371635913848877, - "learning_rate": 1.4915598426196218e-05, - "loss": 0.0765, - "step": 30045 - }, - { - "epoch": 0.7627871557304227, - "grad_norm": 0.48247161507606506, - "learning_rate": 1.4914752295130516e-05, - "loss": 0.0598, - "step": 30050 - }, - { - "epoch": 0.762914075390278, - "grad_norm": 0.6551575064659119, - "learning_rate": 1.4913906164064815e-05, - "loss": 0.0935, - "step": 30055 - }, - { - "epoch": 0.7630409950501332, - "grad_norm": 0.4488649070262909, - "learning_rate": 1.4913060032999115e-05, - "loss": 0.0733, - "step": 30060 - }, - { - "epoch": 0.7631679147099886, - "grad_norm": 0.7957088947296143, - "learning_rate": 1.491221390193341e-05, - "loss": 0.0894, - "step": 30065 - }, - { - "epoch": 0.7632948343698439, - "grad_norm": 0.45995205640792847, - "learning_rate": 1.4911367770867708e-05, - "loss": 0.0683, - "step": 30070 - }, - { - "epoch": 0.7634217540296993, - "grad_norm": 0.5659030079841614, - "learning_rate": 1.4910521639802006e-05, - "loss": 0.0747, - "step": 30075 - }, - { - "epoch": 0.7635486736895545, - "grad_norm": 0.4601350724697113, - "learning_rate": 1.4909675508736305e-05, - "loss": 0.0612, - "step": 30080 - }, - { - "epoch": 0.7636755933494098, - "grad_norm": 0.4109269678592682, - "learning_rate": 1.4908829377670602e-05, - "loss": 0.0866, - "step": 30085 - }, - { - "epoch": 0.7638025130092652, - "grad_norm": 0.47649136185646057, - "learning_rate": 1.49079832466049e-05, - "loss": 0.0619, - "step": 30090 - }, - { - "epoch": 0.7639294326691205, - "grad_norm": 0.4922964870929718, - "learning_rate": 1.4907137115539198e-05, - "loss": 0.0668, - "step": 30095 - }, - { - "epoch": 0.7640563523289757, - "grad_norm": 1.4113637208938599, - "learning_rate": 1.4906290984473497e-05, - "loss": 0.0688, - "step": 30100 - }, - { - "epoch": 0.7641832719888311, - "grad_norm": 0.7833617329597473, - "learning_rate": 1.4905444853407793e-05, - "loss": 0.0777, - "step": 30105 - }, - { - "epoch": 0.7643101916486864, - "grad_norm": 0.4685598611831665, - "learning_rate": 1.4904598722342092e-05, - "loss": 0.0723, - "step": 30110 - }, - { - "epoch": 0.7644371113085416, - "grad_norm": 0.5594310164451599, - "learning_rate": 1.490375259127639e-05, - "loss": 0.0808, - "step": 30115 - }, - { - "epoch": 0.764564030968397, - "grad_norm": 0.6624841690063477, - "learning_rate": 1.4902906460210689e-05, - "loss": 0.08, - "step": 30120 - }, - { - "epoch": 0.7646909506282523, - "grad_norm": 0.5104637742042542, - "learning_rate": 1.4902060329144985e-05, - "loss": 0.0596, - "step": 30125 - }, - { - "epoch": 0.7648178702881077, - "grad_norm": 0.6444082260131836, - "learning_rate": 1.4901214198079284e-05, - "loss": 0.0567, - "step": 30130 - }, - { - "epoch": 0.7649447899479629, - "grad_norm": 0.46281588077545166, - "learning_rate": 1.4900368067013582e-05, - "loss": 0.0711, - "step": 30135 - }, - { - "epoch": 0.7650717096078182, - "grad_norm": 1.2092818021774292, - "learning_rate": 1.489952193594788e-05, - "loss": 0.0699, - "step": 30140 - }, - { - "epoch": 0.7651986292676736, - "grad_norm": 0.6539837718009949, - "learning_rate": 1.4898675804882177e-05, - "loss": 0.0789, - "step": 30145 - }, - { - "epoch": 0.7653255489275289, - "grad_norm": 0.5425387620925903, - "learning_rate": 1.4897829673816476e-05, - "loss": 0.0608, - "step": 30150 - }, - { - "epoch": 0.7654524685873841, - "grad_norm": 0.4669956564903259, - "learning_rate": 1.4896983542750774e-05, - "loss": 0.0568, - "step": 30155 - }, - { - "epoch": 0.7655793882472395, - "grad_norm": 0.680367112159729, - "learning_rate": 1.4896137411685072e-05, - "loss": 0.0819, - "step": 30160 - }, - { - "epoch": 0.7657063079070948, - "grad_norm": 0.28739216923713684, - "learning_rate": 1.489529128061937e-05, - "loss": 0.0652, - "step": 30165 - }, - { - "epoch": 0.7658332275669502, - "grad_norm": 0.49954766035079956, - "learning_rate": 1.4894445149553668e-05, - "loss": 0.0751, - "step": 30170 - }, - { - "epoch": 0.7659601472268054, - "grad_norm": 0.5181225538253784, - "learning_rate": 1.4893599018487966e-05, - "loss": 0.0796, - "step": 30175 - }, - { - "epoch": 0.7660870668866607, - "grad_norm": 0.26571303606033325, - "learning_rate": 1.4892752887422264e-05, - "loss": 0.0688, - "step": 30180 - }, - { - "epoch": 0.7662139865465161, - "grad_norm": 0.5462766289710999, - "learning_rate": 1.489190675635656e-05, - "loss": 0.0873, - "step": 30185 - }, - { - "epoch": 0.7663409062063714, - "grad_norm": 0.5059949159622192, - "learning_rate": 1.4891060625290858e-05, - "loss": 0.0567, - "step": 30190 - }, - { - "epoch": 0.7664678258662266, - "grad_norm": 0.7935439348220825, - "learning_rate": 1.4890214494225156e-05, - "loss": 0.0701, - "step": 30195 - }, - { - "epoch": 0.766594745526082, - "grad_norm": 0.49052372574806213, - "learning_rate": 1.4889368363159455e-05, - "loss": 0.0784, - "step": 30200 - }, - { - "epoch": 0.7667216651859373, - "grad_norm": 0.692450761795044, - "learning_rate": 1.4888522232093751e-05, - "loss": 0.0685, - "step": 30205 - }, - { - "epoch": 0.7668485848457927, - "grad_norm": 0.6642053723335266, - "learning_rate": 1.488767610102805e-05, - "loss": 0.0605, - "step": 30210 - }, - { - "epoch": 0.7669755045056479, - "grad_norm": 0.4950358271598816, - "learning_rate": 1.4886829969962348e-05, - "loss": 0.0842, - "step": 30215 - }, - { - "epoch": 0.7671024241655032, - "grad_norm": 0.5119472742080688, - "learning_rate": 1.4885983838896647e-05, - "loss": 0.0668, - "step": 30220 - }, - { - "epoch": 0.7672293438253586, - "grad_norm": 0.4668087363243103, - "learning_rate": 1.4885137707830943e-05, - "loss": 0.0594, - "step": 30225 - }, - { - "epoch": 0.7673562634852139, - "grad_norm": 0.5948642492294312, - "learning_rate": 1.4884291576765242e-05, - "loss": 0.0926, - "step": 30230 - }, - { - "epoch": 0.7674831831450691, - "grad_norm": 0.3609302341938019, - "learning_rate": 1.488344544569954e-05, - "loss": 0.0801, - "step": 30235 - }, - { - "epoch": 0.7676101028049245, - "grad_norm": 0.4156063497066498, - "learning_rate": 1.4882599314633838e-05, - "loss": 0.0615, - "step": 30240 - }, - { - "epoch": 0.7677370224647798, - "grad_norm": 0.6675037145614624, - "learning_rate": 1.4881753183568135e-05, - "loss": 0.0644, - "step": 30245 - }, - { - "epoch": 0.7678639421246352, - "grad_norm": 0.8472537398338318, - "learning_rate": 1.4880907052502434e-05, - "loss": 0.0742, - "step": 30250 - }, - { - "epoch": 0.7679908617844904, - "grad_norm": 2.053394317626953, - "learning_rate": 1.4880060921436732e-05, - "loss": 0.085, - "step": 30255 - }, - { - "epoch": 0.7681177814443457, - "grad_norm": 0.7767593860626221, - "learning_rate": 1.487921479037103e-05, - "loss": 0.0792, - "step": 30260 - }, - { - "epoch": 0.7682447011042011, - "grad_norm": 0.8150629997253418, - "learning_rate": 1.4878368659305327e-05, - "loss": 0.0814, - "step": 30265 - }, - { - "epoch": 0.7683716207640564, - "grad_norm": 0.39425066113471985, - "learning_rate": 1.4877522528239625e-05, - "loss": 0.0672, - "step": 30270 - }, - { - "epoch": 0.7684985404239116, - "grad_norm": 0.5923006534576416, - "learning_rate": 1.4876676397173924e-05, - "loss": 0.0729, - "step": 30275 - }, - { - "epoch": 0.768625460083767, - "grad_norm": 0.6596285104751587, - "learning_rate": 1.4875830266108222e-05, - "loss": 0.0712, - "step": 30280 - }, - { - "epoch": 0.7687523797436223, - "grad_norm": 0.4602755308151245, - "learning_rate": 1.4874984135042519e-05, - "loss": 0.0743, - "step": 30285 - }, - { - "epoch": 0.7688792994034775, - "grad_norm": 0.5921041369438171, - "learning_rate": 1.4874138003976817e-05, - "loss": 0.0651, - "step": 30290 - }, - { - "epoch": 0.7690062190633329, - "grad_norm": 0.5446915626525879, - "learning_rate": 1.4873291872911116e-05, - "loss": 0.0728, - "step": 30295 - }, - { - "epoch": 0.7691331387231882, - "grad_norm": 0.5777859091758728, - "learning_rate": 1.4872445741845414e-05, - "loss": 0.061, - "step": 30300 - }, - { - "epoch": 0.7692600583830436, - "grad_norm": 0.6727259755134583, - "learning_rate": 1.487159961077971e-05, - "loss": 0.0709, - "step": 30305 - }, - { - "epoch": 0.7693869780428988, - "grad_norm": 0.6136509776115417, - "learning_rate": 1.487075347971401e-05, - "loss": 0.0756, - "step": 30310 - }, - { - "epoch": 0.7695138977027541, - "grad_norm": 0.6126183271408081, - "learning_rate": 1.4869907348648308e-05, - "loss": 0.0597, - "step": 30315 - }, - { - "epoch": 0.7696408173626095, - "grad_norm": 0.3909342586994171, - "learning_rate": 1.4869061217582606e-05, - "loss": 0.0706, - "step": 30320 - }, - { - "epoch": 0.7697677370224648, - "grad_norm": 0.8121792674064636, - "learning_rate": 1.4868215086516901e-05, - "loss": 0.0863, - "step": 30325 - }, - { - "epoch": 0.76989465668232, - "grad_norm": 0.6135502457618713, - "learning_rate": 1.48673689554512e-05, - "loss": 0.057, - "step": 30330 - }, - { - "epoch": 0.7700215763421754, - "grad_norm": 0.5371590256690979, - "learning_rate": 1.4866522824385498e-05, - "loss": 0.0629, - "step": 30335 - }, - { - "epoch": 0.7701484960020307, - "grad_norm": 0.4908217489719391, - "learning_rate": 1.4865676693319796e-05, - "loss": 0.0848, - "step": 30340 - }, - { - "epoch": 0.7702754156618861, - "grad_norm": 0.6081997752189636, - "learning_rate": 1.4864830562254093e-05, - "loss": 0.0787, - "step": 30345 - }, - { - "epoch": 0.7704023353217413, - "grad_norm": 0.560494065284729, - "learning_rate": 1.4863984431188391e-05, - "loss": 0.0642, - "step": 30350 - }, - { - "epoch": 0.7705292549815966, - "grad_norm": 0.3568163514137268, - "learning_rate": 1.486313830012269e-05, - "loss": 0.0766, - "step": 30355 - }, - { - "epoch": 0.770656174641452, - "grad_norm": 0.41624337434768677, - "learning_rate": 1.4862292169056988e-05, - "loss": 0.0816, - "step": 30360 - }, - { - "epoch": 0.7707830943013073, - "grad_norm": 0.5065106749534607, - "learning_rate": 1.4861446037991285e-05, - "loss": 0.0805, - "step": 30365 - }, - { - "epoch": 0.7709100139611625, - "grad_norm": 1.1701816320419312, - "learning_rate": 1.4860599906925583e-05, - "loss": 0.0775, - "step": 30370 - }, - { - "epoch": 0.7710369336210179, - "grad_norm": 0.6785059571266174, - "learning_rate": 1.4859753775859882e-05, - "loss": 0.059, - "step": 30375 - }, - { - "epoch": 0.7711638532808732, - "grad_norm": 0.4670150876045227, - "learning_rate": 1.485890764479418e-05, - "loss": 0.0845, - "step": 30380 - }, - { - "epoch": 0.7712907729407286, - "grad_norm": 0.5429149866104126, - "learning_rate": 1.4858061513728477e-05, - "loss": 0.0864, - "step": 30385 - }, - { - "epoch": 0.7714176926005838, - "grad_norm": 0.6390225887298584, - "learning_rate": 1.4857215382662775e-05, - "loss": 0.0805, - "step": 30390 - }, - { - "epoch": 0.7715446122604391, - "grad_norm": 0.41784724593162537, - "learning_rate": 1.4856369251597074e-05, - "loss": 0.0678, - "step": 30395 - }, - { - "epoch": 0.7716715319202945, - "grad_norm": 0.8452796936035156, - "learning_rate": 1.4855523120531372e-05, - "loss": 0.0616, - "step": 30400 - }, - { - "epoch": 0.7717984515801498, - "grad_norm": 0.49870625138282776, - "learning_rate": 1.4854676989465669e-05, - "loss": 0.0645, - "step": 30405 - }, - { - "epoch": 0.771925371240005, - "grad_norm": 0.47993922233581543, - "learning_rate": 1.4853830858399967e-05, - "loss": 0.0639, - "step": 30410 - }, - { - "epoch": 0.7720522908998604, - "grad_norm": 0.5391345024108887, - "learning_rate": 1.4852984727334266e-05, - "loss": 0.0676, - "step": 30415 - }, - { - "epoch": 0.7721792105597157, - "grad_norm": 0.5722408294677734, - "learning_rate": 1.4852138596268564e-05, - "loss": 0.0665, - "step": 30420 - }, - { - "epoch": 0.7723061302195711, - "grad_norm": 1.6434823274612427, - "learning_rate": 1.485129246520286e-05, - "loss": 0.0778, - "step": 30425 - }, - { - "epoch": 0.7724330498794263, - "grad_norm": 0.4961491525173187, - "learning_rate": 1.4850446334137159e-05, - "loss": 0.0782, - "step": 30430 - }, - { - "epoch": 0.7725599695392816, - "grad_norm": 0.41744646430015564, - "learning_rate": 1.4849600203071457e-05, - "loss": 0.0775, - "step": 30435 - }, - { - "epoch": 0.772686889199137, - "grad_norm": 0.35135531425476074, - "learning_rate": 1.4848754072005756e-05, - "loss": 0.0686, - "step": 30440 - }, - { - "epoch": 0.7728138088589923, - "grad_norm": 0.5640256404876709, - "learning_rate": 1.4847907940940051e-05, - "loss": 0.0604, - "step": 30445 - }, - { - "epoch": 0.7729407285188475, - "grad_norm": 0.4191535413265228, - "learning_rate": 1.4847061809874351e-05, - "loss": 0.0701, - "step": 30450 - }, - { - "epoch": 0.7730676481787029, - "grad_norm": 0.4101462960243225, - "learning_rate": 1.484621567880865e-05, - "loss": 0.057, - "step": 30455 - }, - { - "epoch": 0.7731945678385582, - "grad_norm": 0.7200819253921509, - "learning_rate": 1.4845369547742948e-05, - "loss": 0.0672, - "step": 30460 - }, - { - "epoch": 0.7733214874984135, - "grad_norm": 0.46943381428718567, - "learning_rate": 1.4844523416677246e-05, - "loss": 0.0602, - "step": 30465 - }, - { - "epoch": 0.7734484071582688, - "grad_norm": 0.48293331265449524, - "learning_rate": 1.4843677285611541e-05, - "loss": 0.0796, - "step": 30470 - }, - { - "epoch": 0.7735753268181241, - "grad_norm": 0.5462067127227783, - "learning_rate": 1.484283115454584e-05, - "loss": 0.0696, - "step": 30475 - }, - { - "epoch": 0.7737022464779795, - "grad_norm": 0.5972978472709656, - "learning_rate": 1.4841985023480138e-05, - "loss": 0.0849, - "step": 30480 - }, - { - "epoch": 0.7738291661378347, - "grad_norm": 0.5914309620857239, - "learning_rate": 1.4841138892414438e-05, - "loss": 0.079, - "step": 30485 - }, - { - "epoch": 0.77395608579769, - "grad_norm": 0.6182258129119873, - "learning_rate": 1.4840292761348733e-05, - "loss": 0.0559, - "step": 30490 - }, - { - "epoch": 0.7740830054575454, - "grad_norm": 0.4651826322078705, - "learning_rate": 1.4839446630283032e-05, - "loss": 0.08, - "step": 30495 - }, - { - "epoch": 0.7742099251174007, - "grad_norm": 0.39365822076797485, - "learning_rate": 1.483860049921733e-05, - "loss": 0.0664, - "step": 30500 - }, - { - "epoch": 0.774336844777256, - "grad_norm": 0.6731411814689636, - "learning_rate": 1.4837754368151628e-05, - "loss": 0.0962, - "step": 30505 - }, - { - "epoch": 0.7744637644371113, - "grad_norm": 0.8519754409790039, - "learning_rate": 1.4836908237085925e-05, - "loss": 0.0835, - "step": 30510 - }, - { - "epoch": 0.7745906840969666, - "grad_norm": 1.0141290426254272, - "learning_rate": 1.4836062106020223e-05, - "loss": 0.0757, - "step": 30515 - }, - { - "epoch": 0.774717603756822, - "grad_norm": 0.6135609745979309, - "learning_rate": 1.4835215974954522e-05, - "loss": 0.0774, - "step": 30520 - }, - { - "epoch": 0.7748445234166772, - "grad_norm": 0.43043050169944763, - "learning_rate": 1.483436984388882e-05, - "loss": 0.0724, - "step": 30525 - }, - { - "epoch": 0.7749714430765325, - "grad_norm": 0.5069081783294678, - "learning_rate": 1.4833523712823117e-05, - "loss": 0.0544, - "step": 30530 - }, - { - "epoch": 0.7750983627363879, - "grad_norm": 1.9563994407653809, - "learning_rate": 1.4832677581757415e-05, - "loss": 0.0756, - "step": 30535 - }, - { - "epoch": 0.7752252823962432, - "grad_norm": 0.5397592782974243, - "learning_rate": 1.4831831450691714e-05, - "loss": 0.0642, - "step": 30540 - }, - { - "epoch": 0.7753522020560984, - "grad_norm": 0.7320276498794556, - "learning_rate": 1.4830985319626012e-05, - "loss": 0.0924, - "step": 30545 - }, - { - "epoch": 0.7754791217159538, - "grad_norm": 0.5129140615463257, - "learning_rate": 1.4830139188560309e-05, - "loss": 0.0621, - "step": 30550 - }, - { - "epoch": 0.7756060413758091, - "grad_norm": 0.3828965425491333, - "learning_rate": 1.4829293057494607e-05, - "loss": 0.0673, - "step": 30555 - }, - { - "epoch": 0.7757329610356645, - "grad_norm": 0.819807231426239, - "learning_rate": 1.4828446926428906e-05, - "loss": 0.0617, - "step": 30560 - }, - { - "epoch": 0.7758598806955197, - "grad_norm": 0.48565420508384705, - "learning_rate": 1.4827600795363204e-05, - "loss": 0.0658, - "step": 30565 - }, - { - "epoch": 0.775986800355375, - "grad_norm": 0.5064593553543091, - "learning_rate": 1.48267546642975e-05, - "loss": 0.0623, - "step": 30570 - }, - { - "epoch": 0.7761137200152304, - "grad_norm": 0.6187313199043274, - "learning_rate": 1.48259085332318e-05, - "loss": 0.0806, - "step": 30575 - }, - { - "epoch": 0.7762406396750857, - "grad_norm": 0.46752530336380005, - "learning_rate": 1.4825062402166098e-05, - "loss": 0.065, - "step": 30580 - }, - { - "epoch": 0.776367559334941, - "grad_norm": 0.4986947476863861, - "learning_rate": 1.4824216271100396e-05, - "loss": 0.0663, - "step": 30585 - }, - { - "epoch": 0.7764944789947963, - "grad_norm": 0.36943739652633667, - "learning_rate": 1.4823370140034693e-05, - "loss": 0.0689, - "step": 30590 - }, - { - "epoch": 0.7766213986546516, - "grad_norm": 0.5383251309394836, - "learning_rate": 1.4822524008968991e-05, - "loss": 0.087, - "step": 30595 - }, - { - "epoch": 0.776748318314507, - "grad_norm": 0.6788865923881531, - "learning_rate": 1.482167787790329e-05, - "loss": 0.07, - "step": 30600 - }, - { - "epoch": 0.7768752379743622, - "grad_norm": 0.396610289812088, - "learning_rate": 1.4820831746837588e-05, - "loss": 0.0934, - "step": 30605 - }, - { - "epoch": 0.7770021576342175, - "grad_norm": 0.3157411217689514, - "learning_rate": 1.4819985615771883e-05, - "loss": 0.0548, - "step": 30610 - }, - { - "epoch": 0.7771290772940729, - "grad_norm": 0.6718364953994751, - "learning_rate": 1.4819139484706181e-05, - "loss": 0.0616, - "step": 30615 - }, - { - "epoch": 0.7772559969539282, - "grad_norm": 0.6272094249725342, - "learning_rate": 1.481829335364048e-05, - "loss": 0.0678, - "step": 30620 - }, - { - "epoch": 0.7773829166137834, - "grad_norm": 0.5178799033164978, - "learning_rate": 1.4817447222574778e-05, - "loss": 0.0624, - "step": 30625 - }, - { - "epoch": 0.7775098362736388, - "grad_norm": 0.41116195917129517, - "learning_rate": 1.4816601091509075e-05, - "loss": 0.0675, - "step": 30630 - }, - { - "epoch": 0.7776367559334941, - "grad_norm": 0.7192368507385254, - "learning_rate": 1.4815754960443373e-05, - "loss": 0.0659, - "step": 30635 - }, - { - "epoch": 0.7777636755933494, - "grad_norm": 0.5193915367126465, - "learning_rate": 1.4814908829377672e-05, - "loss": 0.0596, - "step": 30640 - }, - { - "epoch": 0.7778905952532047, - "grad_norm": 0.6167057156562805, - "learning_rate": 1.481406269831197e-05, - "loss": 0.0892, - "step": 30645 - }, - { - "epoch": 0.77801751491306, - "grad_norm": 0.46683770418167114, - "learning_rate": 1.4813216567246267e-05, - "loss": 0.0804, - "step": 30650 - }, - { - "epoch": 0.7781444345729154, - "grad_norm": 0.5757700204849243, - "learning_rate": 1.4812370436180565e-05, - "loss": 0.0728, - "step": 30655 - }, - { - "epoch": 0.7782713542327706, - "grad_norm": 0.8998892307281494, - "learning_rate": 1.4811524305114864e-05, - "loss": 0.0709, - "step": 30660 - }, - { - "epoch": 0.778398273892626, - "grad_norm": 0.8724544644355774, - "learning_rate": 1.4810678174049162e-05, - "loss": 0.0911, - "step": 30665 - }, - { - "epoch": 0.7785251935524813, - "grad_norm": 0.42512497305870056, - "learning_rate": 1.4809832042983459e-05, - "loss": 0.0792, - "step": 30670 - }, - { - "epoch": 0.7786521132123366, - "grad_norm": 0.48368510603904724, - "learning_rate": 1.4808985911917757e-05, - "loss": 0.0592, - "step": 30675 - }, - { - "epoch": 0.7787790328721919, - "grad_norm": 0.45768478512763977, - "learning_rate": 1.4808139780852055e-05, - "loss": 0.066, - "step": 30680 - }, - { - "epoch": 0.7789059525320472, - "grad_norm": 0.4730912148952484, - "learning_rate": 1.4807293649786354e-05, - "loss": 0.0769, - "step": 30685 - }, - { - "epoch": 0.7790328721919025, - "grad_norm": 0.35665127635002136, - "learning_rate": 1.480644751872065e-05, - "loss": 0.0598, - "step": 30690 - }, - { - "epoch": 0.7791597918517579, - "grad_norm": 0.5809875726699829, - "learning_rate": 1.4805601387654949e-05, - "loss": 0.0787, - "step": 30695 - }, - { - "epoch": 0.7792867115116131, - "grad_norm": 0.588925838470459, - "learning_rate": 1.4804755256589247e-05, - "loss": 0.0692, - "step": 30700 - }, - { - "epoch": 0.7794136311714684, - "grad_norm": 0.47749602794647217, - "learning_rate": 1.4803909125523546e-05, - "loss": 0.0834, - "step": 30705 - }, - { - "epoch": 0.7795405508313238, - "grad_norm": 0.22754353284835815, - "learning_rate": 1.4803062994457842e-05, - "loss": 0.0606, - "step": 30710 - }, - { - "epoch": 0.7796674704911791, - "grad_norm": 0.5650187134742737, - "learning_rate": 1.4802216863392141e-05, - "loss": 0.0594, - "step": 30715 - }, - { - "epoch": 0.7797943901510344, - "grad_norm": 0.6660652756690979, - "learning_rate": 1.480137073232644e-05, - "loss": 0.0558, - "step": 30720 - }, - { - "epoch": 0.7799213098108897, - "grad_norm": 0.4821256697177887, - "learning_rate": 1.4800524601260738e-05, - "loss": 0.0865, - "step": 30725 - }, - { - "epoch": 0.780048229470745, - "grad_norm": 0.6139072775840759, - "learning_rate": 1.4799678470195034e-05, - "loss": 0.08, - "step": 30730 - }, - { - "epoch": 0.7801751491306004, - "grad_norm": 0.8270078897476196, - "learning_rate": 1.4798832339129333e-05, - "loss": 0.0795, - "step": 30735 - }, - { - "epoch": 0.7803020687904556, - "grad_norm": 0.5354040265083313, - "learning_rate": 1.4797986208063631e-05, - "loss": 0.0624, - "step": 30740 - }, - { - "epoch": 0.780428988450311, - "grad_norm": 0.48216837644577026, - "learning_rate": 1.479714007699793e-05, - "loss": 0.0607, - "step": 30745 - }, - { - "epoch": 0.7805559081101663, - "grad_norm": 0.41486406326293945, - "learning_rate": 1.4796293945932225e-05, - "loss": 0.0437, - "step": 30750 - }, - { - "epoch": 0.7806828277700216, - "grad_norm": 0.4032995402812958, - "learning_rate": 1.4795447814866523e-05, - "loss": 0.0821, - "step": 30755 - }, - { - "epoch": 0.7808097474298769, - "grad_norm": 0.4619740843772888, - "learning_rate": 1.4794601683800821e-05, - "loss": 0.049, - "step": 30760 - }, - { - "epoch": 0.7809366670897322, - "grad_norm": 0.5550089478492737, - "learning_rate": 1.479375555273512e-05, - "loss": 0.0719, - "step": 30765 - }, - { - "epoch": 0.7810635867495875, - "grad_norm": 0.3332850933074951, - "learning_rate": 1.4792909421669417e-05, - "loss": 0.0623, - "step": 30770 - }, - { - "epoch": 0.7811905064094429, - "grad_norm": 0.5312429666519165, - "learning_rate": 1.4792063290603715e-05, - "loss": 0.0699, - "step": 30775 - }, - { - "epoch": 0.7813174260692981, - "grad_norm": 0.37628087401390076, - "learning_rate": 1.4791217159538013e-05, - "loss": 0.0547, - "step": 30780 - }, - { - "epoch": 0.7814443457291534, - "grad_norm": 0.7119603753089905, - "learning_rate": 1.4790371028472312e-05, - "loss": 0.0624, - "step": 30785 - }, - { - "epoch": 0.7815712653890088, - "grad_norm": 0.6147637963294983, - "learning_rate": 1.4789524897406608e-05, - "loss": 0.0695, - "step": 30790 - }, - { - "epoch": 0.781698185048864, - "grad_norm": 0.4485616981983185, - "learning_rate": 1.4788678766340907e-05, - "loss": 0.0686, - "step": 30795 - }, - { - "epoch": 0.7818251047087194, - "grad_norm": 0.7875478863716125, - "learning_rate": 1.4787832635275205e-05, - "loss": 0.0761, - "step": 30800 - }, - { - "epoch": 0.7819520243685747, - "grad_norm": 1.08090078830719, - "learning_rate": 1.4786986504209504e-05, - "loss": 0.0634, - "step": 30805 - }, - { - "epoch": 0.78207894402843, - "grad_norm": 0.7091637849807739, - "learning_rate": 1.47861403731438e-05, - "loss": 0.0636, - "step": 30810 - }, - { - "epoch": 0.7822058636882853, - "grad_norm": 0.41307327151298523, - "learning_rate": 1.4785294242078099e-05, - "loss": 0.0917, - "step": 30815 - }, - { - "epoch": 0.7823327833481406, - "grad_norm": 0.4309277832508087, - "learning_rate": 1.4784448111012397e-05, - "loss": 0.0748, - "step": 30820 - }, - { - "epoch": 0.7824597030079959, - "grad_norm": 0.5003345608711243, - "learning_rate": 1.4783601979946696e-05, - "loss": 0.0775, - "step": 30825 - }, - { - "epoch": 0.7825866226678513, - "grad_norm": 0.6731211543083191, - "learning_rate": 1.4782755848880992e-05, - "loss": 0.0694, - "step": 30830 - }, - { - "epoch": 0.7827135423277065, - "grad_norm": 0.3983331322669983, - "learning_rate": 1.478190971781529e-05, - "loss": 0.0553, - "step": 30835 - }, - { - "epoch": 0.7828404619875619, - "grad_norm": 0.5172597765922546, - "learning_rate": 1.4781063586749589e-05, - "loss": 0.0583, - "step": 30840 - }, - { - "epoch": 0.7829673816474172, - "grad_norm": 0.6505067944526672, - "learning_rate": 1.4780217455683887e-05, - "loss": 0.0793, - "step": 30845 - }, - { - "epoch": 0.7830943013072725, - "grad_norm": 0.5708786249160767, - "learning_rate": 1.4779371324618184e-05, - "loss": 0.0828, - "step": 30850 - }, - { - "epoch": 0.7832212209671278, - "grad_norm": 0.32028961181640625, - "learning_rate": 1.4778525193552483e-05, - "loss": 0.0678, - "step": 30855 - }, - { - "epoch": 0.7833481406269831, - "grad_norm": 0.2887182831764221, - "learning_rate": 1.4777679062486781e-05, - "loss": 0.0511, - "step": 30860 - }, - { - "epoch": 0.7834750602868384, - "grad_norm": 0.33483487367630005, - "learning_rate": 1.477683293142108e-05, - "loss": 0.0736, - "step": 30865 - }, - { - "epoch": 0.7836019799466938, - "grad_norm": 0.7583030462265015, - "learning_rate": 1.4775986800355374e-05, - "loss": 0.0836, - "step": 30870 - }, - { - "epoch": 0.783728899606549, - "grad_norm": 0.3691342771053314, - "learning_rate": 1.4775140669289675e-05, - "loss": 0.0617, - "step": 30875 - }, - { - "epoch": 0.7838558192664044, - "grad_norm": 0.522986888885498, - "learning_rate": 1.4774294538223973e-05, - "loss": 0.0734, - "step": 30880 - }, - { - "epoch": 0.7839827389262597, - "grad_norm": 0.5579305291175842, - "learning_rate": 1.4773448407158271e-05, - "loss": 0.0735, - "step": 30885 - }, - { - "epoch": 0.784109658586115, - "grad_norm": 0.7230187654495239, - "learning_rate": 1.4772602276092566e-05, - "loss": 0.0667, - "step": 30890 - }, - { - "epoch": 0.7842365782459703, - "grad_norm": 0.4178733229637146, - "learning_rate": 1.4771756145026865e-05, - "loss": 0.0579, - "step": 30895 - }, - { - "epoch": 0.7843634979058256, - "grad_norm": 0.28267133235931396, - "learning_rate": 1.4770910013961163e-05, - "loss": 0.0625, - "step": 30900 - }, - { - "epoch": 0.7844904175656809, - "grad_norm": 0.733267068862915, - "learning_rate": 1.4770063882895462e-05, - "loss": 0.1043, - "step": 30905 - }, - { - "epoch": 0.7846173372255363, - "grad_norm": 0.5525285005569458, - "learning_rate": 1.4769217751829758e-05, - "loss": 0.0669, - "step": 30910 - }, - { - "epoch": 0.7847442568853915, - "grad_norm": 0.46094128489494324, - "learning_rate": 1.4768371620764057e-05, - "loss": 0.0593, - "step": 30915 - }, - { - "epoch": 0.7848711765452469, - "grad_norm": 0.7031602263450623, - "learning_rate": 1.4767525489698355e-05, - "loss": 0.0922, - "step": 30920 - }, - { - "epoch": 0.7849980962051022, - "grad_norm": 0.5579974055290222, - "learning_rate": 1.4766679358632653e-05, - "loss": 0.078, - "step": 30925 - }, - { - "epoch": 0.7851250158649575, - "grad_norm": 0.6950144171714783, - "learning_rate": 1.476583322756695e-05, - "loss": 0.0848, - "step": 30930 - }, - { - "epoch": 0.7852519355248128, - "grad_norm": 0.5589301586151123, - "learning_rate": 1.4764987096501249e-05, - "loss": 0.0761, - "step": 30935 - }, - { - "epoch": 0.7853788551846681, - "grad_norm": 0.5518297553062439, - "learning_rate": 1.4764140965435547e-05, - "loss": 0.065, - "step": 30940 - }, - { - "epoch": 0.7855057748445234, - "grad_norm": 0.8098900318145752, - "learning_rate": 1.4763294834369845e-05, - "loss": 0.0651, - "step": 30945 - }, - { - "epoch": 0.7856326945043788, - "grad_norm": 0.5179086327552795, - "learning_rate": 1.4762448703304142e-05, - "loss": 0.0893, - "step": 30950 - }, - { - "epoch": 0.785759614164234, - "grad_norm": 0.3082595765590668, - "learning_rate": 1.476160257223844e-05, - "loss": 0.0632, - "step": 30955 - }, - { - "epoch": 0.7858865338240894, - "grad_norm": 0.5005350112915039, - "learning_rate": 1.4760756441172739e-05, - "loss": 0.0705, - "step": 30960 - }, - { - "epoch": 0.7860134534839447, - "grad_norm": 0.4449203312397003, - "learning_rate": 1.4759910310107037e-05, - "loss": 0.0557, - "step": 30965 - }, - { - "epoch": 0.7861403731437999, - "grad_norm": 1.8027777671813965, - "learning_rate": 1.4759064179041336e-05, - "loss": 0.0654, - "step": 30970 - }, - { - "epoch": 0.7862672928036553, - "grad_norm": 0.6387362480163574, - "learning_rate": 1.4758218047975632e-05, - "loss": 0.0808, - "step": 30975 - }, - { - "epoch": 0.7863942124635106, - "grad_norm": 0.5748385787010193, - "learning_rate": 1.475737191690993e-05, - "loss": 0.0523, - "step": 30980 - }, - { - "epoch": 0.7865211321233659, - "grad_norm": 0.3813713788986206, - "learning_rate": 1.475652578584423e-05, - "loss": 0.0657, - "step": 30985 - }, - { - "epoch": 0.7866480517832212, - "grad_norm": 0.6522166728973389, - "learning_rate": 1.4755679654778528e-05, - "loss": 0.0911, - "step": 30990 - }, - { - "epoch": 0.7867749714430765, - "grad_norm": 0.35927101969718933, - "learning_rate": 1.4754833523712824e-05, - "loss": 0.0516, - "step": 30995 - }, - { - "epoch": 0.7869018911029318, - "grad_norm": 0.6662034392356873, - "learning_rate": 1.4753987392647123e-05, - "loss": 0.0583, - "step": 31000 - }, - { - "epoch": 0.7870288107627872, - "grad_norm": 0.38864943385124207, - "learning_rate": 1.4753141261581421e-05, - "loss": 0.0559, - "step": 31005 - }, - { - "epoch": 0.7871557304226424, - "grad_norm": 0.6196631789207458, - "learning_rate": 1.475229513051572e-05, - "loss": 0.0758, - "step": 31010 - }, - { - "epoch": 0.7872826500824978, - "grad_norm": 0.5190602540969849, - "learning_rate": 1.4751448999450016e-05, - "loss": 0.0761, - "step": 31015 - }, - { - "epoch": 0.7874095697423531, - "grad_norm": 0.6095659732818604, - "learning_rate": 1.4750602868384315e-05, - "loss": 0.0718, - "step": 31020 - }, - { - "epoch": 0.7875364894022084, - "grad_norm": 0.4070691168308258, - "learning_rate": 1.4749756737318613e-05, - "loss": 0.0719, - "step": 31025 - }, - { - "epoch": 0.7876634090620637, - "grad_norm": 0.9937835335731506, - "learning_rate": 1.4748910606252911e-05, - "loss": 0.112, - "step": 31030 - }, - { - "epoch": 0.787790328721919, - "grad_norm": 0.6979738473892212, - "learning_rate": 1.4748064475187206e-05, - "loss": 0.0494, - "step": 31035 - }, - { - "epoch": 0.7879172483817743, - "grad_norm": 0.7408387064933777, - "learning_rate": 1.4747218344121505e-05, - "loss": 0.0744, - "step": 31040 - }, - { - "epoch": 0.7880441680416297, - "grad_norm": 0.4042978882789612, - "learning_rate": 1.4746372213055803e-05, - "loss": 0.0754, - "step": 31045 - }, - { - "epoch": 0.7881710877014849, - "grad_norm": 0.9426077008247375, - "learning_rate": 1.4745526081990103e-05, - "loss": 0.0725, - "step": 31050 - }, - { - "epoch": 0.7882980073613403, - "grad_norm": 0.5470988154411316, - "learning_rate": 1.4744679950924398e-05, - "loss": 0.0789, - "step": 31055 - }, - { - "epoch": 0.7884249270211956, - "grad_norm": 0.6255896091461182, - "learning_rate": 1.4743833819858697e-05, - "loss": 0.0549, - "step": 31060 - }, - { - "epoch": 0.7885518466810509, - "grad_norm": 0.4417186975479126, - "learning_rate": 1.4742987688792995e-05, - "loss": 0.0467, - "step": 31065 - }, - { - "epoch": 0.7886787663409062, - "grad_norm": 0.9744243025779724, - "learning_rate": 1.4742141557727294e-05, - "loss": 0.0866, - "step": 31070 - }, - { - "epoch": 0.7888056860007615, - "grad_norm": 0.6004839539527893, - "learning_rate": 1.474129542666159e-05, - "loss": 0.0801, - "step": 31075 - }, - { - "epoch": 0.7889326056606168, - "grad_norm": 0.4121416509151459, - "learning_rate": 1.4740449295595889e-05, - "loss": 0.0597, - "step": 31080 - }, - { - "epoch": 0.7890595253204722, - "grad_norm": 0.42272984981536865, - "learning_rate": 1.4739603164530187e-05, - "loss": 0.0667, - "step": 31085 - }, - { - "epoch": 0.7891864449803274, - "grad_norm": 0.35546940565109253, - "learning_rate": 1.4738757033464485e-05, - "loss": 0.0749, - "step": 31090 - }, - { - "epoch": 0.7893133646401828, - "grad_norm": 0.37792718410491943, - "learning_rate": 1.4737910902398782e-05, - "loss": 0.0687, - "step": 31095 - }, - { - "epoch": 0.7894402843000381, - "grad_norm": 0.49862733483314514, - "learning_rate": 1.473706477133308e-05, - "loss": 0.078, - "step": 31100 - }, - { - "epoch": 0.7895672039598934, - "grad_norm": 0.7323864698410034, - "learning_rate": 1.4736218640267379e-05, - "loss": 0.0837, - "step": 31105 - }, - { - "epoch": 0.7896941236197487, - "grad_norm": 0.41864004731178284, - "learning_rate": 1.4735372509201677e-05, - "loss": 0.0619, - "step": 31110 - }, - { - "epoch": 0.789821043279604, - "grad_norm": 0.41876402497291565, - "learning_rate": 1.4734526378135974e-05, - "loss": 0.0601, - "step": 31115 - }, - { - "epoch": 0.7899479629394593, - "grad_norm": 0.33513906598091125, - "learning_rate": 1.4733680247070272e-05, - "loss": 0.0494, - "step": 31120 - }, - { - "epoch": 0.7900748825993147, - "grad_norm": 0.45627814531326294, - "learning_rate": 1.4732834116004571e-05, - "loss": 0.0672, - "step": 31125 - }, - { - "epoch": 0.7902018022591699, - "grad_norm": 0.6347711682319641, - "learning_rate": 1.473198798493887e-05, - "loss": 0.0632, - "step": 31130 - }, - { - "epoch": 0.7903287219190253, - "grad_norm": 0.4594348967075348, - "learning_rate": 1.4731141853873166e-05, - "loss": 0.0856, - "step": 31135 - }, - { - "epoch": 0.7904556415788806, - "grad_norm": 0.609465479850769, - "learning_rate": 1.4730295722807464e-05, - "loss": 0.0863, - "step": 31140 - }, - { - "epoch": 0.7905825612387358, - "grad_norm": 0.5869989395141602, - "learning_rate": 1.4729449591741763e-05, - "loss": 0.0551, - "step": 31145 - }, - { - "epoch": 0.7907094808985912, - "grad_norm": 0.5911492705345154, - "learning_rate": 1.4728603460676061e-05, - "loss": 0.059, - "step": 31150 - }, - { - "epoch": 0.7908364005584465, - "grad_norm": 0.5700567960739136, - "learning_rate": 1.4727757329610358e-05, - "loss": 0.0541, - "step": 31155 - }, - { - "epoch": 0.7909633202183018, - "grad_norm": 0.5051849484443665, - "learning_rate": 1.4726911198544656e-05, - "loss": 0.0692, - "step": 31160 - }, - { - "epoch": 0.7910902398781571, - "grad_norm": 0.6776118278503418, - "learning_rate": 1.4726065067478955e-05, - "loss": 0.0671, - "step": 31165 - }, - { - "epoch": 0.7912171595380124, - "grad_norm": 0.4038228988647461, - "learning_rate": 1.4725218936413253e-05, - "loss": 0.0535, - "step": 31170 - }, - { - "epoch": 0.7913440791978678, - "grad_norm": 0.6465486288070679, - "learning_rate": 1.4724372805347548e-05, - "loss": 0.0784, - "step": 31175 - }, - { - "epoch": 0.7914709988577231, - "grad_norm": 0.773932158946991, - "learning_rate": 1.4723526674281847e-05, - "loss": 0.0767, - "step": 31180 - }, - { - "epoch": 0.7915979185175783, - "grad_norm": 0.5481513738632202, - "learning_rate": 1.4722680543216145e-05, - "loss": 0.0763, - "step": 31185 - }, - { - "epoch": 0.7917248381774337, - "grad_norm": 0.4041943848133087, - "learning_rate": 1.4721834412150443e-05, - "loss": 0.0752, - "step": 31190 - }, - { - "epoch": 0.791851757837289, - "grad_norm": 0.6799060702323914, - "learning_rate": 1.472098828108474e-05, - "loss": 0.0535, - "step": 31195 - }, - { - "epoch": 0.7919786774971443, - "grad_norm": 1.116987705230713, - "learning_rate": 1.4720142150019038e-05, - "loss": 0.0744, - "step": 31200 - }, - { - "epoch": 0.7921055971569996, - "grad_norm": 0.5516142845153809, - "learning_rate": 1.4719296018953337e-05, - "loss": 0.0598, - "step": 31205 - }, - { - "epoch": 0.7922325168168549, - "grad_norm": 0.5075421929359436, - "learning_rate": 1.4718449887887635e-05, - "loss": 0.0633, - "step": 31210 - }, - { - "epoch": 0.7923594364767103, - "grad_norm": 0.6581828594207764, - "learning_rate": 1.4717603756821932e-05, - "loss": 0.0599, - "step": 31215 - }, - { - "epoch": 0.7924863561365656, - "grad_norm": 0.5559775233268738, - "learning_rate": 1.471675762575623e-05, - "loss": 0.0874, - "step": 31220 - }, - { - "epoch": 0.7926132757964208, - "grad_norm": 0.552604079246521, - "learning_rate": 1.4715911494690529e-05, - "loss": 0.0569, - "step": 31225 - }, - { - "epoch": 0.7927401954562762, - "grad_norm": 0.5853256583213806, - "learning_rate": 1.4715065363624827e-05, - "loss": 0.0714, - "step": 31230 - }, - { - "epoch": 0.7928671151161315, - "grad_norm": 0.5384026169776917, - "learning_rate": 1.4714219232559124e-05, - "loss": 0.0593, - "step": 31235 - }, - { - "epoch": 0.7929940347759868, - "grad_norm": 0.7916586399078369, - "learning_rate": 1.4713373101493422e-05, - "loss": 0.0646, - "step": 31240 - }, - { - "epoch": 0.7931209544358421, - "grad_norm": 0.32097145915031433, - "learning_rate": 1.471252697042772e-05, - "loss": 0.0659, - "step": 31245 - }, - { - "epoch": 0.7932478740956974, - "grad_norm": 0.39184603095054626, - "learning_rate": 1.4711680839362019e-05, - "loss": 0.0797, - "step": 31250 - }, - { - "epoch": 0.7933747937555528, - "grad_norm": 0.7815892100334167, - "learning_rate": 1.4710834708296316e-05, - "loss": 0.0975, - "step": 31255 - }, - { - "epoch": 0.7935017134154081, - "grad_norm": 0.9076364636421204, - "learning_rate": 1.4709988577230614e-05, - "loss": 0.0552, - "step": 31260 - }, - { - "epoch": 0.7936286330752633, - "grad_norm": 0.27303287386894226, - "learning_rate": 1.4709142446164913e-05, - "loss": 0.0626, - "step": 31265 - }, - { - "epoch": 0.7937555527351187, - "grad_norm": 0.40939632058143616, - "learning_rate": 1.4708296315099211e-05, - "loss": 0.0598, - "step": 31270 - }, - { - "epoch": 0.793882472394974, - "grad_norm": 0.5317339301109314, - "learning_rate": 1.4707450184033508e-05, - "loss": 0.0689, - "step": 31275 - }, - { - "epoch": 0.7940093920548293, - "grad_norm": 0.8589683771133423, - "learning_rate": 1.4706604052967806e-05, - "loss": 0.0971, - "step": 31280 - }, - { - "epoch": 0.7941363117146846, - "grad_norm": 0.7081406712532043, - "learning_rate": 1.4705757921902105e-05, - "loss": 0.0644, - "step": 31285 - }, - { - "epoch": 0.7942632313745399, - "grad_norm": 0.6752145886421204, - "learning_rate": 1.4704911790836403e-05, - "loss": 0.0639, - "step": 31290 - }, - { - "epoch": 0.7943901510343953, - "grad_norm": 1.2190133333206177, - "learning_rate": 1.47040656597707e-05, - "loss": 0.0638, - "step": 31295 - }, - { - "epoch": 0.7945170706942506, - "grad_norm": 0.5084303617477417, - "learning_rate": 1.4703219528704998e-05, - "loss": 0.0599, - "step": 31300 - }, - { - "epoch": 0.7946439903541058, - "grad_norm": 0.6977577805519104, - "learning_rate": 1.4702373397639296e-05, - "loss": 0.0746, - "step": 31305 - }, - { - "epoch": 0.7947709100139612, - "grad_norm": 0.43269839882850647, - "learning_rate": 1.4701527266573595e-05, - "loss": 0.0953, - "step": 31310 - }, - { - "epoch": 0.7948978296738165, - "grad_norm": 0.7576026320457458, - "learning_rate": 1.470068113550789e-05, - "loss": 0.1003, - "step": 31315 - }, - { - "epoch": 0.7950247493336717, - "grad_norm": 0.6355413198471069, - "learning_rate": 1.4699835004442188e-05, - "loss": 0.0657, - "step": 31320 - }, - { - "epoch": 0.7951516689935271, - "grad_norm": 0.41640138626098633, - "learning_rate": 1.4698988873376487e-05, - "loss": 0.0625, - "step": 31325 - }, - { - "epoch": 0.7952785886533824, - "grad_norm": 0.5630760192871094, - "learning_rate": 1.4698142742310785e-05, - "loss": 0.06, - "step": 31330 - }, - { - "epoch": 0.7954055083132378, - "grad_norm": 0.32657647132873535, - "learning_rate": 1.4697296611245082e-05, - "loss": 0.0631, - "step": 31335 - }, - { - "epoch": 0.795532427973093, - "grad_norm": 0.9410591721534729, - "learning_rate": 1.469645048017938e-05, - "loss": 0.0715, - "step": 31340 - }, - { - "epoch": 0.7956593476329483, - "grad_norm": 0.5733791589736938, - "learning_rate": 1.4695604349113679e-05, - "loss": 0.0548, - "step": 31345 - }, - { - "epoch": 0.7957862672928037, - "grad_norm": 0.496520072221756, - "learning_rate": 1.4694758218047977e-05, - "loss": 0.0594, - "step": 31350 - }, - { - "epoch": 0.795913186952659, - "grad_norm": 0.6382497549057007, - "learning_rate": 1.4693912086982274e-05, - "loss": 0.072, - "step": 31355 - }, - { - "epoch": 0.7960401066125142, - "grad_norm": 0.8783393502235413, - "learning_rate": 1.4693065955916572e-05, - "loss": 0.067, - "step": 31360 - }, - { - "epoch": 0.7961670262723696, - "grad_norm": 1.1961106061935425, - "learning_rate": 1.469221982485087e-05, - "loss": 0.0807, - "step": 31365 - }, - { - "epoch": 0.7962939459322249, - "grad_norm": 0.8381438851356506, - "learning_rate": 1.4691373693785169e-05, - "loss": 0.0735, - "step": 31370 - }, - { - "epoch": 0.7964208655920803, - "grad_norm": 0.5200831890106201, - "learning_rate": 1.4690527562719466e-05, - "loss": 0.056, - "step": 31375 - }, - { - "epoch": 0.7965477852519355, - "grad_norm": 0.44273772835731506, - "learning_rate": 1.4689681431653764e-05, - "loss": 0.083, - "step": 31380 - }, - { - "epoch": 0.7966747049117908, - "grad_norm": 0.5617771148681641, - "learning_rate": 1.4688835300588062e-05, - "loss": 0.0577, - "step": 31385 - }, - { - "epoch": 0.7968016245716462, - "grad_norm": 0.4597710967063904, - "learning_rate": 1.468798916952236e-05, - "loss": 0.0493, - "step": 31390 - }, - { - "epoch": 0.7969285442315015, - "grad_norm": 0.3594757318496704, - "learning_rate": 1.4687143038456657e-05, - "loss": 0.0801, - "step": 31395 - }, - { - "epoch": 0.7970554638913567, - "grad_norm": 0.928960382938385, - "learning_rate": 1.4686296907390956e-05, - "loss": 0.059, - "step": 31400 - }, - { - "epoch": 0.7971823835512121, - "grad_norm": 0.6160193085670471, - "learning_rate": 1.4685450776325254e-05, - "loss": 0.0833, - "step": 31405 - }, - { - "epoch": 0.7973093032110674, - "grad_norm": 0.5799722075462341, - "learning_rate": 1.4684604645259553e-05, - "loss": 0.0694, - "step": 31410 - }, - { - "epoch": 0.7974362228709228, - "grad_norm": 0.7669366598129272, - "learning_rate": 1.468375851419385e-05, - "loss": 0.0687, - "step": 31415 - }, - { - "epoch": 0.797563142530778, - "grad_norm": 0.31648972630500793, - "learning_rate": 1.4682912383128148e-05, - "loss": 0.0677, - "step": 31420 - }, - { - "epoch": 0.7976900621906333, - "grad_norm": 0.4493318200111389, - "learning_rate": 1.4682066252062446e-05, - "loss": 0.055, - "step": 31425 - }, - { - "epoch": 0.7978169818504887, - "grad_norm": 0.8560696840286255, - "learning_rate": 1.4681220120996745e-05, - "loss": 0.0528, - "step": 31430 - }, - { - "epoch": 0.797943901510344, - "grad_norm": 0.5268526673316956, - "learning_rate": 1.468037398993104e-05, - "loss": 0.0679, - "step": 31435 - }, - { - "epoch": 0.7980708211701992, - "grad_norm": 0.6025999784469604, - "learning_rate": 1.467952785886534e-05, - "loss": 0.0698, - "step": 31440 - }, - { - "epoch": 0.7981977408300546, - "grad_norm": 0.5927044749259949, - "learning_rate": 1.4678681727799638e-05, - "loss": 0.075, - "step": 31445 - }, - { - "epoch": 0.7983246604899099, - "grad_norm": 0.6313742995262146, - "learning_rate": 1.4677835596733937e-05, - "loss": 0.089, - "step": 31450 - }, - { - "epoch": 0.7984515801497652, - "grad_norm": 0.4394491910934448, - "learning_rate": 1.4676989465668232e-05, - "loss": 0.0895, - "step": 31455 - }, - { - "epoch": 0.7985784998096205, - "grad_norm": 0.7413906455039978, - "learning_rate": 1.467614333460253e-05, - "loss": 0.0544, - "step": 31460 - }, - { - "epoch": 0.7987054194694758, - "grad_norm": 0.30309876799583435, - "learning_rate": 1.4675297203536828e-05, - "loss": 0.0607, - "step": 31465 - }, - { - "epoch": 0.7988323391293312, - "grad_norm": 0.7073938846588135, - "learning_rate": 1.4674451072471127e-05, - "loss": 0.0851, - "step": 31470 - }, - { - "epoch": 0.7989592587891865, - "grad_norm": 0.681186318397522, - "learning_rate": 1.4673604941405423e-05, - "loss": 0.0521, - "step": 31475 - }, - { - "epoch": 0.7990861784490417, - "grad_norm": 0.30737918615341187, - "learning_rate": 1.4672758810339722e-05, - "loss": 0.0683, - "step": 31480 - }, - { - "epoch": 0.7992130981088971, - "grad_norm": 0.6807456016540527, - "learning_rate": 1.467191267927402e-05, - "loss": 0.0821, - "step": 31485 - }, - { - "epoch": 0.7993400177687524, - "grad_norm": 0.6981473565101624, - "learning_rate": 1.4671066548208319e-05, - "loss": 0.0643, - "step": 31490 - }, - { - "epoch": 0.7994669374286076, - "grad_norm": 0.5720154643058777, - "learning_rate": 1.4670220417142617e-05, - "loss": 0.0722, - "step": 31495 - }, - { - "epoch": 0.799593857088463, - "grad_norm": 0.5406628251075745, - "learning_rate": 1.4669374286076914e-05, - "loss": 0.0534, - "step": 31500 - }, - { - "epoch": 0.7997207767483183, - "grad_norm": 0.7936629056930542, - "learning_rate": 1.4668528155011212e-05, - "loss": 0.0657, - "step": 31505 - }, - { - "epoch": 0.7998476964081737, - "grad_norm": 1.2765287160873413, - "learning_rate": 1.466768202394551e-05, - "loss": 0.0677, - "step": 31510 - }, - { - "epoch": 0.7999746160680289, - "grad_norm": 1.0569250583648682, - "learning_rate": 1.4666835892879809e-05, - "loss": 0.066, - "step": 31515 - }, - { - "epoch": 0.8001015357278842, - "grad_norm": 0.5301938056945801, - "learning_rate": 1.4665989761814106e-05, - "loss": 0.063, - "step": 31520 - }, - { - "epoch": 0.8002284553877396, - "grad_norm": 0.4646141529083252, - "learning_rate": 1.4665143630748404e-05, - "loss": 0.0718, - "step": 31525 - }, - { - "epoch": 0.8003553750475949, - "grad_norm": 0.5470342636108398, - "learning_rate": 1.4664297499682702e-05, - "loss": 0.0728, - "step": 31530 - }, - { - "epoch": 0.8004822947074501, - "grad_norm": 0.4109179377555847, - "learning_rate": 1.4663451368617001e-05, - "loss": 0.0802, - "step": 31535 - }, - { - "epoch": 0.8006092143673055, - "grad_norm": 0.8864179253578186, - "learning_rate": 1.4662605237551298e-05, - "loss": 0.06, - "step": 31540 - }, - { - "epoch": 0.8007361340271608, - "grad_norm": 0.5556333661079407, - "learning_rate": 1.4661759106485596e-05, - "loss": 0.0567, - "step": 31545 - }, - { - "epoch": 0.8008630536870162, - "grad_norm": 0.4734117388725281, - "learning_rate": 1.4660912975419894e-05, - "loss": 0.0569, - "step": 31550 - }, - { - "epoch": 0.8009899733468714, - "grad_norm": 0.41964998841285706, - "learning_rate": 1.4660066844354193e-05, - "loss": 0.0633, - "step": 31555 - }, - { - "epoch": 0.8011168930067267, - "grad_norm": 0.7044942378997803, - "learning_rate": 1.465922071328849e-05, - "loss": 0.0941, - "step": 31560 - }, - { - "epoch": 0.8012438126665821, - "grad_norm": 0.9172584414482117, - "learning_rate": 1.4658374582222788e-05, - "loss": 0.0751, - "step": 31565 - }, - { - "epoch": 0.8013707323264374, - "grad_norm": 0.6531245112419128, - "learning_rate": 1.4657528451157086e-05, - "loss": 0.0648, - "step": 31570 - }, - { - "epoch": 0.8014976519862926, - "grad_norm": 0.35570046305656433, - "learning_rate": 1.4656682320091385e-05, - "loss": 0.0721, - "step": 31575 - }, - { - "epoch": 0.801624571646148, - "grad_norm": 0.8676646947860718, - "learning_rate": 1.4655836189025681e-05, - "loss": 0.0632, - "step": 31580 - }, - { - "epoch": 0.8017514913060033, - "grad_norm": 0.3987806439399719, - "learning_rate": 1.465499005795998e-05, - "loss": 0.0764, - "step": 31585 - }, - { - "epoch": 0.8018784109658587, - "grad_norm": 0.30868977308273315, - "learning_rate": 1.4654143926894278e-05, - "loss": 0.0648, - "step": 31590 - }, - { - "epoch": 0.8020053306257139, - "grad_norm": 0.3310337960720062, - "learning_rate": 1.4653297795828577e-05, - "loss": 0.0638, - "step": 31595 - }, - { - "epoch": 0.8021322502855692, - "grad_norm": 0.4115409851074219, - "learning_rate": 1.4652451664762872e-05, - "loss": 0.0725, - "step": 31600 - }, - { - "epoch": 0.8022591699454246, - "grad_norm": 0.5954260230064392, - "learning_rate": 1.465160553369717e-05, - "loss": 0.0756, - "step": 31605 - }, - { - "epoch": 0.8023860896052799, - "grad_norm": 0.6481203436851501, - "learning_rate": 1.4650759402631468e-05, - "loss": 0.0873, - "step": 31610 - }, - { - "epoch": 0.8025130092651351, - "grad_norm": 0.4669174253940582, - "learning_rate": 1.4649913271565769e-05, - "loss": 0.0721, - "step": 31615 - }, - { - "epoch": 0.8026399289249905, - "grad_norm": 0.41747725009918213, - "learning_rate": 1.4649067140500064e-05, - "loss": 0.075, - "step": 31620 - }, - { - "epoch": 0.8027668485848458, - "grad_norm": 0.4390605092048645, - "learning_rate": 1.4648221009434362e-05, - "loss": 0.0839, - "step": 31625 - }, - { - "epoch": 0.8028937682447012, - "grad_norm": 1.2405056953430176, - "learning_rate": 1.464737487836866e-05, - "loss": 0.0548, - "step": 31630 - }, - { - "epoch": 0.8030206879045564, - "grad_norm": 0.452435702085495, - "learning_rate": 1.4646528747302959e-05, - "loss": 0.0616, - "step": 31635 - }, - { - "epoch": 0.8031476075644117, - "grad_norm": 0.5780614018440247, - "learning_rate": 1.4645682616237255e-05, - "loss": 0.0965, - "step": 31640 - }, - { - "epoch": 0.8032745272242671, - "grad_norm": 0.8496705293655396, - "learning_rate": 1.4644836485171554e-05, - "loss": 0.0762, - "step": 31645 - }, - { - "epoch": 0.8034014468841224, - "grad_norm": 0.6008865833282471, - "learning_rate": 1.4643990354105852e-05, - "loss": 0.0626, - "step": 31650 - }, - { - "epoch": 0.8035283665439776, - "grad_norm": 0.7089963555335999, - "learning_rate": 1.464314422304015e-05, - "loss": 0.0682, - "step": 31655 - }, - { - "epoch": 0.803655286203833, - "grad_norm": 0.7300284504890442, - "learning_rate": 1.4642298091974447e-05, - "loss": 0.0863, - "step": 31660 - }, - { - "epoch": 0.8037822058636883, - "grad_norm": 0.46904006600379944, - "learning_rate": 1.4641451960908746e-05, - "loss": 0.0539, - "step": 31665 - }, - { - "epoch": 0.8039091255235435, - "grad_norm": 0.38605931401252747, - "learning_rate": 1.4640605829843044e-05, - "loss": 0.0572, - "step": 31670 - }, - { - "epoch": 0.8040360451833989, - "grad_norm": 0.6069431900978088, - "learning_rate": 1.4639759698777343e-05, - "loss": 0.0776, - "step": 31675 - }, - { - "epoch": 0.8041629648432542, - "grad_norm": 0.8661899566650391, - "learning_rate": 1.463891356771164e-05, - "loss": 0.0998, - "step": 31680 - }, - { - "epoch": 0.8042898845031096, - "grad_norm": 0.5753029584884644, - "learning_rate": 1.4638067436645938e-05, - "loss": 0.0618, - "step": 31685 - }, - { - "epoch": 0.8044168041629648, - "grad_norm": 0.5425769090652466, - "learning_rate": 1.4637221305580236e-05, - "loss": 0.063, - "step": 31690 - }, - { - "epoch": 0.8045437238228201, - "grad_norm": 0.5770301222801208, - "learning_rate": 1.4636375174514535e-05, - "loss": 0.0687, - "step": 31695 - }, - { - "epoch": 0.8046706434826755, - "grad_norm": 0.46857842803001404, - "learning_rate": 1.4635529043448831e-05, - "loss": 0.0611, - "step": 31700 - }, - { - "epoch": 0.8047975631425308, - "grad_norm": 0.6160946488380432, - "learning_rate": 1.463468291238313e-05, - "loss": 0.0557, - "step": 31705 - }, - { - "epoch": 0.804924482802386, - "grad_norm": 0.5910822749137878, - "learning_rate": 1.4633836781317428e-05, - "loss": 0.0554, - "step": 31710 - }, - { - "epoch": 0.8050514024622414, - "grad_norm": 0.4373597204685211, - "learning_rate": 1.4632990650251726e-05, - "loss": 0.0744, - "step": 31715 - }, - { - "epoch": 0.8051783221220967, - "grad_norm": 1.0969234704971313, - "learning_rate": 1.4632144519186023e-05, - "loss": 0.0585, - "step": 31720 - }, - { - "epoch": 0.8053052417819521, - "grad_norm": 0.41583478450775146, - "learning_rate": 1.4631298388120322e-05, - "loss": 0.0581, - "step": 31725 - }, - { - "epoch": 0.8054321614418073, - "grad_norm": 0.46000605821609497, - "learning_rate": 1.463045225705462e-05, - "loss": 0.0666, - "step": 31730 - }, - { - "epoch": 0.8055590811016626, - "grad_norm": 0.6271188855171204, - "learning_rate": 1.4629606125988918e-05, - "loss": 0.067, - "step": 31735 - }, - { - "epoch": 0.805686000761518, - "grad_norm": 0.3830825388431549, - "learning_rate": 1.4628759994923213e-05, - "loss": 0.069, - "step": 31740 - }, - { - "epoch": 0.8058129204213733, - "grad_norm": 0.41999220848083496, - "learning_rate": 1.4627913863857512e-05, - "loss": 0.0763, - "step": 31745 - }, - { - "epoch": 0.8059398400812285, - "grad_norm": 0.6005051136016846, - "learning_rate": 1.462706773279181e-05, - "loss": 0.0544, - "step": 31750 - }, - { - "epoch": 0.8060667597410839, - "grad_norm": 0.7363251447677612, - "learning_rate": 1.4626221601726109e-05, - "loss": 0.0757, - "step": 31755 - }, - { - "epoch": 0.8061936794009392, - "grad_norm": 0.5375709533691406, - "learning_rate": 1.4625375470660405e-05, - "loss": 0.0625, - "step": 31760 - }, - { - "epoch": 0.8063205990607946, - "grad_norm": 0.49116265773773193, - "learning_rate": 1.4624529339594704e-05, - "loss": 0.0708, - "step": 31765 - }, - { - "epoch": 0.8064475187206498, - "grad_norm": 0.49714866280555725, - "learning_rate": 1.4623683208529002e-05, - "loss": 0.0735, - "step": 31770 - }, - { - "epoch": 0.8065744383805051, - "grad_norm": 0.44283947348594666, - "learning_rate": 1.46228370774633e-05, - "loss": 0.0538, - "step": 31775 - }, - { - "epoch": 0.8067013580403605, - "grad_norm": 0.542914628982544, - "learning_rate": 1.4621990946397597e-05, - "loss": 0.075, - "step": 31780 - }, - { - "epoch": 0.8068282777002158, - "grad_norm": 0.7619739770889282, - "learning_rate": 1.4621144815331896e-05, - "loss": 0.0578, - "step": 31785 - }, - { - "epoch": 0.806955197360071, - "grad_norm": 0.4004208743572235, - "learning_rate": 1.4620298684266194e-05, - "loss": 0.0541, - "step": 31790 - }, - { - "epoch": 0.8070821170199264, - "grad_norm": 0.5726788640022278, - "learning_rate": 1.4619452553200492e-05, - "loss": 0.0629, - "step": 31795 - }, - { - "epoch": 0.8072090366797817, - "grad_norm": 0.7251501679420471, - "learning_rate": 1.4618606422134789e-05, - "loss": 0.0628, - "step": 31800 - }, - { - "epoch": 0.8073359563396371, - "grad_norm": 0.5653364658355713, - "learning_rate": 1.4617760291069087e-05, - "loss": 0.0701, - "step": 31805 - }, - { - "epoch": 0.8074628759994923, - "grad_norm": 0.42605581879615784, - "learning_rate": 1.4616914160003386e-05, - "loss": 0.0598, - "step": 31810 - }, - { - "epoch": 0.8075897956593476, - "grad_norm": 0.44251736998558044, - "learning_rate": 1.4616068028937684e-05, - "loss": 0.0748, - "step": 31815 - }, - { - "epoch": 0.807716715319203, - "grad_norm": 0.6827097535133362, - "learning_rate": 1.4615221897871981e-05, - "loss": 0.0547, - "step": 31820 - }, - { - "epoch": 0.8078436349790583, - "grad_norm": 0.3893918991088867, - "learning_rate": 1.461437576680628e-05, - "loss": 0.059, - "step": 31825 - }, - { - "epoch": 0.8079705546389135, - "grad_norm": 1.1470643281936646, - "learning_rate": 1.4613529635740578e-05, - "loss": 0.0505, - "step": 31830 - }, - { - "epoch": 0.8080974742987689, - "grad_norm": 0.7452414035797119, - "learning_rate": 1.4612683504674876e-05, - "loss": 0.0585, - "step": 31835 - }, - { - "epoch": 0.8082243939586242, - "grad_norm": 0.49280697107315063, - "learning_rate": 1.4611837373609173e-05, - "loss": 0.0634, - "step": 31840 - }, - { - "epoch": 0.8083513136184795, - "grad_norm": 0.3633350729942322, - "learning_rate": 1.4610991242543471e-05, - "loss": 0.0522, - "step": 31845 - }, - { - "epoch": 0.8084782332783348, - "grad_norm": 1.2988396883010864, - "learning_rate": 1.461014511147777e-05, - "loss": 0.0847, - "step": 31850 - }, - { - "epoch": 0.8086051529381901, - "grad_norm": 0.25741687417030334, - "learning_rate": 1.4609298980412068e-05, - "loss": 0.0469, - "step": 31855 - }, - { - "epoch": 0.8087320725980455, - "grad_norm": 0.7019333839416504, - "learning_rate": 1.4608452849346363e-05, - "loss": 0.095, - "step": 31860 - }, - { - "epoch": 0.8088589922579007, - "grad_norm": 0.5369635224342346, - "learning_rate": 1.4607606718280663e-05, - "loss": 0.0684, - "step": 31865 - }, - { - "epoch": 0.808985911917756, - "grad_norm": 0.8545995950698853, - "learning_rate": 1.4606760587214962e-05, - "loss": 0.0637, - "step": 31870 - }, - { - "epoch": 0.8091128315776114, - "grad_norm": 0.7118280529975891, - "learning_rate": 1.460591445614926e-05, - "loss": 0.0729, - "step": 31875 - }, - { - "epoch": 0.8092397512374667, - "grad_norm": 0.7824745774269104, - "learning_rate": 1.4605068325083555e-05, - "loss": 0.0724, - "step": 31880 - }, - { - "epoch": 0.809366670897322, - "grad_norm": 0.6420778632164001, - "learning_rate": 1.4604222194017853e-05, - "loss": 0.0662, - "step": 31885 - }, - { - "epoch": 0.8094935905571773, - "grad_norm": 0.5481047630310059, - "learning_rate": 1.4603376062952152e-05, - "loss": 0.0685, - "step": 31890 - }, - { - "epoch": 0.8096205102170326, - "grad_norm": 0.3606581687927246, - "learning_rate": 1.460252993188645e-05, - "loss": 0.0611, - "step": 31895 - }, - { - "epoch": 0.809747429876888, - "grad_norm": 0.4238505959510803, - "learning_rate": 1.4601683800820747e-05, - "loss": 0.0708, - "step": 31900 - }, - { - "epoch": 0.8098743495367432, - "grad_norm": 0.39400714635849, - "learning_rate": 1.4600837669755045e-05, - "loss": 0.0551, - "step": 31905 - }, - { - "epoch": 0.8100012691965985, - "grad_norm": 0.738487720489502, - "learning_rate": 1.4599991538689344e-05, - "loss": 0.0813, - "step": 31910 - }, - { - "epoch": 0.8101281888564539, - "grad_norm": 0.6504172682762146, - "learning_rate": 1.4599145407623642e-05, - "loss": 0.0761, - "step": 31915 - }, - { - "epoch": 0.8102551085163092, - "grad_norm": 0.47832757234573364, - "learning_rate": 1.4598299276557939e-05, - "loss": 0.0776, - "step": 31920 - }, - { - "epoch": 0.8103820281761644, - "grad_norm": 0.4687819182872772, - "learning_rate": 1.4597453145492237e-05, - "loss": 0.0627, - "step": 31925 - }, - { - "epoch": 0.8105089478360198, - "grad_norm": 0.5340430736541748, - "learning_rate": 1.4596607014426536e-05, - "loss": 0.065, - "step": 31930 - }, - { - "epoch": 0.8106358674958751, - "grad_norm": 0.40819665789604187, - "learning_rate": 1.4595760883360834e-05, - "loss": 0.0707, - "step": 31935 - }, - { - "epoch": 0.8107627871557305, - "grad_norm": 0.8096659779548645, - "learning_rate": 1.459491475229513e-05, - "loss": 0.0746, - "step": 31940 - }, - { - "epoch": 0.8108897068155857, - "grad_norm": 0.5110306143760681, - "learning_rate": 1.459406862122943e-05, - "loss": 0.0708, - "step": 31945 - }, - { - "epoch": 0.811016626475441, - "grad_norm": 0.5238982439041138, - "learning_rate": 1.4593222490163728e-05, - "loss": 0.0544, - "step": 31950 - }, - { - "epoch": 0.8111435461352964, - "grad_norm": 0.45503664016723633, - "learning_rate": 1.4592376359098026e-05, - "loss": 0.0658, - "step": 31955 - }, - { - "epoch": 0.8112704657951517, - "grad_norm": 0.39792123436927795, - "learning_rate": 1.4591530228032323e-05, - "loss": 0.0903, - "step": 31960 - }, - { - "epoch": 0.811397385455007, - "grad_norm": 0.5286582112312317, - "learning_rate": 1.4590684096966621e-05, - "loss": 0.0847, - "step": 31965 - }, - { - "epoch": 0.8115243051148623, - "grad_norm": 0.8602451086044312, - "learning_rate": 1.458983796590092e-05, - "loss": 0.0642, - "step": 31970 - }, - { - "epoch": 0.8116512247747176, - "grad_norm": 0.3356584310531616, - "learning_rate": 1.4588991834835218e-05, - "loss": 0.0616, - "step": 31975 - }, - { - "epoch": 0.811778144434573, - "grad_norm": 0.40615925192832947, - "learning_rate": 1.4588145703769515e-05, - "loss": 0.0641, - "step": 31980 - }, - { - "epoch": 0.8119050640944282, - "grad_norm": 2.2844300270080566, - "learning_rate": 1.4587299572703813e-05, - "loss": 0.0593, - "step": 31985 - }, - { - "epoch": 0.8120319837542835, - "grad_norm": 0.5911190509796143, - "learning_rate": 1.4586453441638111e-05, - "loss": 0.0638, - "step": 31990 - }, - { - "epoch": 0.8121589034141389, - "grad_norm": 0.24788208305835724, - "learning_rate": 1.458560731057241e-05, - "loss": 0.0665, - "step": 31995 - }, - { - "epoch": 0.8122858230739941, - "grad_norm": 0.7258987426757812, - "learning_rate": 1.4584761179506708e-05, - "loss": 0.0806, - "step": 32000 - }, - { - "epoch": 0.8124127427338494, - "grad_norm": 0.6861207485198975, - "learning_rate": 1.4583915048441005e-05, - "loss": 0.0747, - "step": 32005 - }, - { - "epoch": 0.8125396623937048, - "grad_norm": 1.3914625644683838, - "learning_rate": 1.4583068917375303e-05, - "loss": 0.0982, - "step": 32010 - }, - { - "epoch": 0.8126665820535601, - "grad_norm": 0.6290092468261719, - "learning_rate": 1.4582222786309602e-05, - "loss": 0.0686, - "step": 32015 - }, - { - "epoch": 0.8127935017134154, - "grad_norm": 0.6636860966682434, - "learning_rate": 1.45813766552439e-05, - "loss": 0.0697, - "step": 32020 - }, - { - "epoch": 0.8129204213732707, - "grad_norm": 0.6737001538276672, - "learning_rate": 1.4580530524178195e-05, - "loss": 0.0502, - "step": 32025 - }, - { - "epoch": 0.813047341033126, - "grad_norm": 0.4417462944984436, - "learning_rate": 1.4579684393112494e-05, - "loss": 0.0577, - "step": 32030 - }, - { - "epoch": 0.8131742606929814, - "grad_norm": 0.48130127787590027, - "learning_rate": 1.4578838262046792e-05, - "loss": 0.0569, - "step": 32035 - }, - { - "epoch": 0.8133011803528366, - "grad_norm": 0.4211369454860687, - "learning_rate": 1.4577992130981092e-05, - "loss": 0.0586, - "step": 32040 - }, - { - "epoch": 0.813428100012692, - "grad_norm": 0.39567816257476807, - "learning_rate": 1.4577145999915387e-05, - "loss": 0.0653, - "step": 32045 - }, - { - "epoch": 0.8135550196725473, - "grad_norm": 0.47054344415664673, - "learning_rate": 1.4576299868849685e-05, - "loss": 0.0674, - "step": 32050 - }, - { - "epoch": 0.8136819393324026, - "grad_norm": 0.3262748718261719, - "learning_rate": 1.4575453737783984e-05, - "loss": 0.071, - "step": 32055 - }, - { - "epoch": 0.8138088589922579, - "grad_norm": 0.6591029167175293, - "learning_rate": 1.4574607606718282e-05, - "loss": 0.0503, - "step": 32060 - }, - { - "epoch": 0.8139357786521132, - "grad_norm": 0.5387338399887085, - "learning_rate": 1.4573761475652579e-05, - "loss": 0.0565, - "step": 32065 - }, - { - "epoch": 0.8140626983119685, - "grad_norm": 0.46013471484184265, - "learning_rate": 1.4572915344586877e-05, - "loss": 0.0695, - "step": 32070 - }, - { - "epoch": 0.8141896179718239, - "grad_norm": 0.4640803933143616, - "learning_rate": 1.4572069213521176e-05, - "loss": 0.0523, - "step": 32075 - }, - { - "epoch": 0.8143165376316791, - "grad_norm": 0.6301133036613464, - "learning_rate": 1.4571223082455474e-05, - "loss": 0.0501, - "step": 32080 - }, - { - "epoch": 0.8144434572915344, - "grad_norm": 1.2549062967300415, - "learning_rate": 1.4570376951389771e-05, - "loss": 0.0693, - "step": 32085 - }, - { - "epoch": 0.8145703769513898, - "grad_norm": 0.57168048620224, - "learning_rate": 1.456953082032407e-05, - "loss": 0.0728, - "step": 32090 - }, - { - "epoch": 0.8146972966112451, - "grad_norm": 1.2547777891159058, - "learning_rate": 1.4568684689258368e-05, - "loss": 0.0532, - "step": 32095 - }, - { - "epoch": 0.8148242162711004, - "grad_norm": 0.5076450705528259, - "learning_rate": 1.4567838558192666e-05, - "loss": 0.0691, - "step": 32100 - }, - { - "epoch": 0.8149511359309557, - "grad_norm": 0.6362398266792297, - "learning_rate": 1.4566992427126963e-05, - "loss": 0.081, - "step": 32105 - }, - { - "epoch": 0.815078055590811, - "grad_norm": 0.5206549763679504, - "learning_rate": 1.4566146296061261e-05, - "loss": 0.066, - "step": 32110 - }, - { - "epoch": 0.8152049752506664, - "grad_norm": 0.463113933801651, - "learning_rate": 1.456530016499556e-05, - "loss": 0.0688, - "step": 32115 - }, - { - "epoch": 0.8153318949105216, - "grad_norm": 0.4754665195941925, - "learning_rate": 1.4564454033929858e-05, - "loss": 0.0751, - "step": 32120 - }, - { - "epoch": 0.8154588145703769, - "grad_norm": 0.671792209148407, - "learning_rate": 1.4563607902864155e-05, - "loss": 0.091, - "step": 32125 - }, - { - "epoch": 0.8155857342302323, - "grad_norm": 0.37650465965270996, - "learning_rate": 1.4562761771798453e-05, - "loss": 0.0629, - "step": 32130 - }, - { - "epoch": 0.8157126538900876, - "grad_norm": 0.6086805462837219, - "learning_rate": 1.4561915640732752e-05, - "loss": 0.0679, - "step": 32135 - }, - { - "epoch": 0.8158395735499429, - "grad_norm": 0.37701278924942017, - "learning_rate": 1.456106950966705e-05, - "loss": 0.0481, - "step": 32140 - }, - { - "epoch": 0.8159664932097982, - "grad_norm": 0.41181835532188416, - "learning_rate": 1.4560223378601347e-05, - "loss": 0.0729, - "step": 32145 - }, - { - "epoch": 0.8160934128696535, - "grad_norm": 0.5124828219413757, - "learning_rate": 1.4559377247535645e-05, - "loss": 0.0825, - "step": 32150 - }, - { - "epoch": 0.8162203325295089, - "grad_norm": 1.257502794265747, - "learning_rate": 1.4558531116469943e-05, - "loss": 0.0671, - "step": 32155 - }, - { - "epoch": 0.8163472521893641, - "grad_norm": 0.5177577137947083, - "learning_rate": 1.4557684985404242e-05, - "loss": 0.0619, - "step": 32160 - }, - { - "epoch": 0.8164741718492194, - "grad_norm": 1.0207014083862305, - "learning_rate": 1.4556838854338537e-05, - "loss": 0.0637, - "step": 32165 - }, - { - "epoch": 0.8166010915090748, - "grad_norm": 0.7706487774848938, - "learning_rate": 1.4555992723272835e-05, - "loss": 0.0626, - "step": 32170 - }, - { - "epoch": 0.81672801116893, - "grad_norm": 0.3615344762802124, - "learning_rate": 1.4555146592207134e-05, - "loss": 0.0788, - "step": 32175 - }, - { - "epoch": 0.8168549308287854, - "grad_norm": 0.537794291973114, - "learning_rate": 1.4554300461141432e-05, - "loss": 0.0638, - "step": 32180 - }, - { - "epoch": 0.8169818504886407, - "grad_norm": 1.1742591857910156, - "learning_rate": 1.4553454330075729e-05, - "loss": 0.0709, - "step": 32185 - }, - { - "epoch": 0.817108770148496, - "grad_norm": 0.6090229153633118, - "learning_rate": 1.4552608199010027e-05, - "loss": 0.0639, - "step": 32190 - }, - { - "epoch": 0.8172356898083513, - "grad_norm": 0.4591110646724701, - "learning_rate": 1.4551762067944326e-05, - "loss": 0.0563, - "step": 32195 - }, - { - "epoch": 0.8173626094682066, - "grad_norm": 1.1732603311538696, - "learning_rate": 1.4550915936878624e-05, - "loss": 0.0753, - "step": 32200 - }, - { - "epoch": 0.8174895291280619, - "grad_norm": 0.7622882723808289, - "learning_rate": 1.455006980581292e-05, - "loss": 0.0634, - "step": 32205 - }, - { - "epoch": 0.8176164487879173, - "grad_norm": 0.46045786142349243, - "learning_rate": 1.4549223674747219e-05, - "loss": 0.0603, - "step": 32210 - }, - { - "epoch": 0.8177433684477725, - "grad_norm": 0.3807072639465332, - "learning_rate": 1.4548377543681517e-05, - "loss": 0.0769, - "step": 32215 - }, - { - "epoch": 0.8178702881076279, - "grad_norm": 0.547321081161499, - "learning_rate": 1.4547531412615816e-05, - "loss": 0.071, - "step": 32220 - }, - { - "epoch": 0.8179972077674832, - "grad_norm": 1.1295654773712158, - "learning_rate": 1.4546685281550113e-05, - "loss": 0.0909, - "step": 32225 - }, - { - "epoch": 0.8181241274273385, - "grad_norm": 1.7509944438934326, - "learning_rate": 1.4545839150484411e-05, - "loss": 0.068, - "step": 32230 - }, - { - "epoch": 0.8182510470871938, - "grad_norm": 0.6088405251502991, - "learning_rate": 1.454499301941871e-05, - "loss": 0.0606, - "step": 32235 - }, - { - "epoch": 0.8183779667470491, - "grad_norm": 0.5897732973098755, - "learning_rate": 1.4544146888353008e-05, - "loss": 0.0744, - "step": 32240 - }, - { - "epoch": 0.8185048864069044, - "grad_norm": 0.8202386498451233, - "learning_rate": 1.4543300757287305e-05, - "loss": 0.0748, - "step": 32245 - }, - { - "epoch": 0.8186318060667598, - "grad_norm": 0.554581344127655, - "learning_rate": 1.4542454626221603e-05, - "loss": 0.0518, - "step": 32250 - }, - { - "epoch": 0.818758725726615, - "grad_norm": 0.5341105461120605, - "learning_rate": 1.4541608495155901e-05, - "loss": 0.0809, - "step": 32255 - }, - { - "epoch": 0.8188856453864704, - "grad_norm": 0.42349573969841003, - "learning_rate": 1.45407623640902e-05, - "loss": 0.0705, - "step": 32260 - }, - { - "epoch": 0.8190125650463257, - "grad_norm": 0.48515018820762634, - "learning_rate": 1.4539916233024496e-05, - "loss": 0.0469, - "step": 32265 - }, - { - "epoch": 0.819139484706181, - "grad_norm": 0.7894166707992554, - "learning_rate": 1.4539070101958795e-05, - "loss": 0.0786, - "step": 32270 - }, - { - "epoch": 0.8192664043660363, - "grad_norm": 0.32389867305755615, - "learning_rate": 1.4538223970893093e-05, - "loss": 0.0575, - "step": 32275 - }, - { - "epoch": 0.8193933240258916, - "grad_norm": 0.4825463593006134, - "learning_rate": 1.4537377839827392e-05, - "loss": 0.0842, - "step": 32280 - }, - { - "epoch": 0.8195202436857469, - "grad_norm": 0.5654813647270203, - "learning_rate": 1.4536531708761688e-05, - "loss": 0.0843, - "step": 32285 - }, - { - "epoch": 0.8196471633456023, - "grad_norm": 0.6517273783683777, - "learning_rate": 1.4535685577695987e-05, - "loss": 0.0997, - "step": 32290 - }, - { - "epoch": 0.8197740830054575, - "grad_norm": 0.4428234100341797, - "learning_rate": 1.4534839446630285e-05, - "loss": 0.0745, - "step": 32295 - }, - { - "epoch": 0.8199010026653129, - "grad_norm": 0.6039060354232788, - "learning_rate": 1.4533993315564584e-05, - "loss": 0.0761, - "step": 32300 - }, - { - "epoch": 0.8200279223251682, - "grad_norm": 0.5339555740356445, - "learning_rate": 1.4533147184498879e-05, - "loss": 0.0691, - "step": 32305 - }, - { - "epoch": 0.8201548419850235, - "grad_norm": 0.7925965189933777, - "learning_rate": 1.4532301053433177e-05, - "loss": 0.0645, - "step": 32310 - }, - { - "epoch": 0.8202817616448788, - "grad_norm": 0.5920505523681641, - "learning_rate": 1.4531454922367475e-05, - "loss": 0.0836, - "step": 32315 - }, - { - "epoch": 0.8204086813047341, - "grad_norm": 0.4400867521762848, - "learning_rate": 1.4530608791301774e-05, - "loss": 0.0686, - "step": 32320 - }, - { - "epoch": 0.8205356009645894, - "grad_norm": 0.3117685317993164, - "learning_rate": 1.452976266023607e-05, - "loss": 0.0675, - "step": 32325 - }, - { - "epoch": 0.8206625206244448, - "grad_norm": 0.5640902519226074, - "learning_rate": 1.4528916529170369e-05, - "loss": 0.0805, - "step": 32330 - }, - { - "epoch": 0.8207894402843, - "grad_norm": 0.3891015648841858, - "learning_rate": 1.4528070398104667e-05, - "loss": 0.0813, - "step": 32335 - }, - { - "epoch": 0.8209163599441553, - "grad_norm": 0.5366904139518738, - "learning_rate": 1.4527224267038966e-05, - "loss": 0.0782, - "step": 32340 - }, - { - "epoch": 0.8210432796040107, - "grad_norm": 0.7238845825195312, - "learning_rate": 1.4526378135973262e-05, - "loss": 0.0631, - "step": 32345 - }, - { - "epoch": 0.8211701992638659, - "grad_norm": 0.4935288727283478, - "learning_rate": 1.452553200490756e-05, - "loss": 0.0786, - "step": 32350 - }, - { - "epoch": 0.8212971189237213, - "grad_norm": 1.0742913484573364, - "learning_rate": 1.452468587384186e-05, - "loss": 0.0587, - "step": 32355 - }, - { - "epoch": 0.8214240385835766, - "grad_norm": 0.4001081883907318, - "learning_rate": 1.4523839742776158e-05, - "loss": 0.0535, - "step": 32360 - }, - { - "epoch": 0.8215509582434319, - "grad_norm": 0.4920411705970764, - "learning_rate": 1.4522993611710454e-05, - "loss": 0.0646, - "step": 32365 - }, - { - "epoch": 0.8216778779032872, - "grad_norm": 0.6015076041221619, - "learning_rate": 1.4522147480644753e-05, - "loss": 0.0614, - "step": 32370 - }, - { - "epoch": 0.8218047975631425, - "grad_norm": 0.5077353119850159, - "learning_rate": 1.4521301349579051e-05, - "loss": 0.0789, - "step": 32375 - }, - { - "epoch": 0.8219317172229978, - "grad_norm": 0.3539247512817383, - "learning_rate": 1.452045521851335e-05, - "loss": 0.0562, - "step": 32380 - }, - { - "epoch": 0.8220586368828532, - "grad_norm": 0.3793327808380127, - "learning_rate": 1.4519609087447646e-05, - "loss": 0.0483, - "step": 32385 - }, - { - "epoch": 0.8221855565427084, - "grad_norm": 0.5108028054237366, - "learning_rate": 1.4518762956381945e-05, - "loss": 0.0565, - "step": 32390 - }, - { - "epoch": 0.8223124762025638, - "grad_norm": 0.567057728767395, - "learning_rate": 1.4517916825316243e-05, - "loss": 0.0636, - "step": 32395 - }, - { - "epoch": 0.8224393958624191, - "grad_norm": 0.365479052066803, - "learning_rate": 1.4517070694250541e-05, - "loss": 0.0595, - "step": 32400 - }, - { - "epoch": 0.8225663155222744, - "grad_norm": 0.7706887125968933, - "learning_rate": 1.4516224563184838e-05, - "loss": 0.0716, - "step": 32405 - }, - { - "epoch": 0.8226932351821297, - "grad_norm": 0.9360737800598145, - "learning_rate": 1.4515378432119137e-05, - "loss": 0.053, - "step": 32410 - }, - { - "epoch": 0.822820154841985, - "grad_norm": 0.7645504474639893, - "learning_rate": 1.4514532301053435e-05, - "loss": 0.0651, - "step": 32415 - }, - { - "epoch": 0.8229470745018403, - "grad_norm": 0.4767343997955322, - "learning_rate": 1.4513686169987733e-05, - "loss": 0.0569, - "step": 32420 - }, - { - "epoch": 0.8230739941616957, - "grad_norm": 0.511388897895813, - "learning_rate": 1.4512840038922028e-05, - "loss": 0.0626, - "step": 32425 - }, - { - "epoch": 0.8232009138215509, - "grad_norm": 0.6478121876716614, - "learning_rate": 1.4511993907856328e-05, - "loss": 0.0591, - "step": 32430 - }, - { - "epoch": 0.8233278334814063, - "grad_norm": 1.229840636253357, - "learning_rate": 1.4511147776790627e-05, - "loss": 0.0566, - "step": 32435 - }, - { - "epoch": 0.8234547531412616, - "grad_norm": 0.3571224510669708, - "learning_rate": 1.4510301645724925e-05, - "loss": 0.065, - "step": 32440 - }, - { - "epoch": 0.8235816728011169, - "grad_norm": 0.5312193632125854, - "learning_rate": 1.450945551465922e-05, - "loss": 0.0565, - "step": 32445 - }, - { - "epoch": 0.8237085924609722, - "grad_norm": 0.9235612750053406, - "learning_rate": 1.4508609383593519e-05, - "loss": 0.0616, - "step": 32450 - }, - { - "epoch": 0.8238355121208275, - "grad_norm": 0.8530414700508118, - "learning_rate": 1.4507763252527817e-05, - "loss": 0.0619, - "step": 32455 - }, - { - "epoch": 0.8239624317806828, - "grad_norm": 0.5759170651435852, - "learning_rate": 1.4506917121462115e-05, - "loss": 0.0994, - "step": 32460 - }, - { - "epoch": 0.8240893514405382, - "grad_norm": 1.0487806797027588, - "learning_rate": 1.4506070990396412e-05, - "loss": 0.0634, - "step": 32465 - }, - { - "epoch": 0.8242162711003934, - "grad_norm": 0.5406204462051392, - "learning_rate": 1.450522485933071e-05, - "loss": 0.0567, - "step": 32470 - }, - { - "epoch": 0.8243431907602488, - "grad_norm": 0.3746079206466675, - "learning_rate": 1.4504378728265009e-05, - "loss": 0.071, - "step": 32475 - }, - { - "epoch": 0.8244701104201041, - "grad_norm": 0.33823534846305847, - "learning_rate": 1.4503532597199307e-05, - "loss": 0.0644, - "step": 32480 - }, - { - "epoch": 0.8245970300799594, - "grad_norm": 0.5118838548660278, - "learning_rate": 1.4502686466133604e-05, - "loss": 0.0875, - "step": 32485 - }, - { - "epoch": 0.8247239497398147, - "grad_norm": 0.6196996569633484, - "learning_rate": 1.4501840335067903e-05, - "loss": 0.0891, - "step": 32490 - }, - { - "epoch": 0.82485086939967, - "grad_norm": 0.565567135810852, - "learning_rate": 1.4500994204002201e-05, - "loss": 0.0683, - "step": 32495 - }, - { - "epoch": 0.8249777890595253, - "grad_norm": 0.5510566234588623, - "learning_rate": 1.45001480729365e-05, - "loss": 0.0701, - "step": 32500 - }, - { - "epoch": 0.8251047087193807, - "grad_norm": 0.7219123840332031, - "learning_rate": 1.4499301941870796e-05, - "loss": 0.0703, - "step": 32505 - }, - { - "epoch": 0.8252316283792359, - "grad_norm": 0.3303782045841217, - "learning_rate": 1.4498455810805094e-05, - "loss": 0.063, - "step": 32510 - }, - { - "epoch": 0.8253585480390913, - "grad_norm": 0.4117899239063263, - "learning_rate": 1.4497609679739393e-05, - "loss": 0.0634, - "step": 32515 - }, - { - "epoch": 0.8254854676989466, - "grad_norm": 0.5463738441467285, - "learning_rate": 1.4496763548673691e-05, - "loss": 0.058, - "step": 32520 - }, - { - "epoch": 0.8256123873588018, - "grad_norm": 0.49573570489883423, - "learning_rate": 1.449591741760799e-05, - "loss": 0.0471, - "step": 32525 - }, - { - "epoch": 0.8257393070186572, - "grad_norm": 0.33813732862472534, - "learning_rate": 1.4495071286542286e-05, - "loss": 0.063, - "step": 32530 - }, - { - "epoch": 0.8258662266785125, - "grad_norm": 0.34854015707969666, - "learning_rate": 1.4494225155476585e-05, - "loss": 0.0522, - "step": 32535 - }, - { - "epoch": 0.8259931463383678, - "grad_norm": 0.2877201437950134, - "learning_rate": 1.4493379024410883e-05, - "loss": 0.0737, - "step": 32540 - }, - { - "epoch": 0.8261200659982231, - "grad_norm": 0.4698816239833832, - "learning_rate": 1.4492532893345182e-05, - "loss": 0.074, - "step": 32545 - }, - { - "epoch": 0.8262469856580784, - "grad_norm": 0.6104480028152466, - "learning_rate": 1.4491686762279478e-05, - "loss": 0.0689, - "step": 32550 - }, - { - "epoch": 0.8263739053179338, - "grad_norm": 0.4385824203491211, - "learning_rate": 1.4490840631213777e-05, - "loss": 0.0743, - "step": 32555 - }, - { - "epoch": 0.8265008249777891, - "grad_norm": 2.73111891746521, - "learning_rate": 1.4489994500148075e-05, - "loss": 0.0851, - "step": 32560 - }, - { - "epoch": 0.8266277446376443, - "grad_norm": 0.6402004361152649, - "learning_rate": 1.4489148369082373e-05, - "loss": 0.0643, - "step": 32565 - }, - { - "epoch": 0.8267546642974997, - "grad_norm": 0.5210484862327576, - "learning_rate": 1.448830223801667e-05, - "loss": 0.0735, - "step": 32570 - }, - { - "epoch": 0.826881583957355, - "grad_norm": 0.49080246686935425, - "learning_rate": 1.4487456106950969e-05, - "loss": 0.0871, - "step": 32575 - }, - { - "epoch": 0.8270085036172103, - "grad_norm": 3.8296127319335938, - "learning_rate": 1.4486609975885267e-05, - "loss": 0.0617, - "step": 32580 - }, - { - "epoch": 0.8271354232770656, - "grad_norm": 0.9950587749481201, - "learning_rate": 1.4485763844819565e-05, - "loss": 0.0639, - "step": 32585 - }, - { - "epoch": 0.8272623429369209, - "grad_norm": 0.5639054775238037, - "learning_rate": 1.448491771375386e-05, - "loss": 0.06, - "step": 32590 - }, - { - "epoch": 0.8273892625967763, - "grad_norm": 0.6198403239250183, - "learning_rate": 1.4484071582688159e-05, - "loss": 0.0743, - "step": 32595 - }, - { - "epoch": 0.8275161822566316, - "grad_norm": 0.589515745639801, - "learning_rate": 1.4483225451622457e-05, - "loss": 0.1039, - "step": 32600 - }, - { - "epoch": 0.8276431019164868, - "grad_norm": 0.45968759059906006, - "learning_rate": 1.4482379320556757e-05, - "loss": 0.0943, - "step": 32605 - }, - { - "epoch": 0.8277700215763422, - "grad_norm": 0.512458086013794, - "learning_rate": 1.4481533189491052e-05, - "loss": 0.0661, - "step": 32610 - }, - { - "epoch": 0.8278969412361975, - "grad_norm": 0.6159492135047913, - "learning_rate": 1.448068705842535e-05, - "loss": 0.0675, - "step": 32615 - }, - { - "epoch": 0.8280238608960528, - "grad_norm": 0.5835955739021301, - "learning_rate": 1.4479840927359649e-05, - "loss": 0.0648, - "step": 32620 - }, - { - "epoch": 0.8281507805559081, - "grad_norm": 0.4257288873195648, - "learning_rate": 1.4478994796293947e-05, - "loss": 0.0569, - "step": 32625 - }, - { - "epoch": 0.8282777002157634, - "grad_norm": 0.6230466365814209, - "learning_rate": 1.4478148665228244e-05, - "loss": 0.0739, - "step": 32630 - }, - { - "epoch": 0.8284046198756188, - "grad_norm": 0.5193502902984619, - "learning_rate": 1.4477302534162543e-05, - "loss": 0.0639, - "step": 32635 - }, - { - "epoch": 0.8285315395354741, - "grad_norm": 0.49657219648361206, - "learning_rate": 1.4476456403096841e-05, - "loss": 0.0798, - "step": 32640 - }, - { - "epoch": 0.8286584591953293, - "grad_norm": 0.44742855429649353, - "learning_rate": 1.447561027203114e-05, - "loss": 0.0695, - "step": 32645 - }, - { - "epoch": 0.8287853788551847, - "grad_norm": 0.4987706243991852, - "learning_rate": 1.4474764140965436e-05, - "loss": 0.0776, - "step": 32650 - }, - { - "epoch": 0.82891229851504, - "grad_norm": 0.3827221095561981, - "learning_rate": 1.4473918009899735e-05, - "loss": 0.0546, - "step": 32655 - }, - { - "epoch": 0.8290392181748953, - "grad_norm": 0.3191714882850647, - "learning_rate": 1.4473071878834033e-05, - "loss": 0.0699, - "step": 32660 - }, - { - "epoch": 0.8291661378347506, - "grad_norm": 0.33762332797050476, - "learning_rate": 1.4472225747768331e-05, - "loss": 0.0767, - "step": 32665 - }, - { - "epoch": 0.8292930574946059, - "grad_norm": 0.6669378876686096, - "learning_rate": 1.4471379616702628e-05, - "loss": 0.0628, - "step": 32670 - }, - { - "epoch": 0.8294199771544613, - "grad_norm": 0.6070554256439209, - "learning_rate": 1.4470533485636926e-05, - "loss": 0.0589, - "step": 32675 - }, - { - "epoch": 0.8295468968143166, - "grad_norm": 0.42419323325157166, - "learning_rate": 1.4469687354571225e-05, - "loss": 0.0833, - "step": 32680 - }, - { - "epoch": 0.8296738164741718, - "grad_norm": 0.36352285742759705, - "learning_rate": 1.4468841223505523e-05, - "loss": 0.0693, - "step": 32685 - }, - { - "epoch": 0.8298007361340272, - "grad_norm": 0.49149399995803833, - "learning_rate": 1.446799509243982e-05, - "loss": 0.0641, - "step": 32690 - }, - { - "epoch": 0.8299276557938825, - "grad_norm": 0.4792380630970001, - "learning_rate": 1.4467148961374118e-05, - "loss": 0.0557, - "step": 32695 - }, - { - "epoch": 0.8300545754537377, - "grad_norm": 0.3428802788257599, - "learning_rate": 1.4466302830308417e-05, - "loss": 0.0623, - "step": 32700 - }, - { - "epoch": 0.8301814951135931, - "grad_norm": 0.6220483183860779, - "learning_rate": 1.4465456699242715e-05, - "loss": 0.0686, - "step": 32705 - }, - { - "epoch": 0.8303084147734484, - "grad_norm": 0.6948777437210083, - "learning_rate": 1.4464610568177012e-05, - "loss": 0.0821, - "step": 32710 - }, - { - "epoch": 0.8304353344333038, - "grad_norm": 0.7796914577484131, - "learning_rate": 1.446376443711131e-05, - "loss": 0.0759, - "step": 32715 - }, - { - "epoch": 0.830562254093159, - "grad_norm": 0.7736451625823975, - "learning_rate": 1.4462918306045609e-05, - "loss": 0.0783, - "step": 32720 - }, - { - "epoch": 0.8306891737530143, - "grad_norm": 0.5693037509918213, - "learning_rate": 1.4462072174979907e-05, - "loss": 0.0741, - "step": 32725 - }, - { - "epoch": 0.8308160934128697, - "grad_norm": 0.4782044291496277, - "learning_rate": 1.4461226043914202e-05, - "loss": 0.084, - "step": 32730 - }, - { - "epoch": 0.830943013072725, - "grad_norm": 0.4120763838291168, - "learning_rate": 1.44603799128485e-05, - "loss": 0.0705, - "step": 32735 - }, - { - "epoch": 0.8310699327325802, - "grad_norm": 0.39103877544403076, - "learning_rate": 1.4459533781782799e-05, - "loss": 0.0645, - "step": 32740 - }, - { - "epoch": 0.8311968523924356, - "grad_norm": 0.48525550961494446, - "learning_rate": 1.4458687650717097e-05, - "loss": 0.054, - "step": 32745 - }, - { - "epoch": 0.8313237720522909, - "grad_norm": 0.5419496297836304, - "learning_rate": 1.4457841519651394e-05, - "loss": 0.0645, - "step": 32750 - }, - { - "epoch": 0.8314506917121463, - "grad_norm": 0.4710750877857208, - "learning_rate": 1.4456995388585692e-05, - "loss": 0.0551, - "step": 32755 - }, - { - "epoch": 0.8315776113720015, - "grad_norm": 0.5779423713684082, - "learning_rate": 1.445614925751999e-05, - "loss": 0.0705, - "step": 32760 - }, - { - "epoch": 0.8317045310318568, - "grad_norm": 0.5158020853996277, - "learning_rate": 1.445530312645429e-05, - "loss": 0.0772, - "step": 32765 - }, - { - "epoch": 0.8318314506917122, - "grad_norm": 0.4024525582790375, - "learning_rate": 1.4454456995388586e-05, - "loss": 0.0754, - "step": 32770 - }, - { - "epoch": 0.8319583703515675, - "grad_norm": 0.5488525629043579, - "learning_rate": 1.4453610864322884e-05, - "loss": 0.0624, - "step": 32775 - }, - { - "epoch": 0.8320852900114227, - "grad_norm": 0.614681601524353, - "learning_rate": 1.4452764733257183e-05, - "loss": 0.0654, - "step": 32780 - }, - { - "epoch": 0.8322122096712781, - "grad_norm": 0.4472516179084778, - "learning_rate": 1.4451918602191481e-05, - "loss": 0.0725, - "step": 32785 - }, - { - "epoch": 0.8323391293311334, - "grad_norm": 0.5016518831253052, - "learning_rate": 1.4451072471125778e-05, - "loss": 0.0789, - "step": 32790 - }, - { - "epoch": 0.8324660489909887, - "grad_norm": 0.5342526435852051, - "learning_rate": 1.4450226340060076e-05, - "loss": 0.058, - "step": 32795 - }, - { - "epoch": 0.832592968650844, - "grad_norm": 0.6646429300308228, - "learning_rate": 1.4449380208994375e-05, - "loss": 0.0755, - "step": 32800 - }, - { - "epoch": 0.8327198883106993, - "grad_norm": 0.964902937412262, - "learning_rate": 1.4448534077928673e-05, - "loss": 0.0504, - "step": 32805 - }, - { - "epoch": 0.8328468079705547, - "grad_norm": 0.3709750473499298, - "learning_rate": 1.444768794686297e-05, - "loss": 0.0579, - "step": 32810 - }, - { - "epoch": 0.83297372763041, - "grad_norm": 0.8053092360496521, - "learning_rate": 1.4446841815797268e-05, - "loss": 0.065, - "step": 32815 - }, - { - "epoch": 0.8331006472902652, - "grad_norm": 0.4349350035190582, - "learning_rate": 1.4445995684731567e-05, - "loss": 0.0676, - "step": 32820 - }, - { - "epoch": 0.8332275669501206, - "grad_norm": 0.5250813961029053, - "learning_rate": 1.4445149553665865e-05, - "loss": 0.0907, - "step": 32825 - }, - { - "epoch": 0.8333544866099759, - "grad_norm": 0.5355914831161499, - "learning_rate": 1.4444303422600162e-05, - "loss": 0.0706, - "step": 32830 - }, - { - "epoch": 0.8334814062698312, - "grad_norm": 0.6141570806503296, - "learning_rate": 1.444345729153446e-05, - "loss": 0.0621, - "step": 32835 - }, - { - "epoch": 0.8336083259296865, - "grad_norm": 0.4461732506752014, - "learning_rate": 1.4442611160468758e-05, - "loss": 0.0621, - "step": 32840 - }, - { - "epoch": 0.8337352455895418, - "grad_norm": 0.4861437976360321, - "learning_rate": 1.4441765029403057e-05, - "loss": 0.0861, - "step": 32845 - }, - { - "epoch": 0.8338621652493972, - "grad_norm": 0.8390300273895264, - "learning_rate": 1.4440918898337354e-05, - "loss": 0.056, - "step": 32850 - }, - { - "epoch": 0.8339890849092525, - "grad_norm": 0.6755895614624023, - "learning_rate": 1.4440072767271652e-05, - "loss": 0.0802, - "step": 32855 - }, - { - "epoch": 0.8341160045691077, - "grad_norm": 0.602419912815094, - "learning_rate": 1.443922663620595e-05, - "loss": 0.0782, - "step": 32860 - }, - { - "epoch": 0.8342429242289631, - "grad_norm": 0.630896806716919, - "learning_rate": 1.4438380505140249e-05, - "loss": 0.0671, - "step": 32865 - }, - { - "epoch": 0.8343698438888184, - "grad_norm": 0.6729292869567871, - "learning_rate": 1.4437534374074544e-05, - "loss": 0.0559, - "step": 32870 - }, - { - "epoch": 0.8344967635486736, - "grad_norm": 0.4810226857662201, - "learning_rate": 1.4436688243008842e-05, - "loss": 0.0683, - "step": 32875 - }, - { - "epoch": 0.834623683208529, - "grad_norm": 0.5468842387199402, - "learning_rate": 1.443584211194314e-05, - "loss": 0.0871, - "step": 32880 - }, - { - "epoch": 0.8347506028683843, - "grad_norm": 0.5746564269065857, - "learning_rate": 1.4434995980877439e-05, - "loss": 0.0859, - "step": 32885 - }, - { - "epoch": 0.8348775225282397, - "grad_norm": 0.4655057191848755, - "learning_rate": 1.4434149849811736e-05, - "loss": 0.0685, - "step": 32890 - }, - { - "epoch": 0.8350044421880949, - "grad_norm": 0.42792806029319763, - "learning_rate": 1.4433303718746034e-05, - "loss": 0.0575, - "step": 32895 - }, - { - "epoch": 0.8351313618479502, - "grad_norm": 0.7248384356498718, - "learning_rate": 1.4432457587680333e-05, - "loss": 0.0531, - "step": 32900 - }, - { - "epoch": 0.8352582815078056, - "grad_norm": 0.5235882997512817, - "learning_rate": 1.4431611456614631e-05, - "loss": 0.0721, - "step": 32905 - }, - { - "epoch": 0.8353852011676609, - "grad_norm": 0.9353030323982239, - "learning_rate": 1.4430765325548928e-05, - "loss": 0.0784, - "step": 32910 - }, - { - "epoch": 0.8355121208275161, - "grad_norm": 0.39874908328056335, - "learning_rate": 1.4429919194483226e-05, - "loss": 0.0625, - "step": 32915 - }, - { - "epoch": 0.8356390404873715, - "grad_norm": 0.45836013555526733, - "learning_rate": 1.4429073063417524e-05, - "loss": 0.0613, - "step": 32920 - }, - { - "epoch": 0.8357659601472268, - "grad_norm": 0.5721408128738403, - "learning_rate": 1.4428226932351823e-05, - "loss": 0.0653, - "step": 32925 - }, - { - "epoch": 0.8358928798070822, - "grad_norm": 0.3255437910556793, - "learning_rate": 1.442738080128612e-05, - "loss": 0.0567, - "step": 32930 - }, - { - "epoch": 0.8360197994669374, - "grad_norm": 0.3496831953525543, - "learning_rate": 1.4426534670220418e-05, - "loss": 0.0741, - "step": 32935 - }, - { - "epoch": 0.8361467191267927, - "grad_norm": 0.29643577337265015, - "learning_rate": 1.4425688539154716e-05, - "loss": 0.0659, - "step": 32940 - }, - { - "epoch": 0.8362736387866481, - "grad_norm": 0.5018746256828308, - "learning_rate": 1.4424842408089015e-05, - "loss": 0.0821, - "step": 32945 - }, - { - "epoch": 0.8364005584465034, - "grad_norm": 0.6610234975814819, - "learning_rate": 1.4423996277023311e-05, - "loss": 0.08, - "step": 32950 - }, - { - "epoch": 0.8365274781063586, - "grad_norm": 0.6022353172302246, - "learning_rate": 1.442315014595761e-05, - "loss": 0.0626, - "step": 32955 - }, - { - "epoch": 0.836654397766214, - "grad_norm": 0.6117549538612366, - "learning_rate": 1.4422304014891908e-05, - "loss": 0.0647, - "step": 32960 - }, - { - "epoch": 0.8367813174260693, - "grad_norm": 0.5178433060646057, - "learning_rate": 1.4421457883826207e-05, - "loss": 0.0803, - "step": 32965 - }, - { - "epoch": 0.8369082370859247, - "grad_norm": 0.5786886215209961, - "learning_rate": 1.4420611752760503e-05, - "loss": 0.0718, - "step": 32970 - }, - { - "epoch": 0.8370351567457799, - "grad_norm": 0.714501678943634, - "learning_rate": 1.4419765621694802e-05, - "loss": 0.0617, - "step": 32975 - }, - { - "epoch": 0.8371620764056352, - "grad_norm": 0.7138329148292542, - "learning_rate": 1.44189194906291e-05, - "loss": 0.0749, - "step": 32980 - }, - { - "epoch": 0.8372889960654906, - "grad_norm": 0.4159245491027832, - "learning_rate": 1.4418073359563399e-05, - "loss": 0.0663, - "step": 32985 - }, - { - "epoch": 0.8374159157253459, - "grad_norm": 0.5900177359580994, - "learning_rate": 1.4417227228497694e-05, - "loss": 0.0604, - "step": 32990 - }, - { - "epoch": 0.8375428353852011, - "grad_norm": 0.47192078828811646, - "learning_rate": 1.4416381097431994e-05, - "loss": 0.0569, - "step": 32995 - }, - { - "epoch": 0.8376697550450565, - "grad_norm": 0.6264024972915649, - "learning_rate": 1.4415534966366292e-05, - "loss": 0.0685, - "step": 33000 - }, - { - "epoch": 0.8377966747049118, - "grad_norm": 0.46940627694129944, - "learning_rate": 1.441468883530059e-05, - "loss": 0.0571, - "step": 33005 - }, - { - "epoch": 0.8379235943647672, - "grad_norm": 0.4805179834365845, - "learning_rate": 1.4413842704234885e-05, - "loss": 0.0639, - "step": 33010 - }, - { - "epoch": 0.8380505140246224, - "grad_norm": 2.3784632682800293, - "learning_rate": 1.4412996573169184e-05, - "loss": 0.0684, - "step": 33015 - }, - { - "epoch": 0.8381774336844777, - "grad_norm": 0.48729655146598816, - "learning_rate": 1.4412150442103482e-05, - "loss": 0.0585, - "step": 33020 - }, - { - "epoch": 0.8383043533443331, - "grad_norm": 0.3300178647041321, - "learning_rate": 1.441130431103778e-05, - "loss": 0.0421, - "step": 33025 - }, - { - "epoch": 0.8384312730041883, - "grad_norm": 0.4107823669910431, - "learning_rate": 1.441045817997208e-05, - "loss": 0.0789, - "step": 33030 - }, - { - "epoch": 0.8385581926640436, - "grad_norm": 0.4778127670288086, - "learning_rate": 1.4409612048906376e-05, - "loss": 0.0771, - "step": 33035 - }, - { - "epoch": 0.838685112323899, - "grad_norm": 0.47594690322875977, - "learning_rate": 1.4408765917840674e-05, - "loss": 0.0666, - "step": 33040 - }, - { - "epoch": 0.8388120319837543, - "grad_norm": 0.41224005818367004, - "learning_rate": 1.4407919786774973e-05, - "loss": 0.0745, - "step": 33045 - }, - { - "epoch": 0.8389389516436095, - "grad_norm": 0.4445095360279083, - "learning_rate": 1.4407073655709271e-05, - "loss": 0.0806, - "step": 33050 - }, - { - "epoch": 0.8390658713034649, - "grad_norm": 0.511183500289917, - "learning_rate": 1.4406227524643568e-05, - "loss": 0.1037, - "step": 33055 - }, - { - "epoch": 0.8391927909633202, - "grad_norm": 0.6216638088226318, - "learning_rate": 1.4405381393577866e-05, - "loss": 0.0646, - "step": 33060 - }, - { - "epoch": 0.8393197106231756, - "grad_norm": 0.5588344931602478, - "learning_rate": 1.4404535262512165e-05, - "loss": 0.0557, - "step": 33065 - }, - { - "epoch": 0.8394466302830308, - "grad_norm": 0.6707053780555725, - "learning_rate": 1.4403689131446463e-05, - "loss": 0.0631, - "step": 33070 - }, - { - "epoch": 0.8395735499428861, - "grad_norm": 0.7907190322875977, - "learning_rate": 1.440284300038076e-05, - "loss": 0.0667, - "step": 33075 - }, - { - "epoch": 0.8397004696027415, - "grad_norm": 0.4750858545303345, - "learning_rate": 1.4401996869315058e-05, - "loss": 0.0637, - "step": 33080 - }, - { - "epoch": 0.8398273892625968, - "grad_norm": 0.4876639246940613, - "learning_rate": 1.4401150738249356e-05, - "loss": 0.0703, - "step": 33085 - }, - { - "epoch": 0.839954308922452, - "grad_norm": 0.9885450601577759, - "learning_rate": 1.4400304607183655e-05, - "loss": 0.0789, - "step": 33090 - }, - { - "epoch": 0.8400812285823074, - "grad_norm": 0.529279887676239, - "learning_rate": 1.4399458476117952e-05, - "loss": 0.0531, - "step": 33095 - }, - { - "epoch": 0.8402081482421627, - "grad_norm": 0.48990482091903687, - "learning_rate": 1.439861234505225e-05, - "loss": 0.0892, - "step": 33100 - }, - { - "epoch": 0.8403350679020181, - "grad_norm": 0.4782700538635254, - "learning_rate": 1.4397766213986548e-05, - "loss": 0.0827, - "step": 33105 - }, - { - "epoch": 0.8404619875618733, - "grad_norm": 0.5945051312446594, - "learning_rate": 1.4396920082920847e-05, - "loss": 0.079, - "step": 33110 - }, - { - "epoch": 0.8405889072217286, - "grad_norm": 0.9060770869255066, - "learning_rate": 1.4396073951855143e-05, - "loss": 0.0583, - "step": 33115 - }, - { - "epoch": 0.840715826881584, - "grad_norm": 0.5947684645652771, - "learning_rate": 1.4395227820789442e-05, - "loss": 0.052, - "step": 33120 - }, - { - "epoch": 0.8408427465414393, - "grad_norm": 0.8086190819740295, - "learning_rate": 1.439438168972374e-05, - "loss": 0.0829, - "step": 33125 - }, - { - "epoch": 0.8409696662012945, - "grad_norm": 0.7434649467468262, - "learning_rate": 1.4393535558658039e-05, - "loss": 0.0634, - "step": 33130 - }, - { - "epoch": 0.8410965858611499, - "grad_norm": 0.7763369083404541, - "learning_rate": 1.4392689427592335e-05, - "loss": 0.0822, - "step": 33135 - }, - { - "epoch": 0.8412235055210052, - "grad_norm": 0.8062777519226074, - "learning_rate": 1.4391843296526634e-05, - "loss": 0.0726, - "step": 33140 - }, - { - "epoch": 0.8413504251808606, - "grad_norm": 1.0335768461227417, - "learning_rate": 1.4390997165460932e-05, - "loss": 0.0693, - "step": 33145 - }, - { - "epoch": 0.8414773448407158, - "grad_norm": 0.5449428558349609, - "learning_rate": 1.439015103439523e-05, - "loss": 0.0625, - "step": 33150 - }, - { - "epoch": 0.8416042645005711, - "grad_norm": 0.5364847183227539, - "learning_rate": 1.4389304903329526e-05, - "loss": 0.0481, - "step": 33155 - }, - { - "epoch": 0.8417311841604265, - "grad_norm": 0.5666654109954834, - "learning_rate": 1.4388458772263824e-05, - "loss": 0.0716, - "step": 33160 - }, - { - "epoch": 0.8418581038202818, - "grad_norm": 0.6670176386833191, - "learning_rate": 1.4387612641198122e-05, - "loss": 0.0854, - "step": 33165 - }, - { - "epoch": 0.841985023480137, - "grad_norm": 0.7500194311141968, - "learning_rate": 1.4386766510132422e-05, - "loss": 0.0807, - "step": 33170 - }, - { - "epoch": 0.8421119431399924, - "grad_norm": 0.4269331097602844, - "learning_rate": 1.4385920379066718e-05, - "loss": 0.0612, - "step": 33175 - }, - { - "epoch": 0.8422388627998477, - "grad_norm": 0.8794809579849243, - "learning_rate": 1.4385074248001016e-05, - "loss": 0.0608, - "step": 33180 - }, - { - "epoch": 0.8423657824597031, - "grad_norm": 0.6367028951644897, - "learning_rate": 1.4384228116935314e-05, - "loss": 0.0603, - "step": 33185 - }, - { - "epoch": 0.8424927021195583, - "grad_norm": 0.4562963545322418, - "learning_rate": 1.4383381985869613e-05, - "loss": 0.0683, - "step": 33190 - }, - { - "epoch": 0.8426196217794136, - "grad_norm": 0.5844389796257019, - "learning_rate": 1.438253585480391e-05, - "loss": 0.0544, - "step": 33195 - }, - { - "epoch": 0.842746541439269, - "grad_norm": 0.613857626914978, - "learning_rate": 1.4381689723738208e-05, - "loss": 0.0524, - "step": 33200 - }, - { - "epoch": 0.8428734610991242, - "grad_norm": 0.4180372655391693, - "learning_rate": 1.4380843592672506e-05, - "loss": 0.077, - "step": 33205 - }, - { - "epoch": 0.8430003807589795, - "grad_norm": 0.3582751154899597, - "learning_rate": 1.4379997461606805e-05, - "loss": 0.0776, - "step": 33210 - }, - { - "epoch": 0.8431273004188349, - "grad_norm": 0.4815937578678131, - "learning_rate": 1.4379151330541101e-05, - "loss": 0.0817, - "step": 33215 - }, - { - "epoch": 0.8432542200786902, - "grad_norm": 0.6215910315513611, - "learning_rate": 1.43783051994754e-05, - "loss": 0.0693, - "step": 33220 - }, - { - "epoch": 0.8433811397385454, - "grad_norm": 0.6343068480491638, - "learning_rate": 1.4377459068409698e-05, - "loss": 0.082, - "step": 33225 - }, - { - "epoch": 0.8435080593984008, - "grad_norm": 0.9241308569908142, - "learning_rate": 1.4376612937343997e-05, - "loss": 0.0588, - "step": 33230 - }, - { - "epoch": 0.8436349790582561, - "grad_norm": 0.46491649746894836, - "learning_rate": 1.4375766806278293e-05, - "loss": 0.0541, - "step": 33235 - }, - { - "epoch": 0.8437618987181115, - "grad_norm": 0.5068700313568115, - "learning_rate": 1.4374920675212592e-05, - "loss": 0.0691, - "step": 33240 - }, - { - "epoch": 0.8438888183779667, - "grad_norm": 0.34579646587371826, - "learning_rate": 1.437407454414689e-05, - "loss": 0.0591, - "step": 33245 - }, - { - "epoch": 0.844015738037822, - "grad_norm": 0.5721991658210754, - "learning_rate": 1.4373228413081188e-05, - "loss": 0.0647, - "step": 33250 - }, - { - "epoch": 0.8441426576976774, - "grad_norm": 0.8524814248085022, - "learning_rate": 1.4372382282015485e-05, - "loss": 0.0736, - "step": 33255 - }, - { - "epoch": 0.8442695773575327, - "grad_norm": 0.5209138989448547, - "learning_rate": 1.4371536150949784e-05, - "loss": 0.0631, - "step": 33260 - }, - { - "epoch": 0.844396497017388, - "grad_norm": 0.6192706823348999, - "learning_rate": 1.4370690019884082e-05, - "loss": 0.0653, - "step": 33265 - }, - { - "epoch": 0.8445234166772433, - "grad_norm": 0.5619567632675171, - "learning_rate": 1.436984388881838e-05, - "loss": 0.0651, - "step": 33270 - }, - { - "epoch": 0.8446503363370986, - "grad_norm": 0.5826150178909302, - "learning_rate": 1.4368997757752677e-05, - "loss": 0.0734, - "step": 33275 - }, - { - "epoch": 0.844777255996954, - "grad_norm": 0.3955118954181671, - "learning_rate": 1.4368151626686975e-05, - "loss": 0.0718, - "step": 33280 - }, - { - "epoch": 0.8449041756568092, - "grad_norm": 0.8676596283912659, - "learning_rate": 1.4367305495621274e-05, - "loss": 0.0878, - "step": 33285 - }, - { - "epoch": 0.8450310953166645, - "grad_norm": 0.5957885384559631, - "learning_rate": 1.4366459364555572e-05, - "loss": 0.086, - "step": 33290 - }, - { - "epoch": 0.8451580149765199, - "grad_norm": 0.6241479516029358, - "learning_rate": 1.4365613233489867e-05, - "loss": 0.0613, - "step": 33295 - }, - { - "epoch": 0.8452849346363752, - "grad_norm": 0.5145629048347473, - "learning_rate": 1.4364767102424166e-05, - "loss": 0.0817, - "step": 33300 - }, - { - "epoch": 0.8454118542962304, - "grad_norm": 0.4799592196941376, - "learning_rate": 1.4363920971358464e-05, - "loss": 0.0648, - "step": 33305 - }, - { - "epoch": 0.8455387739560858, - "grad_norm": 0.4176102578639984, - "learning_rate": 1.4363074840292763e-05, - "loss": 0.0478, - "step": 33310 - }, - { - "epoch": 0.8456656936159411, - "grad_norm": 0.47523072361946106, - "learning_rate": 1.436222870922706e-05, - "loss": 0.0528, - "step": 33315 - }, - { - "epoch": 0.8457926132757965, - "grad_norm": 0.4700790047645569, - "learning_rate": 1.4361382578161358e-05, - "loss": 0.0567, - "step": 33320 - }, - { - "epoch": 0.8459195329356517, - "grad_norm": 0.8758045434951782, - "learning_rate": 1.4360536447095656e-05, - "loss": 0.0582, - "step": 33325 - }, - { - "epoch": 0.846046452595507, - "grad_norm": 0.5568518042564392, - "learning_rate": 1.4359690316029954e-05, - "loss": 0.0665, - "step": 33330 - }, - { - "epoch": 0.8461733722553624, - "grad_norm": 0.6000694036483765, - "learning_rate": 1.4358844184964251e-05, - "loss": 0.0629, - "step": 33335 - }, - { - "epoch": 0.8463002919152177, - "grad_norm": 0.3391009569168091, - "learning_rate": 1.435799805389855e-05, - "loss": 0.0637, - "step": 33340 - }, - { - "epoch": 0.846427211575073, - "grad_norm": 0.8091613054275513, - "learning_rate": 1.4357151922832848e-05, - "loss": 0.0649, - "step": 33345 - }, - { - "epoch": 0.8465541312349283, - "grad_norm": 0.35350754857063293, - "learning_rate": 1.4356305791767146e-05, - "loss": 0.0693, - "step": 33350 - }, - { - "epoch": 0.8466810508947836, - "grad_norm": 0.5511595606803894, - "learning_rate": 1.4355459660701443e-05, - "loss": 0.0601, - "step": 33355 - }, - { - "epoch": 0.846807970554639, - "grad_norm": 1.1737196445465088, - "learning_rate": 1.4354613529635741e-05, - "loss": 0.0526, - "step": 33360 - }, - { - "epoch": 0.8469348902144942, - "grad_norm": 0.4971906840801239, - "learning_rate": 1.435376739857004e-05, - "loss": 0.0664, - "step": 33365 - }, - { - "epoch": 0.8470618098743495, - "grad_norm": 0.4652421176433563, - "learning_rate": 1.4352921267504338e-05, - "loss": 0.0496, - "step": 33370 - }, - { - "epoch": 0.8471887295342049, - "grad_norm": 0.5811346769332886, - "learning_rate": 1.4352075136438635e-05, - "loss": 0.0741, - "step": 33375 - }, - { - "epoch": 0.8473156491940601, - "grad_norm": 0.721636950969696, - "learning_rate": 1.4351229005372933e-05, - "loss": 0.0701, - "step": 33380 - }, - { - "epoch": 0.8474425688539154, - "grad_norm": 0.6042395234107971, - "learning_rate": 1.4350382874307232e-05, - "loss": 0.0774, - "step": 33385 - }, - { - "epoch": 0.8475694885137708, - "grad_norm": 0.4466267228126526, - "learning_rate": 1.434953674324153e-05, - "loss": 0.0751, - "step": 33390 - }, - { - "epoch": 0.8476964081736261, - "grad_norm": 0.4388009011745453, - "learning_rate": 1.4348690612175827e-05, - "loss": 0.0865, - "step": 33395 - }, - { - "epoch": 0.8478233278334814, - "grad_norm": 0.7197089791297913, - "learning_rate": 1.4347844481110125e-05, - "loss": 0.0633, - "step": 33400 - }, - { - "epoch": 0.8479502474933367, - "grad_norm": 0.3667832612991333, - "learning_rate": 1.4346998350044424e-05, - "loss": 0.0621, - "step": 33405 - }, - { - "epoch": 0.848077167153192, - "grad_norm": 0.559489369392395, - "learning_rate": 1.4346152218978722e-05, - "loss": 0.0703, - "step": 33410 - }, - { - "epoch": 0.8482040868130474, - "grad_norm": 0.3234005272388458, - "learning_rate": 1.4345306087913017e-05, - "loss": 0.0614, - "step": 33415 - }, - { - "epoch": 0.8483310064729026, - "grad_norm": 0.5980404615402222, - "learning_rate": 1.4344459956847317e-05, - "loss": 0.0551, - "step": 33420 - }, - { - "epoch": 0.848457926132758, - "grad_norm": 0.6278416514396667, - "learning_rate": 1.4343613825781616e-05, - "loss": 0.0649, - "step": 33425 - }, - { - "epoch": 0.8485848457926133, - "grad_norm": 0.6509944200515747, - "learning_rate": 1.4342767694715914e-05, - "loss": 0.0879, - "step": 33430 - }, - { - "epoch": 0.8487117654524686, - "grad_norm": 0.5429641604423523, - "learning_rate": 1.4341921563650209e-05, - "loss": 0.0635, - "step": 33435 - }, - { - "epoch": 0.8488386851123239, - "grad_norm": 0.3921922445297241, - "learning_rate": 1.4341075432584507e-05, - "loss": 0.074, - "step": 33440 - }, - { - "epoch": 0.8489656047721792, - "grad_norm": 0.46791186928749084, - "learning_rate": 1.4340229301518806e-05, - "loss": 0.0512, - "step": 33445 - }, - { - "epoch": 0.8490925244320345, - "grad_norm": 0.7740024328231812, - "learning_rate": 1.4339383170453104e-05, - "loss": 0.0611, - "step": 33450 - }, - { - "epoch": 0.8492194440918899, - "grad_norm": 0.466043621301651, - "learning_rate": 1.4338537039387401e-05, - "loss": 0.0595, - "step": 33455 - }, - { - "epoch": 0.8493463637517451, - "grad_norm": 0.6305354833602905, - "learning_rate": 1.43376909083217e-05, - "loss": 0.1057, - "step": 33460 - }, - { - "epoch": 0.8494732834116004, - "grad_norm": 0.7660009860992432, - "learning_rate": 1.4336844777255998e-05, - "loss": 0.0611, - "step": 33465 - }, - { - "epoch": 0.8496002030714558, - "grad_norm": 0.5465408563613892, - "learning_rate": 1.4335998646190296e-05, - "loss": 0.0572, - "step": 33470 - }, - { - "epoch": 0.8497271227313111, - "grad_norm": 1.067832589149475, - "learning_rate": 1.4335152515124593e-05, - "loss": 0.0818, - "step": 33475 - }, - { - "epoch": 0.8498540423911664, - "grad_norm": 0.4152190089225769, - "learning_rate": 1.4334306384058891e-05, - "loss": 0.084, - "step": 33480 - }, - { - "epoch": 0.8499809620510217, - "grad_norm": 0.42464110255241394, - "learning_rate": 1.433346025299319e-05, - "loss": 0.0509, - "step": 33485 - }, - { - "epoch": 0.850107881710877, - "grad_norm": 0.6546791195869446, - "learning_rate": 1.4332614121927488e-05, - "loss": 0.0567, - "step": 33490 - }, - { - "epoch": 0.8502348013707324, - "grad_norm": 0.504569947719574, - "learning_rate": 1.4331767990861785e-05, - "loss": 0.06, - "step": 33495 - }, - { - "epoch": 0.8503617210305876, - "grad_norm": 0.4814865291118622, - "learning_rate": 1.4330921859796083e-05, - "loss": 0.074, - "step": 33500 - }, - { - "epoch": 0.8504886406904429, - "grad_norm": 0.44802454113960266, - "learning_rate": 1.4330075728730382e-05, - "loss": 0.0548, - "step": 33505 - }, - { - "epoch": 0.8506155603502983, - "grad_norm": 0.3808616101741791, - "learning_rate": 1.432922959766468e-05, - "loss": 0.054, - "step": 33510 - }, - { - "epoch": 0.8507424800101536, - "grad_norm": 0.6407504081726074, - "learning_rate": 1.4328383466598977e-05, - "loss": 0.0671, - "step": 33515 - }, - { - "epoch": 0.8508693996700089, - "grad_norm": 0.5112860798835754, - "learning_rate": 1.4327537335533275e-05, - "loss": 0.0673, - "step": 33520 - }, - { - "epoch": 0.8509963193298642, - "grad_norm": 0.3260871171951294, - "learning_rate": 1.4326691204467573e-05, - "loss": 0.0744, - "step": 33525 - }, - { - "epoch": 0.8511232389897195, - "grad_norm": 0.6209508776664734, - "learning_rate": 1.4325845073401872e-05, - "loss": 0.0751, - "step": 33530 - }, - { - "epoch": 0.8512501586495749, - "grad_norm": 0.3621441125869751, - "learning_rate": 1.4324998942336169e-05, - "loss": 0.0555, - "step": 33535 - }, - { - "epoch": 0.8513770783094301, - "grad_norm": 0.4430544376373291, - "learning_rate": 1.4324152811270467e-05, - "loss": 0.0408, - "step": 33540 - }, - { - "epoch": 0.8515039979692854, - "grad_norm": 1.0470702648162842, - "learning_rate": 1.4323306680204765e-05, - "loss": 0.0597, - "step": 33545 - }, - { - "epoch": 0.8516309176291408, - "grad_norm": 0.9249410629272461, - "learning_rate": 1.4322460549139064e-05, - "loss": 0.0857, - "step": 33550 - }, - { - "epoch": 0.851757837288996, - "grad_norm": 0.5583792924880981, - "learning_rate": 1.4321614418073362e-05, - "loss": 0.0698, - "step": 33555 - }, - { - "epoch": 0.8518847569488514, - "grad_norm": 0.5154417753219604, - "learning_rate": 1.4320768287007659e-05, - "loss": 0.0788, - "step": 33560 - }, - { - "epoch": 0.8520116766087067, - "grad_norm": 0.5030690431594849, - "learning_rate": 1.4319922155941957e-05, - "loss": 0.0667, - "step": 33565 - }, - { - "epoch": 0.852138596268562, - "grad_norm": 0.4627953767776489, - "learning_rate": 1.4319076024876256e-05, - "loss": 0.0762, - "step": 33570 - }, - { - "epoch": 0.8522655159284173, - "grad_norm": 0.6261065006256104, - "learning_rate": 1.4318229893810554e-05, - "loss": 0.0828, - "step": 33575 - }, - { - "epoch": 0.8523924355882726, - "grad_norm": 0.49347883462905884, - "learning_rate": 1.4317383762744849e-05, - "loss": 0.0665, - "step": 33580 - }, - { - "epoch": 0.8525193552481279, - "grad_norm": 0.4245373010635376, - "learning_rate": 1.4316537631679148e-05, - "loss": 0.0646, - "step": 33585 - }, - { - "epoch": 0.8526462749079833, - "grad_norm": 0.4993675649166107, - "learning_rate": 1.4315691500613446e-05, - "loss": 0.0617, - "step": 33590 - }, - { - "epoch": 0.8527731945678385, - "grad_norm": 1.14876127243042, - "learning_rate": 1.4314845369547746e-05, - "loss": 0.0821, - "step": 33595 - }, - { - "epoch": 0.8529001142276939, - "grad_norm": 0.6893752217292786, - "learning_rate": 1.4313999238482041e-05, - "loss": 0.0674, - "step": 33600 - }, - { - "epoch": 0.8530270338875492, - "grad_norm": 0.5800114870071411, - "learning_rate": 1.431315310741634e-05, - "loss": 0.0688, - "step": 33605 - }, - { - "epoch": 0.8531539535474045, - "grad_norm": 0.9347243905067444, - "learning_rate": 1.4312306976350638e-05, - "loss": 0.0602, - "step": 33610 - }, - { - "epoch": 0.8532808732072598, - "grad_norm": 0.4610166847705841, - "learning_rate": 1.4311460845284936e-05, - "loss": 0.0534, - "step": 33615 - }, - { - "epoch": 0.8534077928671151, - "grad_norm": 0.6711344718933105, - "learning_rate": 1.4310614714219233e-05, - "loss": 0.0715, - "step": 33620 - }, - { - "epoch": 0.8535347125269704, - "grad_norm": 0.6091018915176392, - "learning_rate": 1.4309768583153531e-05, - "loss": 0.0558, - "step": 33625 - }, - { - "epoch": 0.8536616321868258, - "grad_norm": 1.1320406198501587, - "learning_rate": 1.430892245208783e-05, - "loss": 0.0755, - "step": 33630 - }, - { - "epoch": 0.853788551846681, - "grad_norm": 0.5309237241744995, - "learning_rate": 1.4308076321022128e-05, - "loss": 0.0617, - "step": 33635 - }, - { - "epoch": 0.8539154715065363, - "grad_norm": 1.0032464265823364, - "learning_rate": 1.4307230189956425e-05, - "loss": 0.052, - "step": 33640 - }, - { - "epoch": 0.8540423911663917, - "grad_norm": 0.5201375484466553, - "learning_rate": 1.4306384058890723e-05, - "loss": 0.0695, - "step": 33645 - }, - { - "epoch": 0.854169310826247, - "grad_norm": 0.6913803815841675, - "learning_rate": 1.4305537927825022e-05, - "loss": 0.057, - "step": 33650 - }, - { - "epoch": 0.8542962304861023, - "grad_norm": 0.6010254621505737, - "learning_rate": 1.430469179675932e-05, - "loss": 0.0736, - "step": 33655 - }, - { - "epoch": 0.8544231501459576, - "grad_norm": 1.6657541990280151, - "learning_rate": 1.4303845665693617e-05, - "loss": 0.0803, - "step": 33660 - }, - { - "epoch": 0.8545500698058129, - "grad_norm": 0.5984703302383423, - "learning_rate": 1.4302999534627915e-05, - "loss": 0.0613, - "step": 33665 - }, - { - "epoch": 0.8546769894656683, - "grad_norm": 0.4542136788368225, - "learning_rate": 1.4302153403562214e-05, - "loss": 0.0705, - "step": 33670 - }, - { - "epoch": 0.8548039091255235, - "grad_norm": 0.352664977312088, - "learning_rate": 1.4301307272496512e-05, - "loss": 0.08, - "step": 33675 - }, - { - "epoch": 0.8549308287853788, - "grad_norm": 0.710128903388977, - "learning_rate": 1.4300461141430809e-05, - "loss": 0.0683, - "step": 33680 - }, - { - "epoch": 0.8550577484452342, - "grad_norm": 0.41247090697288513, - "learning_rate": 1.4299615010365107e-05, - "loss": 0.0827, - "step": 33685 - }, - { - "epoch": 0.8551846681050895, - "grad_norm": 0.4141879975795746, - "learning_rate": 1.4298768879299405e-05, - "loss": 0.0588, - "step": 33690 - }, - { - "epoch": 0.8553115877649448, - "grad_norm": 0.4756919741630554, - "learning_rate": 1.4297922748233704e-05, - "loss": 0.0543, - "step": 33695 - }, - { - "epoch": 0.8554385074248001, - "grad_norm": 0.6792693734169006, - "learning_rate": 1.4297076617168e-05, - "loss": 0.0619, - "step": 33700 - }, - { - "epoch": 0.8555654270846554, - "grad_norm": 0.5345882177352905, - "learning_rate": 1.4296230486102299e-05, - "loss": 0.0692, - "step": 33705 - }, - { - "epoch": 0.8556923467445108, - "grad_norm": 0.48848217725753784, - "learning_rate": 1.4295384355036597e-05, - "loss": 0.0685, - "step": 33710 - }, - { - "epoch": 0.855819266404366, - "grad_norm": 0.44496020674705505, - "learning_rate": 1.4294538223970896e-05, - "loss": 0.0539, - "step": 33715 - }, - { - "epoch": 0.8559461860642213, - "grad_norm": 0.5546552538871765, - "learning_rate": 1.429369209290519e-05, - "loss": 0.0622, - "step": 33720 - }, - { - "epoch": 0.8560731057240767, - "grad_norm": 0.6566063165664673, - "learning_rate": 1.429284596183949e-05, - "loss": 0.0779, - "step": 33725 - }, - { - "epoch": 0.8562000253839319, - "grad_norm": 0.393198162317276, - "learning_rate": 1.4291999830773788e-05, - "loss": 0.075, - "step": 33730 - }, - { - "epoch": 0.8563269450437873, - "grad_norm": 0.5352087616920471, - "learning_rate": 1.4291153699708086e-05, - "loss": 0.0748, - "step": 33735 - }, - { - "epoch": 0.8564538647036426, - "grad_norm": 0.4150392413139343, - "learning_rate": 1.4290307568642383e-05, - "loss": 0.0622, - "step": 33740 - }, - { - "epoch": 0.8565807843634979, - "grad_norm": 0.4499109089374542, - "learning_rate": 1.4289461437576681e-05, - "loss": 0.0541, - "step": 33745 - }, - { - "epoch": 0.8567077040233532, - "grad_norm": 0.5329458713531494, - "learning_rate": 1.428861530651098e-05, - "loss": 0.0532, - "step": 33750 - }, - { - "epoch": 0.8568346236832085, - "grad_norm": 0.5125871896743774, - "learning_rate": 1.4287769175445278e-05, - "loss": 0.059, - "step": 33755 - }, - { - "epoch": 0.8569615433430638, - "grad_norm": 0.6420223712921143, - "learning_rate": 1.4286923044379575e-05, - "loss": 0.0633, - "step": 33760 - }, - { - "epoch": 0.8570884630029192, - "grad_norm": 0.7777812480926514, - "learning_rate": 1.4286076913313873e-05, - "loss": 0.0537, - "step": 33765 - }, - { - "epoch": 0.8572153826627744, - "grad_norm": 0.6236174702644348, - "learning_rate": 1.4285230782248171e-05, - "loss": 0.0698, - "step": 33770 - }, - { - "epoch": 0.8573423023226298, - "grad_norm": 0.6271987557411194, - "learning_rate": 1.428438465118247e-05, - "loss": 0.0745, - "step": 33775 - }, - { - "epoch": 0.8574692219824851, - "grad_norm": 0.6442590951919556, - "learning_rate": 1.4283538520116767e-05, - "loss": 0.0732, - "step": 33780 - }, - { - "epoch": 0.8575961416423404, - "grad_norm": 0.9784843325614929, - "learning_rate": 1.4282692389051065e-05, - "loss": 0.0515, - "step": 33785 - }, - { - "epoch": 0.8577230613021957, - "grad_norm": 0.4158329665660858, - "learning_rate": 1.4281846257985363e-05, - "loss": 0.0496, - "step": 33790 - }, - { - "epoch": 0.857849980962051, - "grad_norm": 0.6293787956237793, - "learning_rate": 1.4281000126919662e-05, - "loss": 0.0528, - "step": 33795 - }, - { - "epoch": 0.8579769006219063, - "grad_norm": 0.39866453409194946, - "learning_rate": 1.4280153995853958e-05, - "loss": 0.0545, - "step": 33800 - }, - { - "epoch": 0.8581038202817617, - "grad_norm": 0.5742182731628418, - "learning_rate": 1.4279307864788257e-05, - "loss": 0.0669, - "step": 33805 - }, - { - "epoch": 0.8582307399416169, - "grad_norm": 0.6003056168556213, - "learning_rate": 1.4278461733722555e-05, - "loss": 0.0648, - "step": 33810 - }, - { - "epoch": 0.8583576596014723, - "grad_norm": 0.5700626969337463, - "learning_rate": 1.4277615602656854e-05, - "loss": 0.0622, - "step": 33815 - }, - { - "epoch": 0.8584845792613276, - "grad_norm": 1.0343085527420044, - "learning_rate": 1.427676947159115e-05, - "loss": 0.0627, - "step": 33820 - }, - { - "epoch": 0.8586114989211829, - "grad_norm": 0.4751814603805542, - "learning_rate": 1.4275923340525449e-05, - "loss": 0.0731, - "step": 33825 - }, - { - "epoch": 0.8587384185810382, - "grad_norm": 0.36247438192367554, - "learning_rate": 1.4275077209459747e-05, - "loss": 0.087, - "step": 33830 - }, - { - "epoch": 0.8588653382408935, - "grad_norm": 0.8372130393981934, - "learning_rate": 1.4274231078394046e-05, - "loss": 0.0604, - "step": 33835 - }, - { - "epoch": 0.8589922579007488, - "grad_norm": 0.6590267419815063, - "learning_rate": 1.4273384947328342e-05, - "loss": 0.0687, - "step": 33840 - }, - { - "epoch": 0.8591191775606042, - "grad_norm": 0.6623085141181946, - "learning_rate": 1.427253881626264e-05, - "loss": 0.0543, - "step": 33845 - }, - { - "epoch": 0.8592460972204594, - "grad_norm": 0.4704403579235077, - "learning_rate": 1.4271692685196939e-05, - "loss": 0.0574, - "step": 33850 - }, - { - "epoch": 0.8593730168803148, - "grad_norm": 0.5843262672424316, - "learning_rate": 1.4270846554131237e-05, - "loss": 0.0686, - "step": 33855 - }, - { - "epoch": 0.8594999365401701, - "grad_norm": 0.6860697269439697, - "learning_rate": 1.4270000423065533e-05, - "loss": 0.0759, - "step": 33860 - }, - { - "epoch": 0.8596268562000254, - "grad_norm": 0.4632835388183594, - "learning_rate": 1.4269154291999831e-05, - "loss": 0.059, - "step": 33865 - }, - { - "epoch": 0.8597537758598807, - "grad_norm": 0.3741845488548279, - "learning_rate": 1.426830816093413e-05, - "loss": 0.0803, - "step": 33870 - }, - { - "epoch": 0.859880695519736, - "grad_norm": 0.6014963388442993, - "learning_rate": 1.4267462029868428e-05, - "loss": 0.0802, - "step": 33875 - }, - { - "epoch": 0.8600076151795913, - "grad_norm": 0.5591068267822266, - "learning_rate": 1.4266615898802724e-05, - "loss": 0.0671, - "step": 33880 - }, - { - "epoch": 0.8601345348394467, - "grad_norm": 0.3686566948890686, - "learning_rate": 1.4265769767737023e-05, - "loss": 0.0646, - "step": 33885 - }, - { - "epoch": 0.8602614544993019, - "grad_norm": 0.5514770746231079, - "learning_rate": 1.4264923636671321e-05, - "loss": 0.0529, - "step": 33890 - }, - { - "epoch": 0.8603883741591573, - "grad_norm": 0.5628662109375, - "learning_rate": 1.426407750560562e-05, - "loss": 0.0658, - "step": 33895 - }, - { - "epoch": 0.8605152938190126, - "grad_norm": 0.40834084153175354, - "learning_rate": 1.4263231374539916e-05, - "loss": 0.0689, - "step": 33900 - }, - { - "epoch": 0.8606422134788678, - "grad_norm": 0.5730663537979126, - "learning_rate": 1.4262385243474215e-05, - "loss": 0.0614, - "step": 33905 - }, - { - "epoch": 0.8607691331387232, - "grad_norm": 0.5943712592124939, - "learning_rate": 1.4261539112408513e-05, - "loss": 0.0584, - "step": 33910 - }, - { - "epoch": 0.8608960527985785, - "grad_norm": 0.5139994025230408, - "learning_rate": 1.4260692981342812e-05, - "loss": 0.0681, - "step": 33915 - }, - { - "epoch": 0.8610229724584338, - "grad_norm": 0.7057727575302124, - "learning_rate": 1.4259846850277108e-05, - "loss": 0.0654, - "step": 33920 - }, - { - "epoch": 0.8611498921182891, - "grad_norm": 0.6305063366889954, - "learning_rate": 1.4259000719211407e-05, - "loss": 0.1026, - "step": 33925 - }, - { - "epoch": 0.8612768117781444, - "grad_norm": 0.5597143769264221, - "learning_rate": 1.4258154588145705e-05, - "loss": 0.0664, - "step": 33930 - }, - { - "epoch": 0.8614037314379998, - "grad_norm": 0.46052107214927673, - "learning_rate": 1.4257308457080003e-05, - "loss": 0.0651, - "step": 33935 - }, - { - "epoch": 0.8615306510978551, - "grad_norm": 0.5584263801574707, - "learning_rate": 1.42564623260143e-05, - "loss": 0.0679, - "step": 33940 - }, - { - "epoch": 0.8616575707577103, - "grad_norm": 0.4805888831615448, - "learning_rate": 1.4255616194948599e-05, - "loss": 0.0591, - "step": 33945 - }, - { - "epoch": 0.8617844904175657, - "grad_norm": 0.5662950873374939, - "learning_rate": 1.4254770063882897e-05, - "loss": 0.0604, - "step": 33950 - }, - { - "epoch": 0.861911410077421, - "grad_norm": 0.37461137771606445, - "learning_rate": 1.4253923932817195e-05, - "loss": 0.0586, - "step": 33955 - }, - { - "epoch": 0.8620383297372763, - "grad_norm": 0.6208978295326233, - "learning_rate": 1.4253077801751492e-05, - "loss": 0.0637, - "step": 33960 - }, - { - "epoch": 0.8621652493971316, - "grad_norm": 0.6009203195571899, - "learning_rate": 1.425223167068579e-05, - "loss": 0.0864, - "step": 33965 - }, - { - "epoch": 0.8622921690569869, - "grad_norm": 0.5648624897003174, - "learning_rate": 1.4251385539620089e-05, - "loss": 0.0549, - "step": 33970 - }, - { - "epoch": 0.8624190887168423, - "grad_norm": 1.3415404558181763, - "learning_rate": 1.4250539408554387e-05, - "loss": 0.0829, - "step": 33975 - }, - { - "epoch": 0.8625460083766976, - "grad_norm": 0.8384197354316711, - "learning_rate": 1.4249693277488682e-05, - "loss": 0.0759, - "step": 33980 - }, - { - "epoch": 0.8626729280365528, - "grad_norm": 0.5298097729682922, - "learning_rate": 1.4248847146422982e-05, - "loss": 0.0704, - "step": 33985 - }, - { - "epoch": 0.8627998476964082, - "grad_norm": 0.48840299248695374, - "learning_rate": 1.424800101535728e-05, - "loss": 0.0707, - "step": 33990 - }, - { - "epoch": 0.8629267673562635, - "grad_norm": 1.2078357934951782, - "learning_rate": 1.424715488429158e-05, - "loss": 0.0805, - "step": 33995 - }, - { - "epoch": 0.8630536870161188, - "grad_norm": 0.527970552444458, - "learning_rate": 1.4246308753225874e-05, - "loss": 0.0747, - "step": 34000 - }, - { - "epoch": 0.8631806066759741, - "grad_norm": 0.4572445750236511, - "learning_rate": 1.4245462622160173e-05, - "loss": 0.0818, - "step": 34005 - }, - { - "epoch": 0.8633075263358294, - "grad_norm": 0.471896767616272, - "learning_rate": 1.4244616491094471e-05, - "loss": 0.0674, - "step": 34010 - }, - { - "epoch": 0.8634344459956848, - "grad_norm": 0.3998333513736725, - "learning_rate": 1.424377036002877e-05, - "loss": 0.0524, - "step": 34015 - }, - { - "epoch": 0.8635613656555401, - "grad_norm": 0.4831821620464325, - "learning_rate": 1.4242924228963066e-05, - "loss": 0.0727, - "step": 34020 - }, - { - "epoch": 0.8636882853153953, - "grad_norm": 0.5583149790763855, - "learning_rate": 1.4242078097897365e-05, - "loss": 0.0818, - "step": 34025 - }, - { - "epoch": 0.8638152049752507, - "grad_norm": 0.5018155574798584, - "learning_rate": 1.4241231966831663e-05, - "loss": 0.0825, - "step": 34030 - }, - { - "epoch": 0.863942124635106, - "grad_norm": 0.5302048921585083, - "learning_rate": 1.4240385835765961e-05, - "loss": 0.0679, - "step": 34035 - }, - { - "epoch": 0.8640690442949613, - "grad_norm": 0.3279462158679962, - "learning_rate": 1.4239539704700258e-05, - "loss": 0.053, - "step": 34040 - }, - { - "epoch": 0.8641959639548166, - "grad_norm": 0.5372982621192932, - "learning_rate": 1.4238693573634556e-05, - "loss": 0.0578, - "step": 34045 - }, - { - "epoch": 0.8643228836146719, - "grad_norm": 0.7856726050376892, - "learning_rate": 1.4237847442568855e-05, - "loss": 0.0688, - "step": 34050 - }, - { - "epoch": 0.8644498032745273, - "grad_norm": 0.21966806054115295, - "learning_rate": 1.4237001311503153e-05, - "loss": 0.0577, - "step": 34055 - }, - { - "epoch": 0.8645767229343826, - "grad_norm": 0.44183146953582764, - "learning_rate": 1.423615518043745e-05, - "loss": 0.0591, - "step": 34060 - }, - { - "epoch": 0.8647036425942378, - "grad_norm": 0.32172006368637085, - "learning_rate": 1.4235309049371748e-05, - "loss": 0.0598, - "step": 34065 - }, - { - "epoch": 0.8648305622540932, - "grad_norm": 0.7059404253959656, - "learning_rate": 1.4234462918306047e-05, - "loss": 0.0625, - "step": 34070 - }, - { - "epoch": 0.8649574819139485, - "grad_norm": 0.5246771574020386, - "learning_rate": 1.4233616787240345e-05, - "loss": 0.0658, - "step": 34075 - }, - { - "epoch": 0.8650844015738037, - "grad_norm": 0.6231421232223511, - "learning_rate": 1.4232770656174644e-05, - "loss": 0.0552, - "step": 34080 - }, - { - "epoch": 0.8652113212336591, - "grad_norm": 0.9315437078475952, - "learning_rate": 1.423192452510894e-05, - "loss": 0.0694, - "step": 34085 - }, - { - "epoch": 0.8653382408935144, - "grad_norm": 2.4012739658355713, - "learning_rate": 1.4231078394043239e-05, - "loss": 0.0815, - "step": 34090 - }, - { - "epoch": 0.8654651605533698, - "grad_norm": 0.36335912346839905, - "learning_rate": 1.4230232262977537e-05, - "loss": 0.0688, - "step": 34095 - }, - { - "epoch": 0.865592080213225, - "grad_norm": 0.43291446566581726, - "learning_rate": 1.4229386131911835e-05, - "loss": 0.0469, - "step": 34100 - }, - { - "epoch": 0.8657189998730803, - "grad_norm": 0.5335478186607361, - "learning_rate": 1.4228540000846132e-05, - "loss": 0.0639, - "step": 34105 - }, - { - "epoch": 0.8658459195329357, - "grad_norm": 0.5812557339668274, - "learning_rate": 1.422769386978043e-05, - "loss": 0.0574, - "step": 34110 - }, - { - "epoch": 0.865972839192791, - "grad_norm": 0.8672996163368225, - "learning_rate": 1.4226847738714729e-05, - "loss": 0.0859, - "step": 34115 - }, - { - "epoch": 0.8660997588526462, - "grad_norm": 0.44303098320961, - "learning_rate": 1.4226001607649027e-05, - "loss": 0.0623, - "step": 34120 - }, - { - "epoch": 0.8662266785125016, - "grad_norm": 0.7640902400016785, - "learning_rate": 1.4225155476583324e-05, - "loss": 0.0663, - "step": 34125 - }, - { - "epoch": 0.8663535981723569, - "grad_norm": 0.6262351870536804, - "learning_rate": 1.4224309345517623e-05, - "loss": 0.0747, - "step": 34130 - }, - { - "epoch": 0.8664805178322122, - "grad_norm": 0.6380113363265991, - "learning_rate": 1.4223463214451921e-05, - "loss": 0.0824, - "step": 34135 - }, - { - "epoch": 0.8666074374920675, - "grad_norm": 0.6418672800064087, - "learning_rate": 1.422261708338622e-05, - "loss": 0.0596, - "step": 34140 - }, - { - "epoch": 0.8667343571519228, - "grad_norm": 0.8029141426086426, - "learning_rate": 1.4221770952320514e-05, - "loss": 0.0583, - "step": 34145 - }, - { - "epoch": 0.8668612768117782, - "grad_norm": 0.43680763244628906, - "learning_rate": 1.4220924821254813e-05, - "loss": 0.0542, - "step": 34150 - }, - { - "epoch": 0.8669881964716335, - "grad_norm": 0.4794274568557739, - "learning_rate": 1.4220078690189111e-05, - "loss": 0.0535, - "step": 34155 - }, - { - "epoch": 0.8671151161314887, - "grad_norm": 0.6763949990272522, - "learning_rate": 1.4219232559123411e-05, - "loss": 0.0617, - "step": 34160 - }, - { - "epoch": 0.8672420357913441, - "grad_norm": 0.7907184362411499, - "learning_rate": 1.4218386428057706e-05, - "loss": 0.0799, - "step": 34165 - }, - { - "epoch": 0.8673689554511994, - "grad_norm": 0.45001673698425293, - "learning_rate": 1.4217540296992005e-05, - "loss": 0.0476, - "step": 34170 - }, - { - "epoch": 0.8674958751110547, - "grad_norm": 0.3348437547683716, - "learning_rate": 1.4216694165926303e-05, - "loss": 0.0518, - "step": 34175 - }, - { - "epoch": 0.86762279477091, - "grad_norm": 0.47669848799705505, - "learning_rate": 1.4215848034860601e-05, - "loss": 0.0715, - "step": 34180 - }, - { - "epoch": 0.8677497144307653, - "grad_norm": 0.8457666039466858, - "learning_rate": 1.4215001903794898e-05, - "loss": 0.0667, - "step": 34185 - }, - { - "epoch": 0.8678766340906207, - "grad_norm": 0.5793268084526062, - "learning_rate": 1.4214155772729197e-05, - "loss": 0.067, - "step": 34190 - }, - { - "epoch": 0.868003553750476, - "grad_norm": 1.2097976207733154, - "learning_rate": 1.4213309641663495e-05, - "loss": 0.0646, - "step": 34195 - }, - { - "epoch": 0.8681304734103312, - "grad_norm": 0.598157525062561, - "learning_rate": 1.4212463510597793e-05, - "loss": 0.1011, - "step": 34200 - }, - { - "epoch": 0.8682573930701866, - "grad_norm": 0.47376927733421326, - "learning_rate": 1.421161737953209e-05, - "loss": 0.0486, - "step": 34205 - }, - { - "epoch": 0.8683843127300419, - "grad_norm": 0.7059450745582581, - "learning_rate": 1.4210771248466388e-05, - "loss": 0.0622, - "step": 34210 - }, - { - "epoch": 0.8685112323898972, - "grad_norm": 0.39131829142570496, - "learning_rate": 1.4209925117400687e-05, - "loss": 0.0493, - "step": 34215 - }, - { - "epoch": 0.8686381520497525, - "grad_norm": 0.38944536447525024, - "learning_rate": 1.4209078986334985e-05, - "loss": 0.0468, - "step": 34220 - }, - { - "epoch": 0.8687650717096078, - "grad_norm": 0.5393641591072083, - "learning_rate": 1.4208232855269282e-05, - "loss": 0.0523, - "step": 34225 - }, - { - "epoch": 0.8688919913694632, - "grad_norm": 0.5811500549316406, - "learning_rate": 1.420738672420358e-05, - "loss": 0.0619, - "step": 34230 - }, - { - "epoch": 0.8690189110293184, - "grad_norm": 0.6526428461074829, - "learning_rate": 1.4206540593137879e-05, - "loss": 0.053, - "step": 34235 - }, - { - "epoch": 0.8691458306891737, - "grad_norm": 0.5227010250091553, - "learning_rate": 1.4205694462072177e-05, - "loss": 0.0633, - "step": 34240 - }, - { - "epoch": 0.8692727503490291, - "grad_norm": 1.073530673980713, - "learning_rate": 1.4204848331006474e-05, - "loss": 0.0598, - "step": 34245 - }, - { - "epoch": 0.8693996700088844, - "grad_norm": 0.346123605966568, - "learning_rate": 1.4204002199940772e-05, - "loss": 0.0836, - "step": 34250 - }, - { - "epoch": 0.8695265896687396, - "grad_norm": 0.3563013970851898, - "learning_rate": 1.420315606887507e-05, - "loss": 0.0588, - "step": 34255 - }, - { - "epoch": 0.869653509328595, - "grad_norm": 0.5525762438774109, - "learning_rate": 1.4202309937809369e-05, - "loss": 0.0665, - "step": 34260 - }, - { - "epoch": 0.8697804289884503, - "grad_norm": 0.4620616137981415, - "learning_rate": 1.4201463806743666e-05, - "loss": 0.0683, - "step": 34265 - }, - { - "epoch": 0.8699073486483057, - "grad_norm": 1.3019444942474365, - "learning_rate": 1.4200617675677964e-05, - "loss": 0.0831, - "step": 34270 - }, - { - "epoch": 0.8700342683081609, - "grad_norm": 0.3872714936733246, - "learning_rate": 1.4199771544612263e-05, - "loss": 0.0675, - "step": 34275 - }, - { - "epoch": 0.8701611879680162, - "grad_norm": 0.4951944351196289, - "learning_rate": 1.4198925413546561e-05, - "loss": 0.0633, - "step": 34280 - }, - { - "epoch": 0.8702881076278716, - "grad_norm": 1.3197946548461914, - "learning_rate": 1.4198079282480856e-05, - "loss": 0.0614, - "step": 34285 - }, - { - "epoch": 0.8704150272877269, - "grad_norm": 0.49005040526390076, - "learning_rate": 1.4197233151415154e-05, - "loss": 0.0686, - "step": 34290 - }, - { - "epoch": 0.8705419469475821, - "grad_norm": 0.48620516061782837, - "learning_rate": 1.4196387020349453e-05, - "loss": 0.0727, - "step": 34295 - }, - { - "epoch": 0.8706688666074375, - "grad_norm": 0.5922199487686157, - "learning_rate": 1.4195540889283751e-05, - "loss": 0.0723, - "step": 34300 - }, - { - "epoch": 0.8707957862672928, - "grad_norm": 0.4363718330860138, - "learning_rate": 1.4194694758218048e-05, - "loss": 0.0608, - "step": 34305 - }, - { - "epoch": 0.8709227059271482, - "grad_norm": 0.4513043165206909, - "learning_rate": 1.4193848627152346e-05, - "loss": 0.0697, - "step": 34310 - }, - { - "epoch": 0.8710496255870034, - "grad_norm": 0.5635522603988647, - "learning_rate": 1.4193002496086645e-05, - "loss": 0.0529, - "step": 34315 - }, - { - "epoch": 0.8711765452468587, - "grad_norm": 0.3803008794784546, - "learning_rate": 1.4192156365020943e-05, - "loss": 0.0688, - "step": 34320 - }, - { - "epoch": 0.8713034649067141, - "grad_norm": 0.6313158273696899, - "learning_rate": 1.419131023395524e-05, - "loss": 0.0714, - "step": 34325 - }, - { - "epoch": 0.8714303845665694, - "grad_norm": 0.5150982737541199, - "learning_rate": 1.4190464102889538e-05, - "loss": 0.0986, - "step": 34330 - }, - { - "epoch": 0.8715573042264246, - "grad_norm": 0.29522109031677246, - "learning_rate": 1.4189617971823837e-05, - "loss": 0.0464, - "step": 34335 - }, - { - "epoch": 0.87168422388628, - "grad_norm": 0.43527230620384216, - "learning_rate": 1.4188771840758135e-05, - "loss": 0.0717, - "step": 34340 - }, - { - "epoch": 0.8718111435461353, - "grad_norm": 1.012359619140625, - "learning_rate": 1.4187925709692432e-05, - "loss": 0.0675, - "step": 34345 - }, - { - "epoch": 0.8719380632059907, - "grad_norm": 0.4626675248146057, - "learning_rate": 1.418707957862673e-05, - "loss": 0.071, - "step": 34350 - }, - { - "epoch": 0.8720649828658459, - "grad_norm": 0.4133753478527069, - "learning_rate": 1.4186233447561029e-05, - "loss": 0.0754, - "step": 34355 - }, - { - "epoch": 0.8721919025257012, - "grad_norm": 0.5713927149772644, - "learning_rate": 1.4185387316495327e-05, - "loss": 0.0552, - "step": 34360 - }, - { - "epoch": 0.8723188221855566, - "grad_norm": 0.34447526931762695, - "learning_rate": 1.4184541185429624e-05, - "loss": 0.0714, - "step": 34365 - }, - { - "epoch": 0.8724457418454119, - "grad_norm": 0.5867732167243958, - "learning_rate": 1.4183695054363922e-05, - "loss": 0.0715, - "step": 34370 - }, - { - "epoch": 0.8725726615052671, - "grad_norm": 0.7683963179588318, - "learning_rate": 1.418284892329822e-05, - "loss": 0.0525, - "step": 34375 - }, - { - "epoch": 0.8726995811651225, - "grad_norm": 0.5919082164764404, - "learning_rate": 1.4182002792232519e-05, - "loss": 0.0741, - "step": 34380 - }, - { - "epoch": 0.8728265008249778, - "grad_norm": 0.8089935183525085, - "learning_rate": 1.4181156661166816e-05, - "loss": 0.0795, - "step": 34385 - }, - { - "epoch": 0.8729534204848332, - "grad_norm": 0.6119821071624756, - "learning_rate": 1.4180310530101114e-05, - "loss": 0.0714, - "step": 34390 - }, - { - "epoch": 0.8730803401446884, - "grad_norm": 0.5525721311569214, - "learning_rate": 1.4179464399035412e-05, - "loss": 0.0715, - "step": 34395 - }, - { - "epoch": 0.8732072598045437, - "grad_norm": 2.257988929748535, - "learning_rate": 1.417861826796971e-05, - "loss": 0.0688, - "step": 34400 - }, - { - "epoch": 0.8733341794643991, - "grad_norm": 0.5272205471992493, - "learning_rate": 1.4177772136904008e-05, - "loss": 0.0686, - "step": 34405 - }, - { - "epoch": 0.8734610991242543, - "grad_norm": 0.9987521171569824, - "learning_rate": 1.4176926005838306e-05, - "loss": 0.0571, - "step": 34410 - }, - { - "epoch": 0.8735880187841096, - "grad_norm": 0.6953270435333252, - "learning_rate": 1.4176079874772604e-05, - "loss": 0.0642, - "step": 34415 - }, - { - "epoch": 0.873714938443965, - "grad_norm": 0.44839099049568176, - "learning_rate": 1.4175233743706903e-05, - "loss": 0.0571, - "step": 34420 - }, - { - "epoch": 0.8738418581038203, - "grad_norm": 0.5139065384864807, - "learning_rate": 1.4174387612641198e-05, - "loss": 0.0527, - "step": 34425 - }, - { - "epoch": 0.8739687777636755, - "grad_norm": 0.38527894020080566, - "learning_rate": 1.4173541481575496e-05, - "loss": 0.0625, - "step": 34430 - }, - { - "epoch": 0.8740956974235309, - "grad_norm": 0.4569058418273926, - "learning_rate": 1.4172695350509795e-05, - "loss": 0.0781, - "step": 34435 - }, - { - "epoch": 0.8742226170833862, - "grad_norm": 0.43602320551872253, - "learning_rate": 1.4171849219444093e-05, - "loss": 0.0587, - "step": 34440 - }, - { - "epoch": 0.8743495367432416, - "grad_norm": 0.44184571504592896, - "learning_rate": 1.417100308837839e-05, - "loss": 0.0818, - "step": 34445 - }, - { - "epoch": 0.8744764564030968, - "grad_norm": 1.0298813581466675, - "learning_rate": 1.4170156957312688e-05, - "loss": 0.0569, - "step": 34450 - }, - { - "epoch": 0.8746033760629521, - "grad_norm": 0.5172150731086731, - "learning_rate": 1.4169310826246986e-05, - "loss": 0.0684, - "step": 34455 - }, - { - "epoch": 0.8747302957228075, - "grad_norm": 0.4804682731628418, - "learning_rate": 1.4168464695181285e-05, - "loss": 0.0857, - "step": 34460 - }, - { - "epoch": 0.8748572153826628, - "grad_norm": 0.4934239387512207, - "learning_rate": 1.4167618564115582e-05, - "loss": 0.0513, - "step": 34465 - }, - { - "epoch": 0.874984135042518, - "grad_norm": 0.3297306001186371, - "learning_rate": 1.416677243304988e-05, - "loss": 0.0755, - "step": 34470 - }, - { - "epoch": 0.8751110547023734, - "grad_norm": 0.546271800994873, - "learning_rate": 1.4165926301984178e-05, - "loss": 0.0575, - "step": 34475 - }, - { - "epoch": 0.8752379743622287, - "grad_norm": 0.42478105425834656, - "learning_rate": 1.4165080170918477e-05, - "loss": 0.0555, - "step": 34480 - }, - { - "epoch": 0.8753648940220841, - "grad_norm": 0.43068432807922363, - "learning_rate": 1.4164234039852773e-05, - "loss": 0.087, - "step": 34485 - }, - { - "epoch": 0.8754918136819393, - "grad_norm": 0.6757073998451233, - "learning_rate": 1.4163387908787072e-05, - "loss": 0.0519, - "step": 34490 - }, - { - "epoch": 0.8756187333417946, - "grad_norm": 0.3850046396255493, - "learning_rate": 1.416254177772137e-05, - "loss": 0.0601, - "step": 34495 - }, - { - "epoch": 0.87574565300165, - "grad_norm": 0.6274129748344421, - "learning_rate": 1.4161695646655669e-05, - "loss": 0.046, - "step": 34500 - }, - { - "epoch": 0.8758725726615053, - "grad_norm": 0.47220033407211304, - "learning_rate": 1.4160849515589965e-05, - "loss": 0.0586, - "step": 34505 - }, - { - "epoch": 0.8759994923213605, - "grad_norm": 0.43012914061546326, - "learning_rate": 1.4160003384524264e-05, - "loss": 0.054, - "step": 34510 - }, - { - "epoch": 0.8761264119812159, - "grad_norm": 0.577068030834198, - "learning_rate": 1.4159157253458562e-05, - "loss": 0.0785, - "step": 34515 - }, - { - "epoch": 0.8762533316410712, - "grad_norm": 0.5287047624588013, - "learning_rate": 1.415831112239286e-05, - "loss": 0.0647, - "step": 34520 - }, - { - "epoch": 0.8763802513009266, - "grad_norm": 0.41936784982681274, - "learning_rate": 1.4157464991327157e-05, - "loss": 0.0837, - "step": 34525 - }, - { - "epoch": 0.8765071709607818, - "grad_norm": 0.45306962728500366, - "learning_rate": 1.4156618860261456e-05, - "loss": 0.0615, - "step": 34530 - }, - { - "epoch": 0.8766340906206371, - "grad_norm": 0.3549240529537201, - "learning_rate": 1.4155772729195754e-05, - "loss": 0.0654, - "step": 34535 - }, - { - "epoch": 0.8767610102804925, - "grad_norm": 0.4501296579837799, - "learning_rate": 1.4154926598130053e-05, - "loss": 0.0693, - "step": 34540 - }, - { - "epoch": 0.8768879299403478, - "grad_norm": 0.6177758574485779, - "learning_rate": 1.4154080467064348e-05, - "loss": 0.0651, - "step": 34545 - }, - { - "epoch": 0.877014849600203, - "grad_norm": 0.4193696677684784, - "learning_rate": 1.4153234335998648e-05, - "loss": 0.0659, - "step": 34550 - }, - { - "epoch": 0.8771417692600584, - "grad_norm": 0.7441093921661377, - "learning_rate": 1.4152388204932946e-05, - "loss": 0.056, - "step": 34555 - }, - { - "epoch": 0.8772686889199137, - "grad_norm": 0.500731885433197, - "learning_rate": 1.4151542073867244e-05, - "loss": 0.0628, - "step": 34560 - }, - { - "epoch": 0.8773956085797691, - "grad_norm": 0.396107017993927, - "learning_rate": 1.415069594280154e-05, - "loss": 0.0522, - "step": 34565 - }, - { - "epoch": 0.8775225282396243, - "grad_norm": 0.4018365442752838, - "learning_rate": 1.4149849811735838e-05, - "loss": 0.0729, - "step": 34570 - }, - { - "epoch": 0.8776494478994796, - "grad_norm": 0.5929480195045471, - "learning_rate": 1.4149003680670136e-05, - "loss": 0.0839, - "step": 34575 - }, - { - "epoch": 0.877776367559335, - "grad_norm": 0.3212193548679352, - "learning_rate": 1.4148157549604435e-05, - "loss": 0.0508, - "step": 34580 - }, - { - "epoch": 0.8779032872191902, - "grad_norm": 1.0203638076782227, - "learning_rate": 1.4147311418538735e-05, - "loss": 0.0718, - "step": 34585 - }, - { - "epoch": 0.8780302068790455, - "grad_norm": 0.37974515557289124, - "learning_rate": 1.414646528747303e-05, - "loss": 0.0548, - "step": 34590 - }, - { - "epoch": 0.8781571265389009, - "grad_norm": 0.6992487907409668, - "learning_rate": 1.4145619156407328e-05, - "loss": 0.0669, - "step": 34595 - }, - { - "epoch": 0.8782840461987562, - "grad_norm": 0.48010316491127014, - "learning_rate": 1.4144773025341627e-05, - "loss": 0.0793, - "step": 34600 - }, - { - "epoch": 0.8784109658586114, - "grad_norm": 0.6374577879905701, - "learning_rate": 1.4143926894275925e-05, - "loss": 0.0923, - "step": 34605 - }, - { - "epoch": 0.8785378855184668, - "grad_norm": 0.6428236961364746, - "learning_rate": 1.4143080763210222e-05, - "loss": 0.0636, - "step": 34610 - }, - { - "epoch": 0.8786648051783221, - "grad_norm": 0.4173987805843353, - "learning_rate": 1.414223463214452e-05, - "loss": 0.0734, - "step": 34615 - }, - { - "epoch": 0.8787917248381775, - "grad_norm": 0.6437735557556152, - "learning_rate": 1.4141388501078818e-05, - "loss": 0.0635, - "step": 34620 - }, - { - "epoch": 0.8789186444980327, - "grad_norm": 0.43467986583709717, - "learning_rate": 1.4140542370013117e-05, - "loss": 0.0668, - "step": 34625 - }, - { - "epoch": 0.879045564157888, - "grad_norm": 0.35919246077537537, - "learning_rate": 1.4139696238947414e-05, - "loss": 0.0529, - "step": 34630 - }, - { - "epoch": 0.8791724838177434, - "grad_norm": 0.5986192226409912, - "learning_rate": 1.4138850107881712e-05, - "loss": 0.0798, - "step": 34635 - }, - { - "epoch": 0.8792994034775987, - "grad_norm": 0.5600852966308594, - "learning_rate": 1.413800397681601e-05, - "loss": 0.0637, - "step": 34640 - }, - { - "epoch": 0.879426323137454, - "grad_norm": 0.8316653370857239, - "learning_rate": 1.4137157845750309e-05, - "loss": 0.0832, - "step": 34645 - }, - { - "epoch": 0.8795532427973093, - "grad_norm": 0.7637650370597839, - "learning_rate": 1.4136311714684605e-05, - "loss": 0.0721, - "step": 34650 - }, - { - "epoch": 0.8796801624571646, - "grad_norm": 0.5111541152000427, - "learning_rate": 1.4135465583618904e-05, - "loss": 0.0495, - "step": 34655 - }, - { - "epoch": 0.87980708211702, - "grad_norm": 0.4024965763092041, - "learning_rate": 1.4134619452553202e-05, - "loss": 0.0603, - "step": 34660 - }, - { - "epoch": 0.8799340017768752, - "grad_norm": 0.4355154037475586, - "learning_rate": 1.41337733214875e-05, - "loss": 0.0555, - "step": 34665 - }, - { - "epoch": 0.8800609214367305, - "grad_norm": 0.4618392884731293, - "learning_rate": 1.4132927190421797e-05, - "loss": 0.0536, - "step": 34670 - }, - { - "epoch": 0.8801878410965859, - "grad_norm": 0.40786364674568176, - "learning_rate": 1.4132081059356096e-05, - "loss": 0.0954, - "step": 34675 - }, - { - "epoch": 0.8803147607564412, - "grad_norm": 0.6484477519989014, - "learning_rate": 1.4131234928290394e-05, - "loss": 0.0855, - "step": 34680 - }, - { - "epoch": 0.8804416804162964, - "grad_norm": 0.5656567811965942, - "learning_rate": 1.4130388797224693e-05, - "loss": 0.0839, - "step": 34685 - }, - { - "epoch": 0.8805686000761518, - "grad_norm": 0.9205188751220703, - "learning_rate": 1.412954266615899e-05, - "loss": 0.0809, - "step": 34690 - }, - { - "epoch": 0.8806955197360071, - "grad_norm": 0.3333803117275238, - "learning_rate": 1.4128696535093288e-05, - "loss": 0.0569, - "step": 34695 - }, - { - "epoch": 0.8808224393958625, - "grad_norm": 0.49322590231895447, - "learning_rate": 1.4127850404027586e-05, - "loss": 0.0593, - "step": 34700 - }, - { - "epoch": 0.8809493590557177, - "grad_norm": 0.6073592901229858, - "learning_rate": 1.4127004272961885e-05, - "loss": 0.0722, - "step": 34705 - }, - { - "epoch": 0.881076278715573, - "grad_norm": 0.4468435049057007, - "learning_rate": 1.412615814189618e-05, - "loss": 0.0668, - "step": 34710 - }, - { - "epoch": 0.8812031983754284, - "grad_norm": 0.45478400588035583, - "learning_rate": 1.4125312010830478e-05, - "loss": 0.0807, - "step": 34715 - }, - { - "epoch": 0.8813301180352837, - "grad_norm": 0.5733183026313782, - "learning_rate": 1.4124465879764776e-05, - "loss": 0.0815, - "step": 34720 - }, - { - "epoch": 0.881457037695139, - "grad_norm": 2.5981884002685547, - "learning_rate": 1.4123619748699076e-05, - "loss": 0.0603, - "step": 34725 - }, - { - "epoch": 0.8815839573549943, - "grad_norm": 0.5155329704284668, - "learning_rate": 1.4122773617633371e-05, - "loss": 0.0965, - "step": 34730 - }, - { - "epoch": 0.8817108770148496, - "grad_norm": 0.46370577812194824, - "learning_rate": 1.412192748656767e-05, - "loss": 0.0536, - "step": 34735 - }, - { - "epoch": 0.881837796674705, - "grad_norm": 0.5318700075149536, - "learning_rate": 1.4121081355501968e-05, - "loss": 0.096, - "step": 34740 - }, - { - "epoch": 0.8819647163345602, - "grad_norm": 0.7448140382766724, - "learning_rate": 1.4120235224436267e-05, - "loss": 0.0569, - "step": 34745 - }, - { - "epoch": 0.8820916359944155, - "grad_norm": 0.39706122875213623, - "learning_rate": 1.4119389093370563e-05, - "loss": 0.0574, - "step": 34750 - }, - { - "epoch": 0.8822185556542709, - "grad_norm": 0.5476941466331482, - "learning_rate": 1.4118542962304862e-05, - "loss": 0.0748, - "step": 34755 - }, - { - "epoch": 0.8823454753141261, - "grad_norm": 0.46551597118377686, - "learning_rate": 1.411769683123916e-05, - "loss": 0.0688, - "step": 34760 - }, - { - "epoch": 0.8824723949739814, - "grad_norm": 0.4470709264278412, - "learning_rate": 1.4116850700173459e-05, - "loss": 0.0618, - "step": 34765 - }, - { - "epoch": 0.8825993146338368, - "grad_norm": 0.6245851516723633, - "learning_rate": 1.4116004569107755e-05, - "loss": 0.0737, - "step": 34770 - }, - { - "epoch": 0.8827262342936921, - "grad_norm": 0.740343451499939, - "learning_rate": 1.4115158438042054e-05, - "loss": 0.0611, - "step": 34775 - }, - { - "epoch": 0.8828531539535474, - "grad_norm": 0.3582501709461212, - "learning_rate": 1.4114312306976352e-05, - "loss": 0.0371, - "step": 34780 - }, - { - "epoch": 0.8829800736134027, - "grad_norm": 0.5133855938911438, - "learning_rate": 1.411346617591065e-05, - "loss": 0.0796, - "step": 34785 - }, - { - "epoch": 0.883106993273258, - "grad_norm": 0.5935069918632507, - "learning_rate": 1.4112620044844947e-05, - "loss": 0.058, - "step": 34790 - }, - { - "epoch": 0.8832339129331134, - "grad_norm": 0.6294364333152771, - "learning_rate": 1.4111773913779246e-05, - "loss": 0.0832, - "step": 34795 - }, - { - "epoch": 0.8833608325929686, - "grad_norm": 0.2988439202308655, - "learning_rate": 1.4110927782713544e-05, - "loss": 0.0689, - "step": 34800 - }, - { - "epoch": 0.8834877522528239, - "grad_norm": 0.5067592263221741, - "learning_rate": 1.4110081651647842e-05, - "loss": 0.0729, - "step": 34805 - }, - { - "epoch": 0.8836146719126793, - "grad_norm": 0.7685109376907349, - "learning_rate": 1.4109235520582139e-05, - "loss": 0.0609, - "step": 34810 - }, - { - "epoch": 0.8837415915725346, - "grad_norm": 0.3304712474346161, - "learning_rate": 1.4108389389516438e-05, - "loss": 0.0693, - "step": 34815 - }, - { - "epoch": 0.8838685112323899, - "grad_norm": 0.4220477342605591, - "learning_rate": 1.4107543258450736e-05, - "loss": 0.0668, - "step": 34820 - }, - { - "epoch": 0.8839954308922452, - "grad_norm": 0.4194203317165375, - "learning_rate": 1.4106697127385034e-05, - "loss": 0.0723, - "step": 34825 - }, - { - "epoch": 0.8841223505521005, - "grad_norm": 0.38929101824760437, - "learning_rate": 1.4105850996319331e-05, - "loss": 0.0531, - "step": 34830 - }, - { - "epoch": 0.8842492702119559, - "grad_norm": 0.6579625010490417, - "learning_rate": 1.410500486525363e-05, - "loss": 0.0619, - "step": 34835 - }, - { - "epoch": 0.8843761898718111, - "grad_norm": 0.6530230641365051, - "learning_rate": 1.4104158734187928e-05, - "loss": 0.0555, - "step": 34840 - }, - { - "epoch": 0.8845031095316664, - "grad_norm": 0.6187976598739624, - "learning_rate": 1.4103312603122226e-05, - "loss": 0.0746, - "step": 34845 - }, - { - "epoch": 0.8846300291915218, - "grad_norm": 0.45847925543785095, - "learning_rate": 1.4102466472056521e-05, - "loss": 0.0655, - "step": 34850 - }, - { - "epoch": 0.8847569488513771, - "grad_norm": 0.17294710874557495, - "learning_rate": 1.410162034099082e-05, - "loss": 0.0216, - "step": 34855 - }, - { - "epoch": 0.8848838685112324, - "grad_norm": 0.38952305912971497, - "learning_rate": 1.4100774209925118e-05, - "loss": 0.078, - "step": 34860 - }, - { - "epoch": 0.8850107881710877, - "grad_norm": 0.5617550015449524, - "learning_rate": 1.4099928078859416e-05, - "loss": 0.0577, - "step": 34865 - }, - { - "epoch": 0.885137707830943, - "grad_norm": 0.6552527546882629, - "learning_rate": 1.4099081947793713e-05, - "loss": 0.0591, - "step": 34870 - }, - { - "epoch": 0.8852646274907984, - "grad_norm": 0.5666271448135376, - "learning_rate": 1.4098235816728012e-05, - "loss": 0.0721, - "step": 34875 - }, - { - "epoch": 0.8853915471506536, - "grad_norm": 0.4224430322647095, - "learning_rate": 1.409738968566231e-05, - "loss": 0.052, - "step": 34880 - }, - { - "epoch": 0.8855184668105089, - "grad_norm": 0.5569811463356018, - "learning_rate": 1.4096543554596608e-05, - "loss": 0.0546, - "step": 34885 - }, - { - "epoch": 0.8856453864703643, - "grad_norm": 0.5093055963516235, - "learning_rate": 1.4095697423530905e-05, - "loss": 0.0685, - "step": 34890 - }, - { - "epoch": 0.8857723061302196, - "grad_norm": 0.45345133543014526, - "learning_rate": 1.4094851292465203e-05, - "loss": 0.0676, - "step": 34895 - }, - { - "epoch": 0.8858992257900749, - "grad_norm": 0.5104205012321472, - "learning_rate": 1.4094005161399502e-05, - "loss": 0.0587, - "step": 34900 - }, - { - "epoch": 0.8860261454499302, - "grad_norm": 0.5814826488494873, - "learning_rate": 1.40931590303338e-05, - "loss": 0.0804, - "step": 34905 - }, - { - "epoch": 0.8861530651097855, - "grad_norm": 0.609217643737793, - "learning_rate": 1.4092312899268097e-05, - "loss": 0.0635, - "step": 34910 - }, - { - "epoch": 0.8862799847696409, - "grad_norm": 0.44782450795173645, - "learning_rate": 1.4091466768202395e-05, - "loss": 0.0492, - "step": 34915 - }, - { - "epoch": 0.8864069044294961, - "grad_norm": 0.7684276103973389, - "learning_rate": 1.4090620637136694e-05, - "loss": 0.0651, - "step": 34920 - }, - { - "epoch": 0.8865338240893514, - "grad_norm": 0.4072602689266205, - "learning_rate": 1.4089774506070992e-05, - "loss": 0.0554, - "step": 34925 - }, - { - "epoch": 0.8866607437492068, - "grad_norm": 1.094642162322998, - "learning_rate": 1.4088928375005289e-05, - "loss": 0.0567, - "step": 34930 - }, - { - "epoch": 0.886787663409062, - "grad_norm": 0.6165804266929626, - "learning_rate": 1.4088082243939587e-05, - "loss": 0.0753, - "step": 34935 - }, - { - "epoch": 0.8869145830689174, - "grad_norm": 0.7601154446601868, - "learning_rate": 1.4087236112873886e-05, - "loss": 0.0646, - "step": 34940 - }, - { - "epoch": 0.8870415027287727, - "grad_norm": 0.5878347754478455, - "learning_rate": 1.4086389981808184e-05, - "loss": 0.0588, - "step": 34945 - }, - { - "epoch": 0.887168422388628, - "grad_norm": 0.5441903471946716, - "learning_rate": 1.408554385074248e-05, - "loss": 0.0532, - "step": 34950 - }, - { - "epoch": 0.8872953420484833, - "grad_norm": 0.5270041227340698, - "learning_rate": 1.408469771967678e-05, - "loss": 0.0804, - "step": 34955 - }, - { - "epoch": 0.8874222617083386, - "grad_norm": 0.38208940625190735, - "learning_rate": 1.4083851588611078e-05, - "loss": 0.0548, - "step": 34960 - }, - { - "epoch": 0.8875491813681939, - "grad_norm": 0.453867644071579, - "learning_rate": 1.4083005457545376e-05, - "loss": 0.0686, - "step": 34965 - }, - { - "epoch": 0.8876761010280493, - "grad_norm": 1.2792969942092896, - "learning_rate": 1.4082159326479671e-05, - "loss": 0.0569, - "step": 34970 - }, - { - "epoch": 0.8878030206879045, - "grad_norm": 0.9899558424949646, - "learning_rate": 1.4081313195413971e-05, - "loss": 0.0667, - "step": 34975 - }, - { - "epoch": 0.8879299403477598, - "grad_norm": 0.6003256440162659, - "learning_rate": 1.408046706434827e-05, - "loss": 0.0837, - "step": 34980 - }, - { - "epoch": 0.8880568600076152, - "grad_norm": 0.2682812511920929, - "learning_rate": 1.4079620933282568e-05, - "loss": 0.0495, - "step": 34985 - }, - { - "epoch": 0.8881837796674705, - "grad_norm": 0.526199460029602, - "learning_rate": 1.4078774802216863e-05, - "loss": 0.0817, - "step": 34990 - }, - { - "epoch": 0.8883106993273258, - "grad_norm": 0.38134291768074036, - "learning_rate": 1.4077928671151161e-05, - "loss": 0.0561, - "step": 34995 - }, - { - "epoch": 0.8884376189871811, - "grad_norm": 0.5040686726570129, - "learning_rate": 1.407708254008546e-05, - "loss": 0.0801, - "step": 35000 - }, - { - "epoch": 0.8885645386470364, - "grad_norm": 0.4959940016269684, - "learning_rate": 1.4076236409019758e-05, - "loss": 0.0572, - "step": 35005 - }, - { - "epoch": 0.8886914583068918, - "grad_norm": 0.48712655901908875, - "learning_rate": 1.4075390277954055e-05, - "loss": 0.0672, - "step": 35010 - }, - { - "epoch": 0.888818377966747, - "grad_norm": 0.6044802069664001, - "learning_rate": 1.4074544146888353e-05, - "loss": 0.0524, - "step": 35015 - }, - { - "epoch": 0.8889452976266023, - "grad_norm": 0.4492810368537903, - "learning_rate": 1.4073698015822652e-05, - "loss": 0.0502, - "step": 35020 - }, - { - "epoch": 0.8890722172864577, - "grad_norm": 0.5634965300559998, - "learning_rate": 1.407285188475695e-05, - "loss": 0.0684, - "step": 35025 - }, - { - "epoch": 0.889199136946313, - "grad_norm": 0.36526942253112793, - "learning_rate": 1.4072005753691247e-05, - "loss": 0.0537, - "step": 35030 - }, - { - "epoch": 0.8893260566061683, - "grad_norm": 0.8241205215454102, - "learning_rate": 1.4071159622625545e-05, - "loss": 0.0685, - "step": 35035 - }, - { - "epoch": 0.8894529762660236, - "grad_norm": 0.3881432116031647, - "learning_rate": 1.4070313491559844e-05, - "loss": 0.0728, - "step": 35040 - }, - { - "epoch": 0.8895798959258789, - "grad_norm": 0.4265308678150177, - "learning_rate": 1.4069467360494142e-05, - "loss": 0.0631, - "step": 35045 - }, - { - "epoch": 0.8897068155857343, - "grad_norm": 0.3954401910305023, - "learning_rate": 1.4068621229428439e-05, - "loss": 0.0496, - "step": 35050 - }, - { - "epoch": 0.8898337352455895, - "grad_norm": 0.5289174914360046, - "learning_rate": 1.4067775098362737e-05, - "loss": 0.0547, - "step": 35055 - }, - { - "epoch": 0.8899606549054448, - "grad_norm": 0.6053035855293274, - "learning_rate": 1.4066928967297035e-05, - "loss": 0.059, - "step": 35060 - }, - { - "epoch": 0.8900875745653002, - "grad_norm": 0.8783829808235168, - "learning_rate": 1.4066082836231334e-05, - "loss": 0.0664, - "step": 35065 - }, - { - "epoch": 0.8902144942251555, - "grad_norm": 0.6331197619438171, - "learning_rate": 1.406523670516563e-05, - "loss": 0.0619, - "step": 35070 - }, - { - "epoch": 0.8903414138850108, - "grad_norm": 0.6140260696411133, - "learning_rate": 1.4064390574099929e-05, - "loss": 0.065, - "step": 35075 - }, - { - "epoch": 0.8904683335448661, - "grad_norm": 0.5236164331436157, - "learning_rate": 1.4063544443034227e-05, - "loss": 0.0708, - "step": 35080 - }, - { - "epoch": 0.8905952532047214, - "grad_norm": 0.4957127273082733, - "learning_rate": 1.4062698311968526e-05, - "loss": 0.0655, - "step": 35085 - }, - { - "epoch": 0.8907221728645768, - "grad_norm": 0.3689078688621521, - "learning_rate": 1.4061852180902823e-05, - "loss": 0.0794, - "step": 35090 - }, - { - "epoch": 0.890849092524432, - "grad_norm": 0.5723044276237488, - "learning_rate": 1.4061006049837121e-05, - "loss": 0.0629, - "step": 35095 - }, - { - "epoch": 0.8909760121842873, - "grad_norm": 0.7530127763748169, - "learning_rate": 1.406015991877142e-05, - "loss": 0.073, - "step": 35100 - }, - { - "epoch": 0.8911029318441427, - "grad_norm": 0.4156973361968994, - "learning_rate": 1.4059313787705718e-05, - "loss": 0.0615, - "step": 35105 - }, - { - "epoch": 0.8912298515039979, - "grad_norm": 0.47077107429504395, - "learning_rate": 1.4058467656640016e-05, - "loss": 0.0449, - "step": 35110 - }, - { - "epoch": 0.8913567711638533, - "grad_norm": 1.1336990594863892, - "learning_rate": 1.4057621525574313e-05, - "loss": 0.0646, - "step": 35115 - }, - { - "epoch": 0.8914836908237086, - "grad_norm": 0.6898463368415833, - "learning_rate": 1.4056775394508611e-05, - "loss": 0.042, - "step": 35120 - }, - { - "epoch": 0.8916106104835639, - "grad_norm": 0.47178342938423157, - "learning_rate": 1.405592926344291e-05, - "loss": 0.0544, - "step": 35125 - }, - { - "epoch": 0.8917375301434192, - "grad_norm": 0.41895347833633423, - "learning_rate": 1.4055083132377208e-05, - "loss": 0.0484, - "step": 35130 - }, - { - "epoch": 0.8918644498032745, - "grad_norm": 2.312026262283325, - "learning_rate": 1.4054237001311503e-05, - "loss": 0.0508, - "step": 35135 - }, - { - "epoch": 0.8919913694631298, - "grad_norm": 0.4094340205192566, - "learning_rate": 1.4053390870245801e-05, - "loss": 0.074, - "step": 35140 - }, - { - "epoch": 0.8921182891229852, - "grad_norm": 0.5479934215545654, - "learning_rate": 1.40525447391801e-05, - "loss": 0.0802, - "step": 35145 - }, - { - "epoch": 0.8922452087828404, - "grad_norm": 0.5689419507980347, - "learning_rate": 1.40516986081144e-05, - "loss": 0.0782, - "step": 35150 - }, - { - "epoch": 0.8923721284426958, - "grad_norm": 0.2658119201660156, - "learning_rate": 1.4050852477048695e-05, - "loss": 0.0571, - "step": 35155 - }, - { - "epoch": 0.8924990481025511, - "grad_norm": 0.4687420725822449, - "learning_rate": 1.4050006345982993e-05, - "loss": 0.0581, - "step": 35160 - }, - { - "epoch": 0.8926259677624064, - "grad_norm": 0.8115020394325256, - "learning_rate": 1.4049160214917292e-05, - "loss": 0.0607, - "step": 35165 - }, - { - "epoch": 0.8927528874222617, - "grad_norm": 0.4689027667045593, - "learning_rate": 1.404831408385159e-05, - "loss": 0.0633, - "step": 35170 - }, - { - "epoch": 0.892879807082117, - "grad_norm": 0.3078717291355133, - "learning_rate": 1.4047467952785887e-05, - "loss": 0.0681, - "step": 35175 - }, - { - "epoch": 0.8930067267419723, - "grad_norm": 0.8845883011817932, - "learning_rate": 1.4046621821720185e-05, - "loss": 0.0583, - "step": 35180 - }, - { - "epoch": 0.8931336464018277, - "grad_norm": 0.37942638993263245, - "learning_rate": 1.4045775690654484e-05, - "loss": 0.0465, - "step": 35185 - }, - { - "epoch": 0.8932605660616829, - "grad_norm": 0.3219950795173645, - "learning_rate": 1.4044929559588782e-05, - "loss": 0.0666, - "step": 35190 - }, - { - "epoch": 0.8933874857215383, - "grad_norm": 0.4549737274646759, - "learning_rate": 1.4044083428523079e-05, - "loss": 0.0597, - "step": 35195 - }, - { - "epoch": 0.8935144053813936, - "grad_norm": 0.5626015663146973, - "learning_rate": 1.4043237297457377e-05, - "loss": 0.0732, - "step": 35200 - }, - { - "epoch": 0.8936413250412489, - "grad_norm": 0.7765746116638184, - "learning_rate": 1.4042391166391676e-05, - "loss": 0.0471, - "step": 35205 - }, - { - "epoch": 0.8937682447011042, - "grad_norm": 0.4299619793891907, - "learning_rate": 1.4041545035325974e-05, - "loss": 0.0486, - "step": 35210 - }, - { - "epoch": 0.8938951643609595, - "grad_norm": 0.5321807861328125, - "learning_rate": 1.404069890426027e-05, - "loss": 0.0722, - "step": 35215 - }, - { - "epoch": 0.8940220840208148, - "grad_norm": 0.5389994382858276, - "learning_rate": 1.4039852773194569e-05, - "loss": 0.0599, - "step": 35220 - }, - { - "epoch": 0.8941490036806702, - "grad_norm": 0.4992239475250244, - "learning_rate": 1.4039006642128868e-05, - "loss": 0.052, - "step": 35225 - }, - { - "epoch": 0.8942759233405254, - "grad_norm": 0.28586849570274353, - "learning_rate": 1.4038160511063166e-05, - "loss": 0.0592, - "step": 35230 - }, - { - "epoch": 0.8944028430003808, - "grad_norm": 0.6693266034126282, - "learning_rate": 1.4037314379997463e-05, - "loss": 0.0692, - "step": 35235 - }, - { - "epoch": 0.8945297626602361, - "grad_norm": 0.6322370171546936, - "learning_rate": 1.4036468248931761e-05, - "loss": 0.0649, - "step": 35240 - }, - { - "epoch": 0.8946566823200914, - "grad_norm": 0.37547966837882996, - "learning_rate": 1.403562211786606e-05, - "loss": 0.0878, - "step": 35245 - }, - { - "epoch": 0.8947836019799467, - "grad_norm": 0.5993882417678833, - "learning_rate": 1.4034775986800358e-05, - "loss": 0.077, - "step": 35250 - }, - { - "epoch": 0.894910521639802, - "grad_norm": 1.8442648649215698, - "learning_rate": 1.4033929855734655e-05, - "loss": 0.0587, - "step": 35255 - }, - { - "epoch": 0.8950374412996573, - "grad_norm": 0.5735684037208557, - "learning_rate": 1.4033083724668953e-05, - "loss": 0.0565, - "step": 35260 - }, - { - "epoch": 0.8951643609595127, - "grad_norm": 0.6070929169654846, - "learning_rate": 1.4032237593603251e-05, - "loss": 0.0503, - "step": 35265 - }, - { - "epoch": 0.8952912806193679, - "grad_norm": 0.5779569745063782, - "learning_rate": 1.403139146253755e-05, - "loss": 0.0762, - "step": 35270 - }, - { - "epoch": 0.8954182002792233, - "grad_norm": 0.46369853615760803, - "learning_rate": 1.4030545331471845e-05, - "loss": 0.0431, - "step": 35275 - }, - { - "epoch": 0.8955451199390786, - "grad_norm": 0.788457989692688, - "learning_rate": 1.4029699200406143e-05, - "loss": 0.0491, - "step": 35280 - }, - { - "epoch": 0.8956720395989338, - "grad_norm": 0.5825850963592529, - "learning_rate": 1.4028853069340442e-05, - "loss": 0.0518, - "step": 35285 - }, - { - "epoch": 0.8957989592587892, - "grad_norm": 0.4558623135089874, - "learning_rate": 1.402800693827474e-05, - "loss": 0.0642, - "step": 35290 - }, - { - "epoch": 0.8959258789186445, - "grad_norm": 0.5958998799324036, - "learning_rate": 1.4027160807209037e-05, - "loss": 0.0825, - "step": 35295 - }, - { - "epoch": 0.8960527985784998, - "grad_norm": 0.4824344515800476, - "learning_rate": 1.4026314676143335e-05, - "loss": 0.0679, - "step": 35300 - }, - { - "epoch": 0.8961797182383551, - "grad_norm": 0.38949334621429443, - "learning_rate": 1.4025468545077633e-05, - "loss": 0.0784, - "step": 35305 - }, - { - "epoch": 0.8963066378982104, - "grad_norm": 0.47412732243537903, - "learning_rate": 1.4024622414011932e-05, - "loss": 0.0539, - "step": 35310 - }, - { - "epoch": 0.8964335575580658, - "grad_norm": 0.8034778237342834, - "learning_rate": 1.4023776282946229e-05, - "loss": 0.0578, - "step": 35315 - }, - { - "epoch": 0.8965604772179211, - "grad_norm": 0.2647051215171814, - "learning_rate": 1.4022930151880527e-05, - "loss": 0.0505, - "step": 35320 - }, - { - "epoch": 0.8966873968777763, - "grad_norm": 0.5596978664398193, - "learning_rate": 1.4022084020814825e-05, - "loss": 0.0411, - "step": 35325 - }, - { - "epoch": 0.8968143165376317, - "grad_norm": 0.34831950068473816, - "learning_rate": 1.4021237889749124e-05, - "loss": 0.0757, - "step": 35330 - }, - { - "epoch": 0.896941236197487, - "grad_norm": 0.5986328721046448, - "learning_rate": 1.402039175868342e-05, - "loss": 0.0539, - "step": 35335 - }, - { - "epoch": 0.8970681558573423, - "grad_norm": 0.7212954759597778, - "learning_rate": 1.4019545627617719e-05, - "loss": 0.0542, - "step": 35340 - }, - { - "epoch": 0.8971950755171976, - "grad_norm": 0.5268731713294983, - "learning_rate": 1.4018699496552017e-05, - "loss": 0.0626, - "step": 35345 - }, - { - "epoch": 0.8973219951770529, - "grad_norm": 0.6079050898551941, - "learning_rate": 1.4017853365486316e-05, - "loss": 0.089, - "step": 35350 - }, - { - "epoch": 0.8974489148369083, - "grad_norm": 1.829393744468689, - "learning_rate": 1.4017007234420612e-05, - "loss": 0.0771, - "step": 35355 - }, - { - "epoch": 0.8975758344967636, - "grad_norm": 0.6323705911636353, - "learning_rate": 1.401616110335491e-05, - "loss": 0.0572, - "step": 35360 - }, - { - "epoch": 0.8977027541566188, - "grad_norm": 0.5731505751609802, - "learning_rate": 1.401531497228921e-05, - "loss": 0.0668, - "step": 35365 - }, - { - "epoch": 0.8978296738164742, - "grad_norm": 0.4381532669067383, - "learning_rate": 1.4014468841223508e-05, - "loss": 0.0576, - "step": 35370 - }, - { - "epoch": 0.8979565934763295, - "grad_norm": 0.576497495174408, - "learning_rate": 1.4013622710157804e-05, - "loss": 0.0524, - "step": 35375 - }, - { - "epoch": 0.8980835131361848, - "grad_norm": 0.3703215718269348, - "learning_rate": 1.4012776579092103e-05, - "loss": 0.0425, - "step": 35380 - }, - { - "epoch": 0.8982104327960401, - "grad_norm": 0.38794538378715515, - "learning_rate": 1.4011930448026401e-05, - "loss": 0.0652, - "step": 35385 - }, - { - "epoch": 0.8983373524558954, - "grad_norm": 0.4784393608570099, - "learning_rate": 1.40110843169607e-05, - "loss": 0.0651, - "step": 35390 - }, - { - "epoch": 0.8984642721157508, - "grad_norm": 0.42981621623039246, - "learning_rate": 1.4010238185894996e-05, - "loss": 0.0823, - "step": 35395 - }, - { - "epoch": 0.8985911917756061, - "grad_norm": 0.4076554775238037, - "learning_rate": 1.4009392054829295e-05, - "loss": 0.0747, - "step": 35400 - }, - { - "epoch": 0.8987181114354613, - "grad_norm": 0.6202811598777771, - "learning_rate": 1.4008545923763593e-05, - "loss": 0.059, - "step": 35405 - }, - { - "epoch": 0.8988450310953167, - "grad_norm": 0.44438254833221436, - "learning_rate": 1.4007699792697891e-05, - "loss": 0.0552, - "step": 35410 - }, - { - "epoch": 0.898971950755172, - "grad_norm": 0.49857375025749207, - "learning_rate": 1.4006853661632186e-05, - "loss": 0.0688, - "step": 35415 - }, - { - "epoch": 0.8990988704150273, - "grad_norm": 0.9087181091308594, - "learning_rate": 1.4006007530566485e-05, - "loss": 0.0529, - "step": 35420 - }, - { - "epoch": 0.8992257900748826, - "grad_norm": 0.5597060918807983, - "learning_rate": 1.4005161399500783e-05, - "loss": 0.0709, - "step": 35425 - }, - { - "epoch": 0.8993527097347379, - "grad_norm": 0.5433340072631836, - "learning_rate": 1.4004315268435082e-05, - "loss": 0.054, - "step": 35430 - }, - { - "epoch": 0.8994796293945932, - "grad_norm": 0.5076007843017578, - "learning_rate": 1.4003469137369378e-05, - "loss": 0.0659, - "step": 35435 - }, - { - "epoch": 0.8996065490544485, - "grad_norm": 0.5533579587936401, - "learning_rate": 1.4002623006303677e-05, - "loss": 0.053, - "step": 35440 - }, - { - "epoch": 0.8997334687143038, - "grad_norm": 0.5547532439231873, - "learning_rate": 1.4001776875237975e-05, - "loss": 0.0543, - "step": 35445 - }, - { - "epoch": 0.8998603883741592, - "grad_norm": 0.5422335267066956, - "learning_rate": 1.4000930744172274e-05, - "loss": 0.0695, - "step": 35450 - }, - { - "epoch": 0.8999873080340145, - "grad_norm": 0.5290141701698303, - "learning_rate": 1.400008461310657e-05, - "loss": 0.0549, - "step": 35455 - }, - { - "epoch": 0.9001142276938697, - "grad_norm": 0.5614126324653625, - "learning_rate": 1.3999238482040869e-05, - "loss": 0.0545, - "step": 35460 - }, - { - "epoch": 0.9002411473537251, - "grad_norm": 0.6558793187141418, - "learning_rate": 1.3998392350975167e-05, - "loss": 0.0796, - "step": 35465 - }, - { - "epoch": 0.9003680670135804, - "grad_norm": 0.35309121012687683, - "learning_rate": 1.3997546219909465e-05, - "loss": 0.0522, - "step": 35470 - }, - { - "epoch": 0.9004949866734357, - "grad_norm": 0.6415835618972778, - "learning_rate": 1.3996700088843762e-05, - "loss": 0.0538, - "step": 35475 - }, - { - "epoch": 0.900621906333291, - "grad_norm": 0.7187492251396179, - "learning_rate": 1.399585395777806e-05, - "loss": 0.0669, - "step": 35480 - }, - { - "epoch": 0.9007488259931463, - "grad_norm": 0.348796010017395, - "learning_rate": 1.3995007826712359e-05, - "loss": 0.0509, - "step": 35485 - }, - { - "epoch": 0.9008757456530017, - "grad_norm": 0.5280691981315613, - "learning_rate": 1.3994161695646657e-05, - "loss": 0.058, - "step": 35490 - }, - { - "epoch": 0.901002665312857, - "grad_norm": 0.8122514486312866, - "learning_rate": 1.3993315564580954e-05, - "loss": 0.063, - "step": 35495 - }, - { - "epoch": 0.9011295849727122, - "grad_norm": 0.6575353741645813, - "learning_rate": 1.3992469433515253e-05, - "loss": 0.0486, - "step": 35500 - }, - { - "epoch": 0.9012565046325676, - "grad_norm": 0.4625047445297241, - "learning_rate": 1.3991623302449551e-05, - "loss": 0.0515, - "step": 35505 - }, - { - "epoch": 0.9013834242924229, - "grad_norm": 0.4527244567871094, - "learning_rate": 1.399077717138385e-05, - "loss": 0.0504, - "step": 35510 - }, - { - "epoch": 0.9015103439522782, - "grad_norm": 0.4507639408111572, - "learning_rate": 1.3989931040318146e-05, - "loss": 0.0633, - "step": 35515 - }, - { - "epoch": 0.9016372636121335, - "grad_norm": 0.5713810324668884, - "learning_rate": 1.3989084909252444e-05, - "loss": 0.0707, - "step": 35520 - }, - { - "epoch": 0.9017641832719888, - "grad_norm": 0.585920512676239, - "learning_rate": 1.3988238778186743e-05, - "loss": 0.0779, - "step": 35525 - }, - { - "epoch": 0.9018911029318442, - "grad_norm": 0.4275569021701813, - "learning_rate": 1.3987392647121041e-05, - "loss": 0.0597, - "step": 35530 - }, - { - "epoch": 0.9020180225916995, - "grad_norm": 0.6829882860183716, - "learning_rate": 1.3986546516055336e-05, - "loss": 0.0731, - "step": 35535 - }, - { - "epoch": 0.9021449422515547, - "grad_norm": 0.6214821934700012, - "learning_rate": 1.3985700384989636e-05, - "loss": 0.0667, - "step": 35540 - }, - { - "epoch": 0.9022718619114101, - "grad_norm": 0.4312489926815033, - "learning_rate": 1.3984854253923935e-05, - "loss": 0.0383, - "step": 35545 - }, - { - "epoch": 0.9023987815712654, - "grad_norm": 0.49352091550827026, - "learning_rate": 1.3984008122858233e-05, - "loss": 0.0721, - "step": 35550 - }, - { - "epoch": 0.9025257012311207, - "grad_norm": 0.6225677132606506, - "learning_rate": 1.3983161991792528e-05, - "loss": 0.0489, - "step": 35555 - }, - { - "epoch": 0.902652620890976, - "grad_norm": 0.6697161197662354, - "learning_rate": 1.3982315860726827e-05, - "loss": 0.0774, - "step": 35560 - }, - { - "epoch": 0.9027795405508313, - "grad_norm": 0.5342637300491333, - "learning_rate": 1.3981469729661125e-05, - "loss": 0.052, - "step": 35565 - }, - { - "epoch": 0.9029064602106867, - "grad_norm": 0.44212427735328674, - "learning_rate": 1.3980623598595423e-05, - "loss": 0.0566, - "step": 35570 - }, - { - "epoch": 0.903033379870542, - "grad_norm": 0.3657410442829132, - "learning_rate": 1.397977746752972e-05, - "loss": 0.0463, - "step": 35575 - }, - { - "epoch": 0.9031602995303972, - "grad_norm": 0.4275638163089752, - "learning_rate": 1.3978931336464018e-05, - "loss": 0.0554, - "step": 35580 - }, - { - "epoch": 0.9032872191902526, - "grad_norm": 0.6286470293998718, - "learning_rate": 1.3978085205398317e-05, - "loss": 0.0449, - "step": 35585 - }, - { - "epoch": 0.9034141388501079, - "grad_norm": 0.5052282810211182, - "learning_rate": 1.3977239074332615e-05, - "loss": 0.0521, - "step": 35590 - }, - { - "epoch": 0.9035410585099632, - "grad_norm": 0.4213330149650574, - "learning_rate": 1.3976392943266912e-05, - "loss": 0.0534, - "step": 35595 - }, - { - "epoch": 0.9036679781698185, - "grad_norm": 0.5124711990356445, - "learning_rate": 1.397554681220121e-05, - "loss": 0.0699, - "step": 35600 - }, - { - "epoch": 0.9037948978296738, - "grad_norm": 0.4083732068538666, - "learning_rate": 1.3974700681135509e-05, - "loss": 0.0688, - "step": 35605 - }, - { - "epoch": 0.9039218174895292, - "grad_norm": 0.4097045660018921, - "learning_rate": 1.3973854550069807e-05, - "loss": 0.0803, - "step": 35610 - }, - { - "epoch": 0.9040487371493844, - "grad_norm": 0.6943650245666504, - "learning_rate": 1.3973008419004106e-05, - "loss": 0.0701, - "step": 35615 - }, - { - "epoch": 0.9041756568092397, - "grad_norm": 0.6970309615135193, - "learning_rate": 1.3972162287938402e-05, - "loss": 0.0515, - "step": 35620 - }, - { - "epoch": 0.9043025764690951, - "grad_norm": 0.431818425655365, - "learning_rate": 1.39713161568727e-05, - "loss": 0.0513, - "step": 35625 - }, - { - "epoch": 0.9044294961289504, - "grad_norm": 0.4677361249923706, - "learning_rate": 1.3970470025806999e-05, - "loss": 0.062, - "step": 35630 - }, - { - "epoch": 0.9045564157888056, - "grad_norm": 0.5686903595924377, - "learning_rate": 1.3969623894741298e-05, - "loss": 0.0711, - "step": 35635 - }, - { - "epoch": 0.904683335448661, - "grad_norm": 0.4488184154033661, - "learning_rate": 1.3968777763675594e-05, - "loss": 0.0601, - "step": 35640 - }, - { - "epoch": 0.9048102551085163, - "grad_norm": 0.35828644037246704, - "learning_rate": 1.3967931632609893e-05, - "loss": 0.0699, - "step": 35645 - }, - { - "epoch": 0.9049371747683717, - "grad_norm": 0.39648061990737915, - "learning_rate": 1.3967085501544191e-05, - "loss": 0.0775, - "step": 35650 - }, - { - "epoch": 0.9050640944282269, - "grad_norm": 0.9120569229125977, - "learning_rate": 1.396623937047849e-05, - "loss": 0.0652, - "step": 35655 - }, - { - "epoch": 0.9051910140880822, - "grad_norm": 0.40951985120773315, - "learning_rate": 1.3965393239412786e-05, - "loss": 0.0629, - "step": 35660 - }, - { - "epoch": 0.9053179337479376, - "grad_norm": 0.55852872133255, - "learning_rate": 1.3964547108347085e-05, - "loss": 0.064, - "step": 35665 - }, - { - "epoch": 0.9054448534077929, - "grad_norm": 0.756687581539154, - "learning_rate": 1.3963700977281383e-05, - "loss": 0.0659, - "step": 35670 - }, - { - "epoch": 0.9055717730676481, - "grad_norm": 0.6170337796211243, - "learning_rate": 1.3962854846215681e-05, - "loss": 0.0774, - "step": 35675 - }, - { - "epoch": 0.9056986927275035, - "grad_norm": 0.4753200113773346, - "learning_rate": 1.3962008715149978e-05, - "loss": 0.0695, - "step": 35680 - }, - { - "epoch": 0.9058256123873588, - "grad_norm": 0.3901576101779938, - "learning_rate": 1.3961162584084276e-05, - "loss": 0.0442, - "step": 35685 - }, - { - "epoch": 0.9059525320472142, - "grad_norm": 0.487278550863266, - "learning_rate": 1.3960316453018575e-05, - "loss": 0.053, - "step": 35690 - }, - { - "epoch": 0.9060794517070694, - "grad_norm": 0.6608798503875732, - "learning_rate": 1.3959470321952873e-05, - "loss": 0.0593, - "step": 35695 - }, - { - "epoch": 0.9062063713669247, - "grad_norm": 0.7184143662452698, - "learning_rate": 1.3958624190887168e-05, - "loss": 0.0686, - "step": 35700 - }, - { - "epoch": 0.9063332910267801, - "grad_norm": 0.5345159769058228, - "learning_rate": 1.3957778059821467e-05, - "loss": 0.082, - "step": 35705 - }, - { - "epoch": 0.9064602106866354, - "grad_norm": 0.5161870121955872, - "learning_rate": 1.3956931928755765e-05, - "loss": 0.0648, - "step": 35710 - }, - { - "epoch": 0.9065871303464906, - "grad_norm": 0.7663313150405884, - "learning_rate": 1.3956085797690065e-05, - "loss": 0.0639, - "step": 35715 - }, - { - "epoch": 0.906714050006346, - "grad_norm": 0.5536203384399414, - "learning_rate": 1.395523966662436e-05, - "loss": 0.0664, - "step": 35720 - }, - { - "epoch": 0.9068409696662013, - "grad_norm": 0.5833967328071594, - "learning_rate": 1.3954393535558659e-05, - "loss": 0.0532, - "step": 35725 - }, - { - "epoch": 0.9069678893260567, - "grad_norm": 0.40048515796661377, - "learning_rate": 1.3953547404492957e-05, - "loss": 0.0527, - "step": 35730 - }, - { - "epoch": 0.9070948089859119, - "grad_norm": 0.46452516317367554, - "learning_rate": 1.3952701273427255e-05, - "loss": 0.0561, - "step": 35735 - }, - { - "epoch": 0.9072217286457672, - "grad_norm": 0.42266300320625305, - "learning_rate": 1.3951855142361552e-05, - "loss": 0.0558, - "step": 35740 - }, - { - "epoch": 0.9073486483056226, - "grad_norm": 0.42098888754844666, - "learning_rate": 1.395100901129585e-05, - "loss": 0.0623, - "step": 35745 - }, - { - "epoch": 0.9074755679654779, - "grad_norm": 0.41297924518585205, - "learning_rate": 1.3950162880230149e-05, - "loss": 0.0537, - "step": 35750 - }, - { - "epoch": 0.9076024876253331, - "grad_norm": 0.7173315286636353, - "learning_rate": 1.3949316749164447e-05, - "loss": 0.0709, - "step": 35755 - }, - { - "epoch": 0.9077294072851885, - "grad_norm": 0.5555155873298645, - "learning_rate": 1.3948470618098744e-05, - "loss": 0.0435, - "step": 35760 - }, - { - "epoch": 0.9078563269450438, - "grad_norm": 0.3385359048843384, - "learning_rate": 1.3947624487033042e-05, - "loss": 0.0647, - "step": 35765 - }, - { - "epoch": 0.9079832466048992, - "grad_norm": 0.6309249997138977, - "learning_rate": 1.394677835596734e-05, - "loss": 0.0716, - "step": 35770 - }, - { - "epoch": 0.9081101662647544, - "grad_norm": 0.38597702980041504, - "learning_rate": 1.394593222490164e-05, - "loss": 0.0685, - "step": 35775 - }, - { - "epoch": 0.9082370859246097, - "grad_norm": 0.40910783410072327, - "learning_rate": 1.3945086093835936e-05, - "loss": 0.0499, - "step": 35780 - }, - { - "epoch": 0.9083640055844651, - "grad_norm": 0.5262329578399658, - "learning_rate": 1.3944239962770234e-05, - "loss": 0.0509, - "step": 35785 - }, - { - "epoch": 0.9084909252443203, - "grad_norm": 0.6380796432495117, - "learning_rate": 1.3943393831704533e-05, - "loss": 0.0496, - "step": 35790 - }, - { - "epoch": 0.9086178449041756, - "grad_norm": 0.8702549338340759, - "learning_rate": 1.3942547700638831e-05, - "loss": 0.0678, - "step": 35795 - }, - { - "epoch": 0.908744764564031, - "grad_norm": 0.37720638513565063, - "learning_rate": 1.3941701569573128e-05, - "loss": 0.0625, - "step": 35800 - }, - { - "epoch": 0.9088716842238863, - "grad_norm": 0.5829445123672485, - "learning_rate": 1.3940855438507426e-05, - "loss": 0.0609, - "step": 35805 - }, - { - "epoch": 0.9089986038837415, - "grad_norm": 0.8798021078109741, - "learning_rate": 1.3940009307441725e-05, - "loss": 0.0611, - "step": 35810 - }, - { - "epoch": 0.9091255235435969, - "grad_norm": 0.632575273513794, - "learning_rate": 1.3939163176376023e-05, - "loss": 0.0797, - "step": 35815 - }, - { - "epoch": 0.9092524432034522, - "grad_norm": 0.4816359281539917, - "learning_rate": 1.393831704531032e-05, - "loss": 0.0587, - "step": 35820 - }, - { - "epoch": 0.9093793628633076, - "grad_norm": 0.7026493549346924, - "learning_rate": 1.3937470914244618e-05, - "loss": 0.0694, - "step": 35825 - }, - { - "epoch": 0.9095062825231628, - "grad_norm": 0.5104148387908936, - "learning_rate": 1.3936624783178917e-05, - "loss": 0.0449, - "step": 35830 - }, - { - "epoch": 0.9096332021830181, - "grad_norm": 0.4069823920726776, - "learning_rate": 1.3935778652113215e-05, - "loss": 0.0536, - "step": 35835 - }, - { - "epoch": 0.9097601218428735, - "grad_norm": 0.7840912938117981, - "learning_rate": 1.393493252104751e-05, - "loss": 0.0464, - "step": 35840 - }, - { - "epoch": 0.9098870415027288, - "grad_norm": 0.5983901023864746, - "learning_rate": 1.3934086389981808e-05, - "loss": 0.0695, - "step": 35845 - }, - { - "epoch": 0.910013961162584, - "grad_norm": 0.41672849655151367, - "learning_rate": 1.3933240258916107e-05, - "loss": 0.0557, - "step": 35850 - }, - { - "epoch": 0.9101408808224394, - "grad_norm": 0.5936152935028076, - "learning_rate": 1.3932394127850405e-05, - "loss": 0.0513, - "step": 35855 - }, - { - "epoch": 0.9102678004822947, - "grad_norm": 0.6426900029182434, - "learning_rate": 1.3931547996784702e-05, - "loss": 0.0657, - "step": 35860 - }, - { - "epoch": 0.9103947201421501, - "grad_norm": 0.5609415769577026, - "learning_rate": 1.3930701865719e-05, - "loss": 0.063, - "step": 35865 - }, - { - "epoch": 0.9105216398020053, - "grad_norm": 0.5674073696136475, - "learning_rate": 1.3929855734653299e-05, - "loss": 0.0828, - "step": 35870 - }, - { - "epoch": 0.9106485594618606, - "grad_norm": 0.9100561738014221, - "learning_rate": 1.3929009603587597e-05, - "loss": 0.0569, - "step": 35875 - }, - { - "epoch": 0.910775479121716, - "grad_norm": 0.42916178703308105, - "learning_rate": 1.3928163472521894e-05, - "loss": 0.0538, - "step": 35880 - }, - { - "epoch": 0.9109023987815713, - "grad_norm": 0.5670952200889587, - "learning_rate": 1.3927317341456192e-05, - "loss": 0.0513, - "step": 35885 - }, - { - "epoch": 0.9110293184414265, - "grad_norm": 0.5851391553878784, - "learning_rate": 1.392647121039049e-05, - "loss": 0.0718, - "step": 35890 - }, - { - "epoch": 0.9111562381012819, - "grad_norm": 0.34720829129219055, - "learning_rate": 1.3925625079324789e-05, - "loss": 0.0555, - "step": 35895 - }, - { - "epoch": 0.9112831577611372, - "grad_norm": 0.3031560182571411, - "learning_rate": 1.3924778948259086e-05, - "loss": 0.0387, - "step": 35900 - }, - { - "epoch": 0.9114100774209926, - "grad_norm": 0.523270845413208, - "learning_rate": 1.3923932817193384e-05, - "loss": 0.0593, - "step": 35905 - }, - { - "epoch": 0.9115369970808478, - "grad_norm": 0.4993807375431061, - "learning_rate": 1.3923086686127683e-05, - "loss": 0.0734, - "step": 35910 - }, - { - "epoch": 0.9116639167407031, - "grad_norm": 0.7300135493278503, - "learning_rate": 1.3922240555061981e-05, - "loss": 0.0792, - "step": 35915 - }, - { - "epoch": 0.9117908364005585, - "grad_norm": 0.3985985815525055, - "learning_rate": 1.3921394423996278e-05, - "loss": 0.0561, - "step": 35920 - }, - { - "epoch": 0.9119177560604138, - "grad_norm": 0.3908310532569885, - "learning_rate": 1.3920548292930576e-05, - "loss": 0.0683, - "step": 35925 - }, - { - "epoch": 0.912044675720269, - "grad_norm": 0.7904796004295349, - "learning_rate": 1.3919702161864874e-05, - "loss": 0.0556, - "step": 35930 - }, - { - "epoch": 0.9121715953801244, - "grad_norm": 0.6656363010406494, - "learning_rate": 1.3918856030799173e-05, - "loss": 0.0663, - "step": 35935 - }, - { - "epoch": 0.9122985150399797, - "grad_norm": 0.4563128352165222, - "learning_rate": 1.391800989973347e-05, - "loss": 0.0624, - "step": 35940 - }, - { - "epoch": 0.9124254346998351, - "grad_norm": 1.5634815692901611, - "learning_rate": 1.3917163768667768e-05, - "loss": 0.0699, - "step": 35945 - }, - { - "epoch": 0.9125523543596903, - "grad_norm": 0.4150119125843048, - "learning_rate": 1.3916317637602066e-05, - "loss": 0.0368, - "step": 35950 - }, - { - "epoch": 0.9126792740195456, - "grad_norm": 0.4370921850204468, - "learning_rate": 1.3915471506536365e-05, - "loss": 0.0497, - "step": 35955 - }, - { - "epoch": 0.912806193679401, - "grad_norm": 0.3948012888431549, - "learning_rate": 1.3914625375470661e-05, - "loss": 0.0701, - "step": 35960 - }, - { - "epoch": 0.9129331133392562, - "grad_norm": 0.553893506526947, - "learning_rate": 1.391377924440496e-05, - "loss": 0.0525, - "step": 35965 - }, - { - "epoch": 0.9130600329991115, - "grad_norm": 0.7392289042472839, - "learning_rate": 1.3912933113339258e-05, - "loss": 0.061, - "step": 35970 - }, - { - "epoch": 0.9131869526589669, - "grad_norm": 0.9235190153121948, - "learning_rate": 1.3912086982273557e-05, - "loss": 0.073, - "step": 35975 - }, - { - "epoch": 0.9133138723188222, - "grad_norm": 0.41809892654418945, - "learning_rate": 1.3911240851207852e-05, - "loss": 0.0537, - "step": 35980 - }, - { - "epoch": 0.9134407919786774, - "grad_norm": 0.49973949790000916, - "learning_rate": 1.391039472014215e-05, - "loss": 0.0837, - "step": 35985 - }, - { - "epoch": 0.9135677116385328, - "grad_norm": 1.6755456924438477, - "learning_rate": 1.3909548589076448e-05, - "loss": 0.0454, - "step": 35990 - }, - { - "epoch": 0.9136946312983881, - "grad_norm": 0.3946578800678253, - "learning_rate": 1.3908702458010747e-05, - "loss": 0.057, - "step": 35995 - }, - { - "epoch": 0.9138215509582435, - "grad_norm": 0.2506122887134552, - "learning_rate": 1.3907856326945044e-05, - "loss": 0.0442, - "step": 36000 - }, - { - "epoch": 0.9139484706180987, - "grad_norm": 0.7715497612953186, - "learning_rate": 1.3907010195879342e-05, - "loss": 0.0703, - "step": 36005 - }, - { - "epoch": 0.914075390277954, - "grad_norm": 0.43095719814300537, - "learning_rate": 1.390616406481364e-05, - "loss": 0.0676, - "step": 36010 - }, - { - "epoch": 0.9142023099378094, - "grad_norm": 0.526982307434082, - "learning_rate": 1.3905317933747939e-05, - "loss": 0.0797, - "step": 36015 - }, - { - "epoch": 0.9143292295976647, - "grad_norm": 0.4760642349720001, - "learning_rate": 1.3904471802682236e-05, - "loss": 0.0643, - "step": 36020 - }, - { - "epoch": 0.91445614925752, - "grad_norm": 0.3731924891471863, - "learning_rate": 1.3903625671616534e-05, - "loss": 0.0634, - "step": 36025 - }, - { - "epoch": 0.9145830689173753, - "grad_norm": 0.7781260013580322, - "learning_rate": 1.3902779540550832e-05, - "loss": 0.0552, - "step": 36030 - }, - { - "epoch": 0.9147099885772306, - "grad_norm": 0.3187873959541321, - "learning_rate": 1.390193340948513e-05, - "loss": 0.0558, - "step": 36035 - }, - { - "epoch": 0.914836908237086, - "grad_norm": 0.8405691385269165, - "learning_rate": 1.3901087278419427e-05, - "loss": 0.0675, - "step": 36040 - }, - { - "epoch": 0.9149638278969412, - "grad_norm": 0.6189005374908447, - "learning_rate": 1.3900241147353726e-05, - "loss": 0.0695, - "step": 36045 - }, - { - "epoch": 0.9150907475567965, - "grad_norm": 0.5484052300453186, - "learning_rate": 1.3899395016288024e-05, - "loss": 0.0573, - "step": 36050 - }, - { - "epoch": 0.9152176672166519, - "grad_norm": 1.0177640914916992, - "learning_rate": 1.3898548885222323e-05, - "loss": 0.0496, - "step": 36055 - }, - { - "epoch": 0.9153445868765072, - "grad_norm": 0.5386986136436462, - "learning_rate": 1.389770275415662e-05, - "loss": 0.0609, - "step": 36060 - }, - { - "epoch": 0.9154715065363624, - "grad_norm": 0.29330629110336304, - "learning_rate": 1.3896856623090918e-05, - "loss": 0.0704, - "step": 36065 - }, - { - "epoch": 0.9155984261962178, - "grad_norm": 0.5978008508682251, - "learning_rate": 1.3896010492025216e-05, - "loss": 0.0917, - "step": 36070 - }, - { - "epoch": 0.9157253458560731, - "grad_norm": 0.38673245906829834, - "learning_rate": 1.3895164360959515e-05, - "loss": 0.0593, - "step": 36075 - }, - { - "epoch": 0.9158522655159285, - "grad_norm": 1.0714882612228394, - "learning_rate": 1.3894318229893811e-05, - "loss": 0.0672, - "step": 36080 - }, - { - "epoch": 0.9159791851757837, - "grad_norm": 0.45823559165000916, - "learning_rate": 1.389347209882811e-05, - "loss": 0.0575, - "step": 36085 - }, - { - "epoch": 0.916106104835639, - "grad_norm": 0.3504791557788849, - "learning_rate": 1.3892625967762408e-05, - "loss": 0.0641, - "step": 36090 - }, - { - "epoch": 0.9162330244954944, - "grad_norm": 0.4594399631023407, - "learning_rate": 1.3891779836696706e-05, - "loss": 0.0729, - "step": 36095 - }, - { - "epoch": 0.9163599441553497, - "grad_norm": 0.43579167127609253, - "learning_rate": 1.3890933705631001e-05, - "loss": 0.0631, - "step": 36100 - }, - { - "epoch": 0.9164868638152049, - "grad_norm": 0.467289537191391, - "learning_rate": 1.3890087574565302e-05, - "loss": 0.0687, - "step": 36105 - }, - { - "epoch": 0.9166137834750603, - "grad_norm": 0.6438808441162109, - "learning_rate": 1.38892414434996e-05, - "loss": 0.0676, - "step": 36110 - }, - { - "epoch": 0.9167407031349156, - "grad_norm": 0.3436811864376068, - "learning_rate": 1.3888395312433898e-05, - "loss": 0.0442, - "step": 36115 - }, - { - "epoch": 0.916867622794771, - "grad_norm": 0.7183666229248047, - "learning_rate": 1.3887549181368193e-05, - "loss": 0.0594, - "step": 36120 - }, - { - "epoch": 0.9169945424546262, - "grad_norm": 0.3617455065250397, - "learning_rate": 1.3886703050302492e-05, - "loss": 0.0525, - "step": 36125 - }, - { - "epoch": 0.9171214621144815, - "grad_norm": 0.5853341817855835, - "learning_rate": 1.388585691923679e-05, - "loss": 0.066, - "step": 36130 - }, - { - "epoch": 0.9172483817743369, - "grad_norm": 0.4095194637775421, - "learning_rate": 1.3885010788171089e-05, - "loss": 0.0703, - "step": 36135 - }, - { - "epoch": 0.9173753014341921, - "grad_norm": 1.1366517543792725, - "learning_rate": 1.3884164657105389e-05, - "loss": 0.0441, - "step": 36140 - }, - { - "epoch": 0.9175022210940474, - "grad_norm": 0.879767119884491, - "learning_rate": 1.3883318526039684e-05, - "loss": 0.0818, - "step": 36145 - }, - { - "epoch": 0.9176291407539028, - "grad_norm": 0.5569621920585632, - "learning_rate": 1.3882472394973982e-05, - "loss": 0.082, - "step": 36150 - }, - { - "epoch": 0.9177560604137581, - "grad_norm": 0.54118812084198, - "learning_rate": 1.388162626390828e-05, - "loss": 0.0834, - "step": 36155 - }, - { - "epoch": 0.9178829800736134, - "grad_norm": 0.48699063062667847, - "learning_rate": 1.3880780132842579e-05, - "loss": 0.0695, - "step": 36160 - }, - { - "epoch": 0.9180098997334687, - "grad_norm": 0.3935364782810211, - "learning_rate": 1.3879934001776876e-05, - "loss": 0.0609, - "step": 36165 - }, - { - "epoch": 0.918136819393324, - "grad_norm": 0.34021762013435364, - "learning_rate": 1.3879087870711174e-05, - "loss": 0.065, - "step": 36170 - }, - { - "epoch": 0.9182637390531794, - "grad_norm": 0.6391494870185852, - "learning_rate": 1.3878241739645472e-05, - "loss": 0.0715, - "step": 36175 - }, - { - "epoch": 0.9183906587130346, - "grad_norm": 0.49920326471328735, - "learning_rate": 1.387739560857977e-05, - "loss": 0.0502, - "step": 36180 - }, - { - "epoch": 0.9185175783728899, - "grad_norm": 0.647445797920227, - "learning_rate": 1.3876549477514068e-05, - "loss": 0.0607, - "step": 36185 - }, - { - "epoch": 0.9186444980327453, - "grad_norm": 0.6276041269302368, - "learning_rate": 1.3875703346448366e-05, - "loss": 0.0577, - "step": 36190 - }, - { - "epoch": 0.9187714176926006, - "grad_norm": 0.5600964426994324, - "learning_rate": 1.3874857215382664e-05, - "loss": 0.0789, - "step": 36195 - }, - { - "epoch": 0.9188983373524559, - "grad_norm": 0.9054215550422668, - "learning_rate": 1.3874011084316963e-05, - "loss": 0.0801, - "step": 36200 - }, - { - "epoch": 0.9190252570123112, - "grad_norm": 0.7239906787872314, - "learning_rate": 1.387316495325126e-05, - "loss": 0.0643, - "step": 36205 - }, - { - "epoch": 0.9191521766721665, - "grad_norm": 0.5962494015693665, - "learning_rate": 1.3872318822185558e-05, - "loss": 0.0642, - "step": 36210 - }, - { - "epoch": 0.9192790963320219, - "grad_norm": 0.6681636571884155, - "learning_rate": 1.3871472691119856e-05, - "loss": 0.0616, - "step": 36215 - }, - { - "epoch": 0.9194060159918771, - "grad_norm": 0.4356440305709839, - "learning_rate": 1.3870626560054155e-05, - "loss": 0.0628, - "step": 36220 - }, - { - "epoch": 0.9195329356517324, - "grad_norm": 0.260852575302124, - "learning_rate": 1.3869780428988451e-05, - "loss": 0.0518, - "step": 36225 - }, - { - "epoch": 0.9196598553115878, - "grad_norm": 0.4649352431297302, - "learning_rate": 1.386893429792275e-05, - "loss": 0.0605, - "step": 36230 - }, - { - "epoch": 0.9197867749714431, - "grad_norm": 0.5760641694068909, - "learning_rate": 1.3868088166857048e-05, - "loss": 0.0752, - "step": 36235 - }, - { - "epoch": 0.9199136946312984, - "grad_norm": 0.4548545777797699, - "learning_rate": 1.3867242035791347e-05, - "loss": 0.0558, - "step": 36240 - }, - { - "epoch": 0.9200406142911537, - "grad_norm": 0.46886518597602844, - "learning_rate": 1.3866395904725643e-05, - "loss": 0.0636, - "step": 36245 - }, - { - "epoch": 0.920167533951009, - "grad_norm": 0.5474319458007812, - "learning_rate": 1.3865549773659942e-05, - "loss": 0.0712, - "step": 36250 - }, - { - "epoch": 0.9202944536108644, - "grad_norm": 0.6587515473365784, - "learning_rate": 1.386470364259424e-05, - "loss": 0.062, - "step": 36255 - }, - { - "epoch": 0.9204213732707196, - "grad_norm": 0.6892610788345337, - "learning_rate": 1.3863857511528538e-05, - "loss": 0.0573, - "step": 36260 - }, - { - "epoch": 0.9205482929305749, - "grad_norm": 0.6897662878036499, - "learning_rate": 1.3863011380462833e-05, - "loss": 0.0484, - "step": 36265 - }, - { - "epoch": 0.9206752125904303, - "grad_norm": 1.3797804117202759, - "learning_rate": 1.3862165249397132e-05, - "loss": 0.0461, - "step": 36270 - }, - { - "epoch": 0.9208021322502856, - "grad_norm": 1.2776718139648438, - "learning_rate": 1.386131911833143e-05, - "loss": 0.0674, - "step": 36275 - }, - { - "epoch": 0.9209290519101409, - "grad_norm": 0.3773519694805145, - "learning_rate": 1.386047298726573e-05, - "loss": 0.0633, - "step": 36280 - }, - { - "epoch": 0.9210559715699962, - "grad_norm": 0.49148136377334595, - "learning_rate": 1.3859626856200025e-05, - "loss": 0.0447, - "step": 36285 - }, - { - "epoch": 0.9211828912298515, - "grad_norm": 0.49123328924179077, - "learning_rate": 1.3858780725134324e-05, - "loss": 0.0583, - "step": 36290 - }, - { - "epoch": 0.9213098108897069, - "grad_norm": 0.5093609690666199, - "learning_rate": 1.3857934594068622e-05, - "loss": 0.0681, - "step": 36295 - }, - { - "epoch": 0.9214367305495621, - "grad_norm": 0.5251277685165405, - "learning_rate": 1.385708846300292e-05, - "loss": 0.0846, - "step": 36300 - }, - { - "epoch": 0.9215636502094174, - "grad_norm": 0.6592527627944946, - "learning_rate": 1.3856242331937217e-05, - "loss": 0.0711, - "step": 36305 - }, - { - "epoch": 0.9216905698692728, - "grad_norm": 0.5530318021774292, - "learning_rate": 1.3855396200871516e-05, - "loss": 0.0744, - "step": 36310 - }, - { - "epoch": 0.921817489529128, - "grad_norm": 0.9193724393844604, - "learning_rate": 1.3854550069805814e-05, - "loss": 0.0846, - "step": 36315 - }, - { - "epoch": 0.9219444091889833, - "grad_norm": 0.42848291993141174, - "learning_rate": 1.3853703938740113e-05, - "loss": 0.0585, - "step": 36320 - }, - { - "epoch": 0.9220713288488387, - "grad_norm": 0.6342139840126038, - "learning_rate": 1.385285780767441e-05, - "loss": 0.0589, - "step": 36325 - }, - { - "epoch": 0.922198248508694, - "grad_norm": 0.48296335339546204, - "learning_rate": 1.3852011676608708e-05, - "loss": 0.069, - "step": 36330 - }, - { - "epoch": 0.9223251681685493, - "grad_norm": 0.6948003768920898, - "learning_rate": 1.3851165545543006e-05, - "loss": 0.0674, - "step": 36335 - }, - { - "epoch": 0.9224520878284046, - "grad_norm": 0.7495690584182739, - "learning_rate": 1.3850319414477304e-05, - "loss": 0.0725, - "step": 36340 - }, - { - "epoch": 0.9225790074882599, - "grad_norm": 0.43140697479248047, - "learning_rate": 1.3849473283411601e-05, - "loss": 0.0637, - "step": 36345 - }, - { - "epoch": 0.9227059271481153, - "grad_norm": 0.6889270544052124, - "learning_rate": 1.38486271523459e-05, - "loss": 0.0534, - "step": 36350 - }, - { - "epoch": 0.9228328468079705, - "grad_norm": 0.42890676856040955, - "learning_rate": 1.3847781021280198e-05, - "loss": 0.0675, - "step": 36355 - }, - { - "epoch": 0.9229597664678258, - "grad_norm": 0.371334969997406, - "learning_rate": 1.3846934890214496e-05, - "loss": 0.05, - "step": 36360 - }, - { - "epoch": 0.9230866861276812, - "grad_norm": 0.454735666513443, - "learning_rate": 1.3846088759148793e-05, - "loss": 0.0656, - "step": 36365 - }, - { - "epoch": 0.9232136057875365, - "grad_norm": 0.42244866490364075, - "learning_rate": 1.3845242628083091e-05, - "loss": 0.0605, - "step": 36370 - }, - { - "epoch": 0.9233405254473918, - "grad_norm": 0.496294766664505, - "learning_rate": 1.384439649701739e-05, - "loss": 0.0716, - "step": 36375 - }, - { - "epoch": 0.9234674451072471, - "grad_norm": 0.3395509123802185, - "learning_rate": 1.3843550365951688e-05, - "loss": 0.0458, - "step": 36380 - }, - { - "epoch": 0.9235943647671024, - "grad_norm": 0.3709258735179901, - "learning_rate": 1.3842704234885985e-05, - "loss": 0.055, - "step": 36385 - }, - { - "epoch": 0.9237212844269578, - "grad_norm": 0.5986008048057556, - "learning_rate": 1.3841858103820283e-05, - "loss": 0.0709, - "step": 36390 - }, - { - "epoch": 0.923848204086813, - "grad_norm": 0.5654592514038086, - "learning_rate": 1.3841011972754582e-05, - "loss": 0.0534, - "step": 36395 - }, - { - "epoch": 0.9239751237466683, - "grad_norm": 0.40770962834358215, - "learning_rate": 1.384016584168888e-05, - "loss": 0.0552, - "step": 36400 - }, - { - "epoch": 0.9241020434065237, - "grad_norm": 0.7224621772766113, - "learning_rate": 1.3839319710623175e-05, - "loss": 0.0527, - "step": 36405 - }, - { - "epoch": 0.924228963066379, - "grad_norm": 0.40630874037742615, - "learning_rate": 1.3838473579557474e-05, - "loss": 0.0502, - "step": 36410 - }, - { - "epoch": 0.9243558827262343, - "grad_norm": 0.5154129862785339, - "learning_rate": 1.3837627448491772e-05, - "loss": 0.0649, - "step": 36415 - }, - { - "epoch": 0.9244828023860896, - "grad_norm": 1.0294181108474731, - "learning_rate": 1.383678131742607e-05, - "loss": 0.0496, - "step": 36420 - }, - { - "epoch": 0.9246097220459449, - "grad_norm": 0.5735950469970703, - "learning_rate": 1.3835935186360367e-05, - "loss": 0.0599, - "step": 36425 - }, - { - "epoch": 0.9247366417058003, - "grad_norm": 0.5174784660339355, - "learning_rate": 1.3835089055294666e-05, - "loss": 0.0542, - "step": 36430 - }, - { - "epoch": 0.9248635613656555, - "grad_norm": 0.722710371017456, - "learning_rate": 1.3834242924228964e-05, - "loss": 0.0579, - "step": 36435 - }, - { - "epoch": 0.9249904810255108, - "grad_norm": 0.3589234948158264, - "learning_rate": 1.3833396793163262e-05, - "loss": 0.0722, - "step": 36440 - }, - { - "epoch": 0.9251174006853662, - "grad_norm": 0.3880384564399719, - "learning_rate": 1.3832550662097559e-05, - "loss": 0.0735, - "step": 36445 - }, - { - "epoch": 0.9252443203452215, - "grad_norm": 0.5457733273506165, - "learning_rate": 1.3831704531031857e-05, - "loss": 0.0866, - "step": 36450 - }, - { - "epoch": 0.9253712400050768, - "grad_norm": 0.2788640260696411, - "learning_rate": 1.3830858399966156e-05, - "loss": 0.0469, - "step": 36455 - }, - { - "epoch": 0.9254981596649321, - "grad_norm": 0.5886020064353943, - "learning_rate": 1.3830012268900454e-05, - "loss": 0.0582, - "step": 36460 - }, - { - "epoch": 0.9256250793247874, - "grad_norm": 0.6201922297477722, - "learning_rate": 1.3829166137834751e-05, - "loss": 0.0626, - "step": 36465 - }, - { - "epoch": 0.9257519989846428, - "grad_norm": 0.5611088275909424, - "learning_rate": 1.382832000676905e-05, - "loss": 0.0643, - "step": 36470 - }, - { - "epoch": 0.925878918644498, - "grad_norm": 0.4397813081741333, - "learning_rate": 1.3827473875703348e-05, - "loss": 0.0789, - "step": 36475 - }, - { - "epoch": 0.9260058383043533, - "grad_norm": 1.1714452505111694, - "learning_rate": 1.3826627744637646e-05, - "loss": 0.0577, - "step": 36480 - }, - { - "epoch": 0.9261327579642087, - "grad_norm": 0.8107494711875916, - "learning_rate": 1.3825781613571943e-05, - "loss": 0.0795, - "step": 36485 - }, - { - "epoch": 0.9262596776240639, - "grad_norm": 0.34678736329078674, - "learning_rate": 1.3824935482506241e-05, - "loss": 0.0541, - "step": 36490 - }, - { - "epoch": 0.9263865972839193, - "grad_norm": 0.5045878887176514, - "learning_rate": 1.382408935144054e-05, - "loss": 0.0482, - "step": 36495 - }, - { - "epoch": 0.9265135169437746, - "grad_norm": 0.5347180366516113, - "learning_rate": 1.3823243220374838e-05, - "loss": 0.0488, - "step": 36500 - }, - { - "epoch": 0.9266404366036299, - "grad_norm": 0.8627147078514099, - "learning_rate": 1.3822397089309135e-05, - "loss": 0.0499, - "step": 36505 - }, - { - "epoch": 0.9267673562634852, - "grad_norm": 0.47932150959968567, - "learning_rate": 1.3821550958243433e-05, - "loss": 0.0722, - "step": 36510 - }, - { - "epoch": 0.9268942759233405, - "grad_norm": 2.4508605003356934, - "learning_rate": 1.3820704827177732e-05, - "loss": 0.0676, - "step": 36515 - }, - { - "epoch": 0.9270211955831958, - "grad_norm": 0.49752897024154663, - "learning_rate": 1.381985869611203e-05, - "loss": 0.0729, - "step": 36520 - }, - { - "epoch": 0.9271481152430512, - "grad_norm": 0.49631425738334656, - "learning_rate": 1.3819012565046325e-05, - "loss": 0.0563, - "step": 36525 - }, - { - "epoch": 0.9272750349029064, - "grad_norm": 0.5465111136436462, - "learning_rate": 1.3818166433980625e-05, - "loss": 0.0664, - "step": 36530 - }, - { - "epoch": 0.9274019545627618, - "grad_norm": 0.5176756978034973, - "learning_rate": 1.3817320302914923e-05, - "loss": 0.0692, - "step": 36535 - }, - { - "epoch": 0.9275288742226171, - "grad_norm": 0.7343745231628418, - "learning_rate": 1.3816474171849222e-05, - "loss": 0.0608, - "step": 36540 - }, - { - "epoch": 0.9276557938824724, - "grad_norm": 0.5357832908630371, - "learning_rate": 1.3815628040783517e-05, - "loss": 0.0454, - "step": 36545 - }, - { - "epoch": 0.9277827135423277, - "grad_norm": 0.462773859500885, - "learning_rate": 1.3814781909717815e-05, - "loss": 0.0588, - "step": 36550 - }, - { - "epoch": 0.927909633202183, - "grad_norm": 0.4075582027435303, - "learning_rate": 1.3813935778652114e-05, - "loss": 0.0648, - "step": 36555 - }, - { - "epoch": 0.9280365528620383, - "grad_norm": 0.5358396172523499, - "learning_rate": 1.3813089647586412e-05, - "loss": 0.0621, - "step": 36560 - }, - { - "epoch": 0.9281634725218937, - "grad_norm": 0.6563841104507446, - "learning_rate": 1.3812243516520709e-05, - "loss": 0.0754, - "step": 36565 - }, - { - "epoch": 0.9282903921817489, - "grad_norm": 0.43283611536026, - "learning_rate": 1.3811397385455007e-05, - "loss": 0.0342, - "step": 36570 - }, - { - "epoch": 0.9284173118416043, - "grad_norm": 0.35099270939826965, - "learning_rate": 1.3810551254389306e-05, - "loss": 0.0457, - "step": 36575 - }, - { - "epoch": 0.9285442315014596, - "grad_norm": 0.7136268615722656, - "learning_rate": 1.3809705123323604e-05, - "loss": 0.0732, - "step": 36580 - }, - { - "epoch": 0.9286711511613149, - "grad_norm": 0.5641516447067261, - "learning_rate": 1.38088589922579e-05, - "loss": 0.0627, - "step": 36585 - }, - { - "epoch": 0.9287980708211702, - "grad_norm": 0.5169939398765564, - "learning_rate": 1.3808012861192199e-05, - "loss": 0.0565, - "step": 36590 - }, - { - "epoch": 0.9289249904810255, - "grad_norm": 0.4219919741153717, - "learning_rate": 1.3807166730126498e-05, - "loss": 0.068, - "step": 36595 - }, - { - "epoch": 0.9290519101408808, - "grad_norm": 0.5339454412460327, - "learning_rate": 1.3806320599060796e-05, - "loss": 0.0563, - "step": 36600 - }, - { - "epoch": 0.9291788298007362, - "grad_norm": 0.519254207611084, - "learning_rate": 1.3805474467995093e-05, - "loss": 0.0704, - "step": 36605 - }, - { - "epoch": 0.9293057494605914, - "grad_norm": 0.5946153402328491, - "learning_rate": 1.3804628336929391e-05, - "loss": 0.0775, - "step": 36610 - }, - { - "epoch": 0.9294326691204468, - "grad_norm": 0.7079075574874878, - "learning_rate": 1.380378220586369e-05, - "loss": 0.0673, - "step": 36615 - }, - { - "epoch": 0.9295595887803021, - "grad_norm": 0.3021582365036011, - "learning_rate": 1.3802936074797988e-05, - "loss": 0.0521, - "step": 36620 - }, - { - "epoch": 0.9296865084401574, - "grad_norm": 0.6292435526847839, - "learning_rate": 1.3802089943732285e-05, - "loss": 0.0666, - "step": 36625 - }, - { - "epoch": 0.9298134281000127, - "grad_norm": 0.4779410660266876, - "learning_rate": 1.3801243812666583e-05, - "loss": 0.0682, - "step": 36630 - }, - { - "epoch": 0.929940347759868, - "grad_norm": 0.5250445008277893, - "learning_rate": 1.3800397681600881e-05, - "loss": 0.0919, - "step": 36635 - }, - { - "epoch": 0.9300672674197233, - "grad_norm": 0.36310824751853943, - "learning_rate": 1.379955155053518e-05, - "loss": 0.0557, - "step": 36640 - }, - { - "epoch": 0.9301941870795786, - "grad_norm": 0.6117006540298462, - "learning_rate": 1.3798705419469478e-05, - "loss": 0.0704, - "step": 36645 - }, - { - "epoch": 0.9303211067394339, - "grad_norm": 0.5549901127815247, - "learning_rate": 1.3797859288403775e-05, - "loss": 0.0716, - "step": 36650 - }, - { - "epoch": 0.9304480263992893, - "grad_norm": 0.6600590348243713, - "learning_rate": 1.3797013157338073e-05, - "loss": 0.0773, - "step": 36655 - }, - { - "epoch": 0.9305749460591446, - "grad_norm": 0.518208384513855, - "learning_rate": 1.3796167026272372e-05, - "loss": 0.0476, - "step": 36660 - }, - { - "epoch": 0.9307018657189998, - "grad_norm": 0.4433552324771881, - "learning_rate": 1.379532089520667e-05, - "loss": 0.0529, - "step": 36665 - }, - { - "epoch": 0.9308287853788552, - "grad_norm": 0.3991177976131439, - "learning_rate": 1.3794474764140967e-05, - "loss": 0.0672, - "step": 36670 - }, - { - "epoch": 0.9309557050387105, - "grad_norm": 0.5047779083251953, - "learning_rate": 1.3793628633075265e-05, - "loss": 0.0624, - "step": 36675 - }, - { - "epoch": 0.9310826246985658, - "grad_norm": 0.4567528963088989, - "learning_rate": 1.3792782502009564e-05, - "loss": 0.0699, - "step": 36680 - }, - { - "epoch": 0.9312095443584211, - "grad_norm": 0.7237064242362976, - "learning_rate": 1.3791936370943862e-05, - "loss": 0.0503, - "step": 36685 - }, - { - "epoch": 0.9313364640182764, - "grad_norm": 0.6126700043678284, - "learning_rate": 1.3791090239878157e-05, - "loss": 0.0708, - "step": 36690 - }, - { - "epoch": 0.9314633836781318, - "grad_norm": 0.6921812295913696, - "learning_rate": 1.3790244108812455e-05, - "loss": 0.0822, - "step": 36695 - }, - { - "epoch": 0.9315903033379871, - "grad_norm": 0.7884401082992554, - "learning_rate": 1.3789397977746754e-05, - "loss": 0.0451, - "step": 36700 - }, - { - "epoch": 0.9317172229978423, - "grad_norm": 0.4595678448677063, - "learning_rate": 1.3788551846681054e-05, - "loss": 0.057, - "step": 36705 - }, - { - "epoch": 0.9318441426576977, - "grad_norm": 0.44599857926368713, - "learning_rate": 1.3787705715615349e-05, - "loss": 0.0618, - "step": 36710 - }, - { - "epoch": 0.931971062317553, - "grad_norm": 0.4254589080810547, - "learning_rate": 1.3786859584549647e-05, - "loss": 0.0612, - "step": 36715 - }, - { - "epoch": 0.9320979819774083, - "grad_norm": 0.41318410634994507, - "learning_rate": 1.3786013453483946e-05, - "loss": 0.0663, - "step": 36720 - }, - { - "epoch": 0.9322249016372636, - "grad_norm": 0.3480411469936371, - "learning_rate": 1.3785167322418244e-05, - "loss": 0.0733, - "step": 36725 - }, - { - "epoch": 0.9323518212971189, - "grad_norm": 0.4997783601284027, - "learning_rate": 1.378432119135254e-05, - "loss": 0.0602, - "step": 36730 - }, - { - "epoch": 0.9324787409569743, - "grad_norm": 0.6273012757301331, - "learning_rate": 1.378347506028684e-05, - "loss": 0.0713, - "step": 36735 - }, - { - "epoch": 0.9326056606168296, - "grad_norm": 0.35176587104797363, - "learning_rate": 1.3782628929221138e-05, - "loss": 0.0588, - "step": 36740 - }, - { - "epoch": 0.9327325802766848, - "grad_norm": 0.576381266117096, - "learning_rate": 1.3781782798155436e-05, - "loss": 0.0626, - "step": 36745 - }, - { - "epoch": 0.9328594999365402, - "grad_norm": 0.42088979482650757, - "learning_rate": 1.3780936667089733e-05, - "loss": 0.0835, - "step": 36750 - }, - { - "epoch": 0.9329864195963955, - "grad_norm": 0.4276842176914215, - "learning_rate": 1.3780090536024031e-05, - "loss": 0.0423, - "step": 36755 - }, - { - "epoch": 0.9331133392562508, - "grad_norm": 0.5335595011711121, - "learning_rate": 1.377924440495833e-05, - "loss": 0.0826, - "step": 36760 - }, - { - "epoch": 0.9332402589161061, - "grad_norm": 1.2552722692489624, - "learning_rate": 1.3778398273892628e-05, - "loss": 0.0735, - "step": 36765 - }, - { - "epoch": 0.9333671785759614, - "grad_norm": 0.5412977337837219, - "learning_rate": 1.3777552142826925e-05, - "loss": 0.0565, - "step": 36770 - }, - { - "epoch": 0.9334940982358167, - "grad_norm": 0.33290043473243713, - "learning_rate": 1.3776706011761223e-05, - "loss": 0.0672, - "step": 36775 - }, - { - "epoch": 0.9336210178956721, - "grad_norm": 0.40973201394081116, - "learning_rate": 1.3775859880695521e-05, - "loss": 0.057, - "step": 36780 - }, - { - "epoch": 0.9337479375555273, - "grad_norm": 0.5217160582542419, - "learning_rate": 1.377501374962982e-05, - "loss": 0.074, - "step": 36785 - }, - { - "epoch": 0.9338748572153827, - "grad_norm": 0.49457743763923645, - "learning_rate": 1.3774167618564117e-05, - "loss": 0.0608, - "step": 36790 - }, - { - "epoch": 0.934001776875238, - "grad_norm": 0.5443389415740967, - "learning_rate": 1.3773321487498415e-05, - "loss": 0.0417, - "step": 36795 - }, - { - "epoch": 0.9341286965350933, - "grad_norm": 0.5122794508934021, - "learning_rate": 1.3772475356432713e-05, - "loss": 0.0724, - "step": 36800 - }, - { - "epoch": 0.9342556161949486, - "grad_norm": 0.4392770826816559, - "learning_rate": 1.3771629225367012e-05, - "loss": 0.0575, - "step": 36805 - }, - { - "epoch": 0.9343825358548039, - "grad_norm": 0.9085880517959595, - "learning_rate": 1.3770783094301308e-05, - "loss": 0.0549, - "step": 36810 - }, - { - "epoch": 0.9345094555146592, - "grad_norm": 0.3699207305908203, - "learning_rate": 1.3769936963235607e-05, - "loss": 0.0653, - "step": 36815 - }, - { - "epoch": 0.9346363751745145, - "grad_norm": 0.7421435117721558, - "learning_rate": 1.3769090832169905e-05, - "loss": 0.0675, - "step": 36820 - }, - { - "epoch": 0.9347632948343698, - "grad_norm": 0.47276434302330017, - "learning_rate": 1.3768244701104204e-05, - "loss": 0.0686, - "step": 36825 - }, - { - "epoch": 0.9348902144942252, - "grad_norm": 0.4199857711791992, - "learning_rate": 1.3767398570038499e-05, - "loss": 0.0441, - "step": 36830 - }, - { - "epoch": 0.9350171341540805, - "grad_norm": 0.4984279274940491, - "learning_rate": 1.3766552438972797e-05, - "loss": 0.0566, - "step": 36835 - }, - { - "epoch": 0.9351440538139357, - "grad_norm": 0.48863476514816284, - "learning_rate": 1.3765706307907096e-05, - "loss": 0.0615, - "step": 36840 - }, - { - "epoch": 0.9352709734737911, - "grad_norm": 0.39259073138237, - "learning_rate": 1.3764860176841394e-05, - "loss": 0.0676, - "step": 36845 - }, - { - "epoch": 0.9353978931336464, - "grad_norm": 0.4266221523284912, - "learning_rate": 1.376401404577569e-05, - "loss": 0.0584, - "step": 36850 - }, - { - "epoch": 0.9355248127935017, - "grad_norm": 0.6708065271377563, - "learning_rate": 1.3763167914709989e-05, - "loss": 0.0687, - "step": 36855 - }, - { - "epoch": 0.935651732453357, - "grad_norm": 1.479171633720398, - "learning_rate": 1.3762321783644287e-05, - "loss": 0.0797, - "step": 36860 - }, - { - "epoch": 0.9357786521132123, - "grad_norm": 0.5453132390975952, - "learning_rate": 1.3761475652578586e-05, - "loss": 0.08, - "step": 36865 - }, - { - "epoch": 0.9359055717730677, - "grad_norm": 0.4146172106266022, - "learning_rate": 1.3760629521512883e-05, - "loss": 0.045, - "step": 36870 - }, - { - "epoch": 0.936032491432923, - "grad_norm": 0.5339311361312866, - "learning_rate": 1.3759783390447181e-05, - "loss": 0.0528, - "step": 36875 - }, - { - "epoch": 0.9361594110927782, - "grad_norm": 0.7384214997291565, - "learning_rate": 1.375893725938148e-05, - "loss": 0.0533, - "step": 36880 - }, - { - "epoch": 0.9362863307526336, - "grad_norm": 0.5810449719429016, - "learning_rate": 1.3758091128315778e-05, - "loss": 0.0713, - "step": 36885 - }, - { - "epoch": 0.9364132504124889, - "grad_norm": 0.6183148622512817, - "learning_rate": 1.3757244997250074e-05, - "loss": 0.085, - "step": 36890 - }, - { - "epoch": 0.9365401700723442, - "grad_norm": 0.5589436888694763, - "learning_rate": 1.3756398866184373e-05, - "loss": 0.077, - "step": 36895 - }, - { - "epoch": 0.9366670897321995, - "grad_norm": 0.5076503157615662, - "learning_rate": 1.3755552735118671e-05, - "loss": 0.0709, - "step": 36900 - }, - { - "epoch": 0.9367940093920548, - "grad_norm": 0.34880390763282776, - "learning_rate": 1.375470660405297e-05, - "loss": 0.0728, - "step": 36905 - }, - { - "epoch": 0.9369209290519102, - "grad_norm": 0.4070088863372803, - "learning_rate": 1.3753860472987266e-05, - "loss": 0.0436, - "step": 36910 - }, - { - "epoch": 0.9370478487117655, - "grad_norm": 0.38747870922088623, - "learning_rate": 1.3753014341921565e-05, - "loss": 0.0652, - "step": 36915 - }, - { - "epoch": 0.9371747683716207, - "grad_norm": 0.6245269179344177, - "learning_rate": 1.3752168210855863e-05, - "loss": 0.0555, - "step": 36920 - }, - { - "epoch": 0.9373016880314761, - "grad_norm": 0.4476991593837738, - "learning_rate": 1.3751322079790162e-05, - "loss": 0.0532, - "step": 36925 - }, - { - "epoch": 0.9374286076913314, - "grad_norm": 0.7840293049812317, - "learning_rate": 1.3750475948724458e-05, - "loss": 0.0704, - "step": 36930 - }, - { - "epoch": 0.9375555273511867, - "grad_norm": 0.43649643659591675, - "learning_rate": 1.3749629817658757e-05, - "loss": 0.0664, - "step": 36935 - }, - { - "epoch": 0.937682447011042, - "grad_norm": 0.6217731833457947, - "learning_rate": 1.3748783686593055e-05, - "loss": 0.0777, - "step": 36940 - }, - { - "epoch": 0.9378093666708973, - "grad_norm": 0.25209787487983704, - "learning_rate": 1.3747937555527353e-05, - "loss": 0.0431, - "step": 36945 - }, - { - "epoch": 0.9379362863307527, - "grad_norm": 0.4612734317779541, - "learning_rate": 1.374709142446165e-05, - "loss": 0.049, - "step": 36950 - }, - { - "epoch": 0.938063205990608, - "grad_norm": 0.5279253125190735, - "learning_rate": 1.3746245293395949e-05, - "loss": 0.0614, - "step": 36955 - }, - { - "epoch": 0.9381901256504632, - "grad_norm": 0.5093502998352051, - "learning_rate": 1.3745399162330247e-05, - "loss": 0.0598, - "step": 36960 - }, - { - "epoch": 0.9383170453103186, - "grad_norm": 0.3517601191997528, - "learning_rate": 1.3744553031264545e-05, - "loss": 0.0614, - "step": 36965 - }, - { - "epoch": 0.9384439649701739, - "grad_norm": 0.4084548056125641, - "learning_rate": 1.374370690019884e-05, - "loss": 0.0759, - "step": 36970 - }, - { - "epoch": 0.9385708846300292, - "grad_norm": 0.5142797231674194, - "learning_rate": 1.3742860769133139e-05, - "loss": 0.0471, - "step": 36975 - }, - { - "epoch": 0.9386978042898845, - "grad_norm": 0.32741260528564453, - "learning_rate": 1.3742014638067437e-05, - "loss": 0.0508, - "step": 36980 - }, - { - "epoch": 0.9388247239497398, - "grad_norm": 0.5469199419021606, - "learning_rate": 1.3741168507001736e-05, - "loss": 0.0575, - "step": 36985 - }, - { - "epoch": 0.9389516436095952, - "grad_norm": 0.544802188873291, - "learning_rate": 1.3740322375936032e-05, - "loss": 0.0568, - "step": 36990 - }, - { - "epoch": 0.9390785632694504, - "grad_norm": 0.557260274887085, - "learning_rate": 1.373947624487033e-05, - "loss": 0.0616, - "step": 36995 - }, - { - "epoch": 0.9392054829293057, - "grad_norm": 0.30689582228660583, - "learning_rate": 1.3738630113804629e-05, - "loss": 0.0535, - "step": 37000 - }, - { - "epoch": 0.9393324025891611, - "grad_norm": 0.4370027780532837, - "learning_rate": 1.3737783982738928e-05, - "loss": 0.0575, - "step": 37005 - }, - { - "epoch": 0.9394593222490164, - "grad_norm": 0.5320820212364197, - "learning_rate": 1.3736937851673224e-05, - "loss": 0.0767, - "step": 37010 - }, - { - "epoch": 0.9395862419088716, - "grad_norm": 0.5741347074508667, - "learning_rate": 1.3736091720607523e-05, - "loss": 0.0632, - "step": 37015 - }, - { - "epoch": 0.939713161568727, - "grad_norm": 0.9295310378074646, - "learning_rate": 1.3735245589541821e-05, - "loss": 0.0593, - "step": 37020 - }, - { - "epoch": 0.9398400812285823, - "grad_norm": 0.6261506676673889, - "learning_rate": 1.373439945847612e-05, - "loss": 0.0627, - "step": 37025 - }, - { - "epoch": 0.9399670008884377, - "grad_norm": 0.4047774374485016, - "learning_rate": 1.3733553327410416e-05, - "loss": 0.063, - "step": 37030 - }, - { - "epoch": 0.9400939205482929, - "grad_norm": 1.0955839157104492, - "learning_rate": 1.3732707196344715e-05, - "loss": 0.0465, - "step": 37035 - }, - { - "epoch": 0.9402208402081482, - "grad_norm": 0.49933353066444397, - "learning_rate": 1.3731861065279013e-05, - "loss": 0.0538, - "step": 37040 - }, - { - "epoch": 0.9403477598680036, - "grad_norm": 0.5039911866188049, - "learning_rate": 1.3731014934213311e-05, - "loss": 0.0637, - "step": 37045 - }, - { - "epoch": 0.9404746795278589, - "grad_norm": 0.4628029465675354, - "learning_rate": 1.3730168803147608e-05, - "loss": 0.0638, - "step": 37050 - }, - { - "epoch": 0.9406015991877141, - "grad_norm": 0.5315268635749817, - "learning_rate": 1.3729322672081906e-05, - "loss": 0.0599, - "step": 37055 - }, - { - "epoch": 0.9407285188475695, - "grad_norm": 0.4044140577316284, - "learning_rate": 1.3728476541016205e-05, - "loss": 0.0491, - "step": 37060 - }, - { - "epoch": 0.9408554385074248, - "grad_norm": 0.405211478471756, - "learning_rate": 1.3727630409950503e-05, - "loss": 0.0522, - "step": 37065 - }, - { - "epoch": 0.9409823581672802, - "grad_norm": 0.42339739203453064, - "learning_rate": 1.37267842788848e-05, - "loss": 0.0483, - "step": 37070 - }, - { - "epoch": 0.9411092778271354, - "grad_norm": 0.3462311327457428, - "learning_rate": 1.3725938147819098e-05, - "loss": 0.0936, - "step": 37075 - }, - { - "epoch": 0.9412361974869907, - "grad_norm": 0.5220473408699036, - "learning_rate": 1.3725092016753397e-05, - "loss": 0.0622, - "step": 37080 - }, - { - "epoch": 0.9413631171468461, - "grad_norm": 0.6707574129104614, - "learning_rate": 1.3724245885687695e-05, - "loss": 0.0496, - "step": 37085 - }, - { - "epoch": 0.9414900368067014, - "grad_norm": 0.523579478263855, - "learning_rate": 1.372339975462199e-05, - "loss": 0.0639, - "step": 37090 - }, - { - "epoch": 0.9416169564665566, - "grad_norm": 0.6312288045883179, - "learning_rate": 1.372255362355629e-05, - "loss": 0.0672, - "step": 37095 - }, - { - "epoch": 0.941743876126412, - "grad_norm": 0.7084813117980957, - "learning_rate": 1.3721707492490589e-05, - "loss": 0.08, - "step": 37100 - }, - { - "epoch": 0.9418707957862673, - "grad_norm": 0.42599087953567505, - "learning_rate": 1.3720861361424887e-05, - "loss": 0.0417, - "step": 37105 - }, - { - "epoch": 0.9419977154461227, - "grad_norm": 0.5907847285270691, - "learning_rate": 1.3720015230359182e-05, - "loss": 0.0629, - "step": 37110 - }, - { - "epoch": 0.9421246351059779, - "grad_norm": 0.4537334740161896, - "learning_rate": 1.371916909929348e-05, - "loss": 0.0528, - "step": 37115 - }, - { - "epoch": 0.9422515547658332, - "grad_norm": 0.4782498776912689, - "learning_rate": 1.3718322968227779e-05, - "loss": 0.0556, - "step": 37120 - }, - { - "epoch": 0.9423784744256886, - "grad_norm": 0.7373412847518921, - "learning_rate": 1.3717476837162077e-05, - "loss": 0.0586, - "step": 37125 - }, - { - "epoch": 0.9425053940855439, - "grad_norm": 0.4065229594707489, - "learning_rate": 1.3716630706096374e-05, - "loss": 0.0438, - "step": 37130 - }, - { - "epoch": 0.9426323137453991, - "grad_norm": 0.3608575165271759, - "learning_rate": 1.3715784575030672e-05, - "loss": 0.0674, - "step": 37135 - }, - { - "epoch": 0.9427592334052545, - "grad_norm": 0.2911969721317291, - "learning_rate": 1.371493844396497e-05, - "loss": 0.0493, - "step": 37140 - }, - { - "epoch": 0.9428861530651098, - "grad_norm": 0.49014100432395935, - "learning_rate": 1.371409231289927e-05, - "loss": 0.0739, - "step": 37145 - }, - { - "epoch": 0.9430130727249652, - "grad_norm": 0.51686692237854, - "learning_rate": 1.3713246181833566e-05, - "loss": 0.0587, - "step": 37150 - }, - { - "epoch": 0.9431399923848204, - "grad_norm": 0.5115757584571838, - "learning_rate": 1.3712400050767864e-05, - "loss": 0.0427, - "step": 37155 - }, - { - "epoch": 0.9432669120446757, - "grad_norm": 0.425749808549881, - "learning_rate": 1.3711553919702163e-05, - "loss": 0.0655, - "step": 37160 - }, - { - "epoch": 0.9433938317045311, - "grad_norm": 0.6688129305839539, - "learning_rate": 1.3710707788636461e-05, - "loss": 0.0607, - "step": 37165 - }, - { - "epoch": 0.9435207513643863, - "grad_norm": 0.48580509424209595, - "learning_rate": 1.370986165757076e-05, - "loss": 0.065, - "step": 37170 - }, - { - "epoch": 0.9436476710242416, - "grad_norm": 0.6403408646583557, - "learning_rate": 1.3709015526505056e-05, - "loss": 0.0778, - "step": 37175 - }, - { - "epoch": 0.943774590684097, - "grad_norm": 0.5566293001174927, - "learning_rate": 1.3708169395439355e-05, - "loss": 0.0663, - "step": 37180 - }, - { - "epoch": 0.9439015103439523, - "grad_norm": 0.41051942110061646, - "learning_rate": 1.3707323264373653e-05, - "loss": 0.0599, - "step": 37185 - }, - { - "epoch": 0.9440284300038075, - "grad_norm": 0.4368005096912384, - "learning_rate": 1.3706477133307951e-05, - "loss": 0.0702, - "step": 37190 - }, - { - "epoch": 0.9441553496636629, - "grad_norm": 0.5056063532829285, - "learning_rate": 1.3705631002242248e-05, - "loss": 0.0646, - "step": 37195 - }, - { - "epoch": 0.9442822693235182, - "grad_norm": 0.6388033628463745, - "learning_rate": 1.3704784871176547e-05, - "loss": 0.0653, - "step": 37200 - }, - { - "epoch": 0.9444091889833736, - "grad_norm": 0.6296546459197998, - "learning_rate": 1.3703938740110845e-05, - "loss": 0.0636, - "step": 37205 - }, - { - "epoch": 0.9445361086432288, - "grad_norm": 0.4812244474887848, - "learning_rate": 1.3703092609045143e-05, - "loss": 0.0419, - "step": 37210 - }, - { - "epoch": 0.9446630283030841, - "grad_norm": 0.6352214813232422, - "learning_rate": 1.370224647797944e-05, - "loss": 0.056, - "step": 37215 - }, - { - "epoch": 0.9447899479629395, - "grad_norm": 0.3237013518810272, - "learning_rate": 1.3701400346913738e-05, - "loss": 0.0475, - "step": 37220 - }, - { - "epoch": 0.9449168676227948, - "grad_norm": 0.3389245569705963, - "learning_rate": 1.3700554215848037e-05, - "loss": 0.0472, - "step": 37225 - }, - { - "epoch": 0.94504378728265, - "grad_norm": 0.3472314476966858, - "learning_rate": 1.3699708084782335e-05, - "loss": 0.0771, - "step": 37230 - }, - { - "epoch": 0.9451707069425054, - "grad_norm": 0.3507210910320282, - "learning_rate": 1.3698861953716632e-05, - "loss": 0.0612, - "step": 37235 - }, - { - "epoch": 0.9452976266023607, - "grad_norm": 0.36962080001831055, - "learning_rate": 1.369801582265093e-05, - "loss": 0.0589, - "step": 37240 - }, - { - "epoch": 0.9454245462622161, - "grad_norm": 0.6204604506492615, - "learning_rate": 1.3697169691585229e-05, - "loss": 0.0743, - "step": 37245 - }, - { - "epoch": 0.9455514659220713, - "grad_norm": 0.4680357873439789, - "learning_rate": 1.3696323560519527e-05, - "loss": 0.064, - "step": 37250 - }, - { - "epoch": 0.9456783855819266, - "grad_norm": 0.43132248520851135, - "learning_rate": 1.3695477429453822e-05, - "loss": 0.0745, - "step": 37255 - }, - { - "epoch": 0.945805305241782, - "grad_norm": 0.5815799832344055, - "learning_rate": 1.369463129838812e-05, - "loss": 0.0645, - "step": 37260 - }, - { - "epoch": 0.9459322249016373, - "grad_norm": 0.6624397039413452, - "learning_rate": 1.3693785167322419e-05, - "loss": 0.0429, - "step": 37265 - }, - { - "epoch": 0.9460591445614925, - "grad_norm": 0.5415029525756836, - "learning_rate": 1.3692939036256719e-05, - "loss": 0.0525, - "step": 37270 - }, - { - "epoch": 0.9461860642213479, - "grad_norm": 0.43924635648727417, - "learning_rate": 1.3692092905191014e-05, - "loss": 0.0517, - "step": 37275 - }, - { - "epoch": 0.9463129838812032, - "grad_norm": 0.8491019010543823, - "learning_rate": 1.3691246774125313e-05, - "loss": 0.0556, - "step": 37280 - }, - { - "epoch": 0.9464399035410586, - "grad_norm": 0.7150440812110901, - "learning_rate": 1.3690400643059611e-05, - "loss": 0.0712, - "step": 37285 - }, - { - "epoch": 0.9465668232009138, - "grad_norm": 0.2744773328304291, - "learning_rate": 1.368955451199391e-05, - "loss": 0.0509, - "step": 37290 - }, - { - "epoch": 0.9466937428607691, - "grad_norm": 0.4447001814842224, - "learning_rate": 1.3688708380928206e-05, - "loss": 0.0622, - "step": 37295 - }, - { - "epoch": 0.9468206625206245, - "grad_norm": 0.5713723301887512, - "learning_rate": 1.3687862249862504e-05, - "loss": 0.073, - "step": 37300 - }, - { - "epoch": 0.9469475821804798, - "grad_norm": 0.6108819246292114, - "learning_rate": 1.3687016118796803e-05, - "loss": 0.0651, - "step": 37305 - }, - { - "epoch": 0.947074501840335, - "grad_norm": 0.45929044485092163, - "learning_rate": 1.3686169987731101e-05, - "loss": 0.0522, - "step": 37310 - }, - { - "epoch": 0.9472014215001904, - "grad_norm": 0.6783097982406616, - "learning_rate": 1.3685323856665398e-05, - "loss": 0.0601, - "step": 37315 - }, - { - "epoch": 0.9473283411600457, - "grad_norm": 0.6391616463661194, - "learning_rate": 1.3684477725599696e-05, - "loss": 0.0572, - "step": 37320 - }, - { - "epoch": 0.9474552608199011, - "grad_norm": 0.6235865354537964, - "learning_rate": 1.3683631594533995e-05, - "loss": 0.0804, - "step": 37325 - }, - { - "epoch": 0.9475821804797563, - "grad_norm": 0.5140718817710876, - "learning_rate": 1.3682785463468293e-05, - "loss": 0.0681, - "step": 37330 - }, - { - "epoch": 0.9477091001396116, - "grad_norm": 0.5179203748703003, - "learning_rate": 1.368193933240259e-05, - "loss": 0.051, - "step": 37335 - }, - { - "epoch": 0.947836019799467, - "grad_norm": 0.5696921348571777, - "learning_rate": 1.3681093201336888e-05, - "loss": 0.0367, - "step": 37340 - }, - { - "epoch": 0.9479629394593222, - "grad_norm": 0.3894122242927551, - "learning_rate": 1.3680247070271187e-05, - "loss": 0.0686, - "step": 37345 - }, - { - "epoch": 0.9480898591191775, - "grad_norm": 0.49598369002342224, - "learning_rate": 1.3679400939205485e-05, - "loss": 0.0882, - "step": 37350 - }, - { - "epoch": 0.9482167787790329, - "grad_norm": 0.50491863489151, - "learning_rate": 1.3678554808139782e-05, - "loss": 0.0564, - "step": 37355 - }, - { - "epoch": 0.9483436984388882, - "grad_norm": 1.7531112432479858, - "learning_rate": 1.367770867707408e-05, - "loss": 0.0648, - "step": 37360 - }, - { - "epoch": 0.9484706180987434, - "grad_norm": 0.47489508986473083, - "learning_rate": 1.3676862546008379e-05, - "loss": 0.0438, - "step": 37365 - }, - { - "epoch": 0.9485975377585988, - "grad_norm": 0.6154206395149231, - "learning_rate": 1.3676016414942677e-05, - "loss": 0.0648, - "step": 37370 - }, - { - "epoch": 0.9487244574184541, - "grad_norm": 0.6262681484222412, - "learning_rate": 1.3675170283876974e-05, - "loss": 0.0762, - "step": 37375 - }, - { - "epoch": 0.9488513770783095, - "grad_norm": 0.37223103642463684, - "learning_rate": 1.3674324152811272e-05, - "loss": 0.0809, - "step": 37380 - }, - { - "epoch": 0.9489782967381647, - "grad_norm": 0.4056383967399597, - "learning_rate": 1.367347802174557e-05, - "loss": 0.0581, - "step": 37385 - }, - { - "epoch": 0.94910521639802, - "grad_norm": 0.4161015748977661, - "learning_rate": 1.3672631890679869e-05, - "loss": 0.0505, - "step": 37390 - }, - { - "epoch": 0.9492321360578754, - "grad_norm": 0.4613837003707886, - "learning_rate": 1.3671785759614164e-05, - "loss": 0.0624, - "step": 37395 - }, - { - "epoch": 0.9493590557177307, - "grad_norm": 0.6994709968566895, - "learning_rate": 1.3670939628548462e-05, - "loss": 0.0487, - "step": 37400 - }, - { - "epoch": 0.949485975377586, - "grad_norm": 0.41079404950141907, - "learning_rate": 1.367009349748276e-05, - "loss": 0.057, - "step": 37405 - }, - { - "epoch": 0.9496128950374413, - "grad_norm": 1.5310412645339966, - "learning_rate": 1.3669247366417059e-05, - "loss": 0.0709, - "step": 37410 - }, - { - "epoch": 0.9497398146972966, - "grad_norm": 0.5369924902915955, - "learning_rate": 1.3668401235351356e-05, - "loss": 0.0673, - "step": 37415 - }, - { - "epoch": 0.949866734357152, - "grad_norm": 0.5612553358078003, - "learning_rate": 1.3667555104285654e-05, - "loss": 0.0625, - "step": 37420 - }, - { - "epoch": 0.9499936540170072, - "grad_norm": 0.6397160291671753, - "learning_rate": 1.3666708973219953e-05, - "loss": 0.0542, - "step": 37425 - }, - { - "epoch": 0.9501205736768625, - "grad_norm": 0.4132636785507202, - "learning_rate": 1.3665862842154251e-05, - "loss": 0.0594, - "step": 37430 - }, - { - "epoch": 0.9502474933367179, - "grad_norm": 0.5150110125541687, - "learning_rate": 1.3665016711088548e-05, - "loss": 0.0743, - "step": 37435 - }, - { - "epoch": 0.9503744129965732, - "grad_norm": 0.4749082624912262, - "learning_rate": 1.3664170580022846e-05, - "loss": 0.0462, - "step": 37440 - }, - { - "epoch": 0.9505013326564284, - "grad_norm": 0.7347456812858582, - "learning_rate": 1.3663324448957145e-05, - "loss": 0.0643, - "step": 37445 - }, - { - "epoch": 0.9506282523162838, - "grad_norm": 0.47462087869644165, - "learning_rate": 1.3662478317891443e-05, - "loss": 0.0624, - "step": 37450 - }, - { - "epoch": 0.9507551719761391, - "grad_norm": 0.373261958360672, - "learning_rate": 1.366163218682574e-05, - "loss": 0.046, - "step": 37455 - }, - { - "epoch": 0.9508820916359945, - "grad_norm": 0.4735334813594818, - "learning_rate": 1.3660786055760038e-05, - "loss": 0.0691, - "step": 37460 - }, - { - "epoch": 0.9510090112958497, - "grad_norm": 0.64393550157547, - "learning_rate": 1.3659939924694336e-05, - "loss": 0.0722, - "step": 37465 - }, - { - "epoch": 0.951135930955705, - "grad_norm": 0.7164002060890198, - "learning_rate": 1.3659093793628635e-05, - "loss": 0.0602, - "step": 37470 - }, - { - "epoch": 0.9512628506155604, - "grad_norm": 1.5205156803131104, - "learning_rate": 1.3658247662562932e-05, - "loss": 0.0685, - "step": 37475 - }, - { - "epoch": 0.9513897702754157, - "grad_norm": 0.4238488972187042, - "learning_rate": 1.365740153149723e-05, - "loss": 0.0551, - "step": 37480 - }, - { - "epoch": 0.9515166899352709, - "grad_norm": 0.9169315099716187, - "learning_rate": 1.3656555400431528e-05, - "loss": 0.0623, - "step": 37485 - }, - { - "epoch": 0.9516436095951263, - "grad_norm": 0.565479576587677, - "learning_rate": 1.3655709269365827e-05, - "loss": 0.0587, - "step": 37490 - }, - { - "epoch": 0.9517705292549816, - "grad_norm": 0.7452170848846436, - "learning_rate": 1.3654863138300123e-05, - "loss": 0.0489, - "step": 37495 - }, - { - "epoch": 0.951897448914837, - "grad_norm": 0.4749549329280853, - "learning_rate": 1.3654017007234422e-05, - "loss": 0.0636, - "step": 37500 - }, - { - "epoch": 0.9520243685746922, - "grad_norm": 0.30268779397010803, - "learning_rate": 1.365317087616872e-05, - "loss": 0.0654, - "step": 37505 - }, - { - "epoch": 0.9521512882345475, - "grad_norm": 0.44320622086524963, - "learning_rate": 1.3652324745103019e-05, - "loss": 0.0764, - "step": 37510 - }, - { - "epoch": 0.9522782078944029, - "grad_norm": 0.696483850479126, - "learning_rate": 1.3651478614037315e-05, - "loss": 0.0561, - "step": 37515 - }, - { - "epoch": 0.9524051275542581, - "grad_norm": 0.5677118897438049, - "learning_rate": 1.3650632482971614e-05, - "loss": 0.0735, - "step": 37520 - }, - { - "epoch": 0.9525320472141134, - "grad_norm": 0.6997514367103577, - "learning_rate": 1.3649786351905912e-05, - "loss": 0.0483, - "step": 37525 - }, - { - "epoch": 0.9526589668739688, - "grad_norm": 0.5942908525466919, - "learning_rate": 1.364894022084021e-05, - "loss": 0.0594, - "step": 37530 - }, - { - "epoch": 0.9527858865338241, - "grad_norm": 0.46118417382240295, - "learning_rate": 1.3648094089774506e-05, - "loss": 0.0602, - "step": 37535 - }, - { - "epoch": 0.9529128061936794, - "grad_norm": 0.5482557415962219, - "learning_rate": 1.3647247958708804e-05, - "loss": 0.0653, - "step": 37540 - }, - { - "epoch": 0.9530397258535347, - "grad_norm": 0.2815789580345154, - "learning_rate": 1.3646401827643102e-05, - "loss": 0.0536, - "step": 37545 - }, - { - "epoch": 0.95316664551339, - "grad_norm": 0.42621755599975586, - "learning_rate": 1.36455556965774e-05, - "loss": 0.0546, - "step": 37550 - }, - { - "epoch": 0.9532935651732454, - "grad_norm": 0.623938798904419, - "learning_rate": 1.3644709565511698e-05, - "loss": 0.0608, - "step": 37555 - }, - { - "epoch": 0.9534204848331006, - "grad_norm": 0.5931280255317688, - "learning_rate": 1.3643863434445996e-05, - "loss": 0.0635, - "step": 37560 - }, - { - "epoch": 0.9535474044929559, - "grad_norm": 0.3155328333377838, - "learning_rate": 1.3643017303380294e-05, - "loss": 0.0656, - "step": 37565 - }, - { - "epoch": 0.9536743241528113, - "grad_norm": 0.5865635275840759, - "learning_rate": 1.3642171172314593e-05, - "loss": 0.061, - "step": 37570 - }, - { - "epoch": 0.9538012438126666, - "grad_norm": 0.5568182468414307, - "learning_rate": 1.364132504124889e-05, - "loss": 0.0674, - "step": 37575 - }, - { - "epoch": 0.9539281634725219, - "grad_norm": 1.1550322771072388, - "learning_rate": 1.3640478910183188e-05, - "loss": 0.0467, - "step": 37580 - }, - { - "epoch": 0.9540550831323772, - "grad_norm": 0.4878893196582794, - "learning_rate": 1.3639632779117486e-05, - "loss": 0.0614, - "step": 37585 - }, - { - "epoch": 0.9541820027922325, - "grad_norm": 0.4853946566581726, - "learning_rate": 1.3638786648051785e-05, - "loss": 0.052, - "step": 37590 - }, - { - "epoch": 0.9543089224520879, - "grad_norm": 0.44317030906677246, - "learning_rate": 1.3637940516986081e-05, - "loss": 0.0428, - "step": 37595 - }, - { - "epoch": 0.9544358421119431, - "grad_norm": 0.7227149605751038, - "learning_rate": 1.363709438592038e-05, - "loss": 0.0466, - "step": 37600 - }, - { - "epoch": 0.9545627617717984, - "grad_norm": 0.7459822297096252, - "learning_rate": 1.3636248254854678e-05, - "loss": 0.0774, - "step": 37605 - }, - { - "epoch": 0.9546896814316538, - "grad_norm": 1.1708035469055176, - "learning_rate": 1.3635402123788977e-05, - "loss": 0.0727, - "step": 37610 - }, - { - "epoch": 0.9548166010915091, - "grad_norm": 0.5012136101722717, - "learning_rate": 1.3634555992723273e-05, - "loss": 0.0497, - "step": 37615 - }, - { - "epoch": 0.9549435207513643, - "grad_norm": 0.35916873812675476, - "learning_rate": 1.3633709861657572e-05, - "loss": 0.0615, - "step": 37620 - }, - { - "epoch": 0.9550704404112197, - "grad_norm": 0.5044980049133301, - "learning_rate": 1.363286373059187e-05, - "loss": 0.0462, - "step": 37625 - }, - { - "epoch": 0.955197360071075, - "grad_norm": 1.0002521276474, - "learning_rate": 1.3632017599526168e-05, - "loss": 0.0729, - "step": 37630 - }, - { - "epoch": 0.9553242797309304, - "grad_norm": 0.37916895747184753, - "learning_rate": 1.3631171468460465e-05, - "loss": 0.0424, - "step": 37635 - }, - { - "epoch": 0.9554511993907856, - "grad_norm": 0.6496108174324036, - "learning_rate": 1.3630325337394764e-05, - "loss": 0.0747, - "step": 37640 - }, - { - "epoch": 0.9555781190506409, - "grad_norm": 0.5800801515579224, - "learning_rate": 1.3629479206329062e-05, - "loss": 0.0746, - "step": 37645 - }, - { - "epoch": 0.9557050387104963, - "grad_norm": 0.40007007122039795, - "learning_rate": 1.362863307526336e-05, - "loss": 0.0558, - "step": 37650 - }, - { - "epoch": 0.9558319583703516, - "grad_norm": 0.5699698328971863, - "learning_rate": 1.3627786944197655e-05, - "loss": 0.0724, - "step": 37655 - }, - { - "epoch": 0.9559588780302068, - "grad_norm": 0.7013781070709229, - "learning_rate": 1.3626940813131956e-05, - "loss": 0.0828, - "step": 37660 - }, - { - "epoch": 0.9560857976900622, - "grad_norm": 0.5781684517860413, - "learning_rate": 1.3626094682066254e-05, - "loss": 0.0631, - "step": 37665 - }, - { - "epoch": 0.9562127173499175, - "grad_norm": 0.7224883437156677, - "learning_rate": 1.3625248551000552e-05, - "loss": 0.0707, - "step": 37670 - }, - { - "epoch": 0.9563396370097728, - "grad_norm": 0.5216369032859802, - "learning_rate": 1.362440241993485e-05, - "loss": 0.0585, - "step": 37675 - }, - { - "epoch": 0.9564665566696281, - "grad_norm": 0.9086722731590271, - "learning_rate": 1.3623556288869146e-05, - "loss": 0.0725, - "step": 37680 - }, - { - "epoch": 0.9565934763294834, - "grad_norm": 0.4253166913986206, - "learning_rate": 1.3622710157803444e-05, - "loss": 0.0466, - "step": 37685 - }, - { - "epoch": 0.9567203959893388, - "grad_norm": 0.6246082186698914, - "learning_rate": 1.3621864026737743e-05, - "loss": 0.0537, - "step": 37690 - }, - { - "epoch": 0.956847315649194, - "grad_norm": 0.5930611491203308, - "learning_rate": 1.3621017895672043e-05, - "loss": 0.0613, - "step": 37695 - }, - { - "epoch": 0.9569742353090493, - "grad_norm": 0.5193292498588562, - "learning_rate": 1.3620171764606338e-05, - "loss": 0.0615, - "step": 37700 - }, - { - "epoch": 0.9571011549689047, - "grad_norm": 0.8304556608200073, - "learning_rate": 1.3619325633540636e-05, - "loss": 0.0449, - "step": 37705 - }, - { - "epoch": 0.95722807462876, - "grad_norm": 0.9163462519645691, - "learning_rate": 1.3618479502474934e-05, - "loss": 0.0592, - "step": 37710 - }, - { - "epoch": 0.9573549942886153, - "grad_norm": 0.6218233704566956, - "learning_rate": 1.3617633371409233e-05, - "loss": 0.0563, - "step": 37715 - }, - { - "epoch": 0.9574819139484706, - "grad_norm": 0.5728680491447449, - "learning_rate": 1.361678724034353e-05, - "loss": 0.0883, - "step": 37720 - }, - { - "epoch": 0.9576088336083259, - "grad_norm": 0.5751407742500305, - "learning_rate": 1.3615941109277828e-05, - "loss": 0.0564, - "step": 37725 - }, - { - "epoch": 0.9577357532681813, - "grad_norm": 0.25282782316207886, - "learning_rate": 1.3615094978212126e-05, - "loss": 0.0599, - "step": 37730 - }, - { - "epoch": 0.9578626729280365, - "grad_norm": 0.3634345233440399, - "learning_rate": 1.3614248847146425e-05, - "loss": 0.0556, - "step": 37735 - }, - { - "epoch": 0.9579895925878918, - "grad_norm": 0.9837189316749573, - "learning_rate": 1.3613402716080721e-05, - "loss": 0.0333, - "step": 37740 - }, - { - "epoch": 0.9581165122477472, - "grad_norm": 0.7594463229179382, - "learning_rate": 1.361255658501502e-05, - "loss": 0.0532, - "step": 37745 - }, - { - "epoch": 0.9582434319076025, - "grad_norm": 0.581487774848938, - "learning_rate": 1.3611710453949318e-05, - "loss": 0.0588, - "step": 37750 - }, - { - "epoch": 0.9583703515674578, - "grad_norm": 0.6895684003829956, - "learning_rate": 1.3610864322883617e-05, - "loss": 0.0541, - "step": 37755 - }, - { - "epoch": 0.9584972712273131, - "grad_norm": 0.6850426197052002, - "learning_rate": 1.3610018191817913e-05, - "loss": 0.0488, - "step": 37760 - }, - { - "epoch": 0.9586241908871684, - "grad_norm": 0.33700230717658997, - "learning_rate": 1.3609172060752212e-05, - "loss": 0.0566, - "step": 37765 - }, - { - "epoch": 0.9587511105470238, - "grad_norm": 0.39233699440956116, - "learning_rate": 1.360832592968651e-05, - "loss": 0.0536, - "step": 37770 - }, - { - "epoch": 0.958878030206879, - "grad_norm": 0.9672409296035767, - "learning_rate": 1.3607479798620809e-05, - "loss": 0.0593, - "step": 37775 - }, - { - "epoch": 0.9590049498667343, - "grad_norm": 0.34748539328575134, - "learning_rate": 1.3606633667555105e-05, - "loss": 0.04, - "step": 37780 - }, - { - "epoch": 0.9591318695265897, - "grad_norm": 0.6327432990074158, - "learning_rate": 1.3605787536489404e-05, - "loss": 0.0626, - "step": 37785 - }, - { - "epoch": 0.959258789186445, - "grad_norm": 0.47394898533821106, - "learning_rate": 1.3604941405423702e-05, - "loss": 0.0684, - "step": 37790 - }, - { - "epoch": 0.9593857088463003, - "grad_norm": 0.6787389516830444, - "learning_rate": 1.3604095274358e-05, - "loss": 0.0682, - "step": 37795 - }, - { - "epoch": 0.9595126285061556, - "grad_norm": 0.5666583180427551, - "learning_rate": 1.3603249143292297e-05, - "loss": 0.056, - "step": 37800 - }, - { - "epoch": 0.9596395481660109, - "grad_norm": 0.9058935642242432, - "learning_rate": 1.3602403012226596e-05, - "loss": 0.1042, - "step": 37805 - }, - { - "epoch": 0.9597664678258663, - "grad_norm": 1.5570876598358154, - "learning_rate": 1.3601556881160894e-05, - "loss": 0.0748, - "step": 37810 - }, - { - "epoch": 0.9598933874857215, - "grad_norm": 0.2845557928085327, - "learning_rate": 1.3600710750095192e-05, - "loss": 0.0526, - "step": 37815 - }, - { - "epoch": 0.9600203071455768, - "grad_norm": 0.45336616039276123, - "learning_rate": 1.3599864619029487e-05, - "loss": 0.0379, - "step": 37820 - }, - { - "epoch": 0.9601472268054322, - "grad_norm": 0.6008481383323669, - "learning_rate": 1.3599018487963786e-05, - "loss": 0.0751, - "step": 37825 - }, - { - "epoch": 0.9602741464652875, - "grad_norm": 0.3557358980178833, - "learning_rate": 1.3598172356898084e-05, - "loss": 0.0635, - "step": 37830 - }, - { - "epoch": 0.9604010661251428, - "grad_norm": 1.0952014923095703, - "learning_rate": 1.3597326225832383e-05, - "loss": 0.0759, - "step": 37835 - }, - { - "epoch": 0.9605279857849981, - "grad_norm": 0.2972720265388489, - "learning_rate": 1.359648009476668e-05, - "loss": 0.0558, - "step": 37840 - }, - { - "epoch": 0.9606549054448534, - "grad_norm": 0.41834551095962524, - "learning_rate": 1.3595633963700978e-05, - "loss": 0.0572, - "step": 37845 - }, - { - "epoch": 0.9607818251047087, - "grad_norm": 0.5668693780899048, - "learning_rate": 1.3594787832635276e-05, - "loss": 0.0428, - "step": 37850 - }, - { - "epoch": 0.960908744764564, - "grad_norm": 0.6771657466888428, - "learning_rate": 1.3593941701569575e-05, - "loss": 0.0671, - "step": 37855 - }, - { - "epoch": 0.9610356644244193, - "grad_norm": 0.538332998752594, - "learning_rate": 1.3593095570503871e-05, - "loss": 0.074, - "step": 37860 - }, - { - "epoch": 0.9611625840842747, - "grad_norm": 0.31496864557266235, - "learning_rate": 1.359224943943817e-05, - "loss": 0.0593, - "step": 37865 - }, - { - "epoch": 0.9612895037441299, - "grad_norm": 0.5340454578399658, - "learning_rate": 1.3591403308372468e-05, - "loss": 0.0596, - "step": 37870 - }, - { - "epoch": 0.9614164234039853, - "grad_norm": 0.4368014931678772, - "learning_rate": 1.3590557177306766e-05, - "loss": 0.0701, - "step": 37875 - }, - { - "epoch": 0.9615433430638406, - "grad_norm": 0.4302612543106079, - "learning_rate": 1.3589711046241063e-05, - "loss": 0.0654, - "step": 37880 - }, - { - "epoch": 0.9616702627236959, - "grad_norm": 0.6907369494438171, - "learning_rate": 1.3588864915175362e-05, - "loss": 0.0629, - "step": 37885 - }, - { - "epoch": 0.9617971823835512, - "grad_norm": 0.34225964546203613, - "learning_rate": 1.358801878410966e-05, - "loss": 0.0664, - "step": 37890 - }, - { - "epoch": 0.9619241020434065, - "grad_norm": 0.42943263053894043, - "learning_rate": 1.3587172653043958e-05, - "loss": 0.0564, - "step": 37895 - }, - { - "epoch": 0.9620510217032618, - "grad_norm": 0.48413532972335815, - "learning_rate": 1.3586326521978255e-05, - "loss": 0.0595, - "step": 37900 - }, - { - "epoch": 0.9621779413631172, - "grad_norm": 0.42062318325042725, - "learning_rate": 1.3585480390912553e-05, - "loss": 0.0622, - "step": 37905 - }, - { - "epoch": 0.9623048610229724, - "grad_norm": 0.6091448068618774, - "learning_rate": 1.3584634259846852e-05, - "loss": 0.0645, - "step": 37910 - }, - { - "epoch": 0.9624317806828278, - "grad_norm": 0.3167096674442291, - "learning_rate": 1.358378812878115e-05, - "loss": 0.0578, - "step": 37915 - }, - { - "epoch": 0.9625587003426831, - "grad_norm": 0.5458002686500549, - "learning_rate": 1.3582941997715447e-05, - "loss": 0.0634, - "step": 37920 - }, - { - "epoch": 0.9626856200025384, - "grad_norm": 0.5375428199768066, - "learning_rate": 1.3582095866649745e-05, - "loss": 0.0617, - "step": 37925 - }, - { - "epoch": 0.9628125396623937, - "grad_norm": 0.7559522390365601, - "learning_rate": 1.3581249735584044e-05, - "loss": 0.0662, - "step": 37930 - }, - { - "epoch": 0.962939459322249, - "grad_norm": 0.3759811520576477, - "learning_rate": 1.3580403604518342e-05, - "loss": 0.0569, - "step": 37935 - }, - { - "epoch": 0.9630663789821043, - "grad_norm": 0.6401395201683044, - "learning_rate": 1.3579557473452639e-05, - "loss": 0.0445, - "step": 37940 - }, - { - "epoch": 0.9631932986419597, - "grad_norm": 0.5405794382095337, - "learning_rate": 1.3578711342386937e-05, - "loss": 0.0564, - "step": 37945 - }, - { - "epoch": 0.9633202183018149, - "grad_norm": 0.5333657264709473, - "learning_rate": 1.3577865211321236e-05, - "loss": 0.0575, - "step": 37950 - }, - { - "epoch": 0.9634471379616703, - "grad_norm": 0.33385029435157776, - "learning_rate": 1.3577019080255534e-05, - "loss": 0.0454, - "step": 37955 - }, - { - "epoch": 0.9635740576215256, - "grad_norm": 0.8635277152061462, - "learning_rate": 1.3576172949189829e-05, - "loss": 0.0638, - "step": 37960 - }, - { - "epoch": 0.9637009772813809, - "grad_norm": 0.3057486116886139, - "learning_rate": 1.3575326818124128e-05, - "loss": 0.0452, - "step": 37965 - }, - { - "epoch": 0.9638278969412362, - "grad_norm": 0.6351229548454285, - "learning_rate": 1.3574480687058426e-05, - "loss": 0.0851, - "step": 37970 - }, - { - "epoch": 0.9639548166010915, - "grad_norm": 0.3086373507976532, - "learning_rate": 1.3573634555992724e-05, - "loss": 0.0605, - "step": 37975 - }, - { - "epoch": 0.9640817362609468, - "grad_norm": 0.7313413023948669, - "learning_rate": 1.3572788424927021e-05, - "loss": 0.0568, - "step": 37980 - }, - { - "epoch": 0.9642086559208022, - "grad_norm": 0.4607091546058655, - "learning_rate": 1.357194229386132e-05, - "loss": 0.0649, - "step": 37985 - }, - { - "epoch": 0.9643355755806574, - "grad_norm": 0.4052794575691223, - "learning_rate": 1.3571096162795618e-05, - "loss": 0.0494, - "step": 37990 - }, - { - "epoch": 0.9644624952405128, - "grad_norm": 0.5025202035903931, - "learning_rate": 1.3570250031729916e-05, - "loss": 0.0565, - "step": 37995 - }, - { - "epoch": 0.9645894149003681, - "grad_norm": 0.512332022190094, - "learning_rate": 1.3569403900664213e-05, - "loss": 0.0443, - "step": 38000 - }, - { - "epoch": 0.9647163345602234, - "grad_norm": 0.5475181937217712, - "learning_rate": 1.3568557769598511e-05, - "loss": 0.0661, - "step": 38005 - }, - { - "epoch": 0.9648432542200787, - "grad_norm": 0.4782204329967499, - "learning_rate": 1.356771163853281e-05, - "loss": 0.062, - "step": 38010 - }, - { - "epoch": 0.964970173879934, - "grad_norm": 0.9042017459869385, - "learning_rate": 1.3566865507467108e-05, - "loss": 0.0616, - "step": 38015 - }, - { - "epoch": 0.9650970935397893, - "grad_norm": 0.6993356347084045, - "learning_rate": 1.3566019376401405e-05, - "loss": 0.0727, - "step": 38020 - }, - { - "epoch": 0.9652240131996446, - "grad_norm": 0.6631599068641663, - "learning_rate": 1.3565173245335703e-05, - "loss": 0.0677, - "step": 38025 - }, - { - "epoch": 0.9653509328594999, - "grad_norm": 0.575522780418396, - "learning_rate": 1.3564327114270002e-05, - "loss": 0.066, - "step": 38030 - }, - { - "epoch": 0.9654778525193553, - "grad_norm": 0.5447171330451965, - "learning_rate": 1.35634809832043e-05, - "loss": 0.0571, - "step": 38035 - }, - { - "epoch": 0.9656047721792106, - "grad_norm": 0.540241539478302, - "learning_rate": 1.3562634852138597e-05, - "loss": 0.0697, - "step": 38040 - }, - { - "epoch": 0.9657316918390658, - "grad_norm": 0.4562129080295563, - "learning_rate": 1.3561788721072895e-05, - "loss": 0.0955, - "step": 38045 - }, - { - "epoch": 0.9658586114989212, - "grad_norm": 0.45325878262519836, - "learning_rate": 1.3560942590007194e-05, - "loss": 0.065, - "step": 38050 - }, - { - "epoch": 0.9659855311587765, - "grad_norm": 0.3403722643852234, - "learning_rate": 1.3560096458941492e-05, - "loss": 0.0531, - "step": 38055 - }, - { - "epoch": 0.9661124508186318, - "grad_norm": 0.45430174469947815, - "learning_rate": 1.3559250327875789e-05, - "loss": 0.0593, - "step": 38060 - }, - { - "epoch": 0.9662393704784871, - "grad_norm": 0.6660622358322144, - "learning_rate": 1.3558404196810087e-05, - "loss": 0.0617, - "step": 38065 - }, - { - "epoch": 0.9663662901383424, - "grad_norm": 0.7108929753303528, - "learning_rate": 1.3557558065744386e-05, - "loss": 0.0476, - "step": 38070 - }, - { - "epoch": 0.9664932097981977, - "grad_norm": 0.5406216382980347, - "learning_rate": 1.3556711934678684e-05, - "loss": 0.0559, - "step": 38075 - }, - { - "epoch": 0.9666201294580531, - "grad_norm": 0.3998161852359772, - "learning_rate": 1.3555865803612979e-05, - "loss": 0.0512, - "step": 38080 - }, - { - "epoch": 0.9667470491179083, - "grad_norm": 0.5835168957710266, - "learning_rate": 1.3555019672547279e-05, - "loss": 0.0482, - "step": 38085 - }, - { - "epoch": 0.9668739687777637, - "grad_norm": 0.7912107706069946, - "learning_rate": 1.3554173541481577e-05, - "loss": 0.0598, - "step": 38090 - }, - { - "epoch": 0.967000888437619, - "grad_norm": 0.42650288343429565, - "learning_rate": 1.3553327410415876e-05, - "loss": 0.051, - "step": 38095 - }, - { - "epoch": 0.9671278080974743, - "grad_norm": 0.5591062307357788, - "learning_rate": 1.355248127935017e-05, - "loss": 0.0516, - "step": 38100 - }, - { - "epoch": 0.9672547277573296, - "grad_norm": 0.5177953839302063, - "learning_rate": 1.355163514828447e-05, - "loss": 0.0386, - "step": 38105 - }, - { - "epoch": 0.9673816474171849, - "grad_norm": 0.4458886384963989, - "learning_rate": 1.3550789017218768e-05, - "loss": 0.0686, - "step": 38110 - }, - { - "epoch": 0.9675085670770402, - "grad_norm": 1.1793392896652222, - "learning_rate": 1.3549942886153066e-05, - "loss": 0.0735, - "step": 38115 - }, - { - "epoch": 0.9676354867368956, - "grad_norm": 0.47522619366645813, - "learning_rate": 1.3549096755087363e-05, - "loss": 0.0472, - "step": 38120 - }, - { - "epoch": 0.9677624063967508, - "grad_norm": 0.49401694536209106, - "learning_rate": 1.3548250624021661e-05, - "loss": 0.0503, - "step": 38125 - }, - { - "epoch": 0.9678893260566062, - "grad_norm": 0.3461948037147522, - "learning_rate": 1.354740449295596e-05, - "loss": 0.0744, - "step": 38130 - }, - { - "epoch": 0.9680162457164615, - "grad_norm": 2.7518563270568848, - "learning_rate": 1.3546558361890258e-05, - "loss": 0.0636, - "step": 38135 - }, - { - "epoch": 0.9681431653763168, - "grad_norm": 0.44443538784980774, - "learning_rate": 1.3545712230824555e-05, - "loss": 0.0456, - "step": 38140 - }, - { - "epoch": 0.9682700850361721, - "grad_norm": 0.4310300648212433, - "learning_rate": 1.3544866099758853e-05, - "loss": 0.058, - "step": 38145 - }, - { - "epoch": 0.9683970046960274, - "grad_norm": 0.37048810720443726, - "learning_rate": 1.3544019968693151e-05, - "loss": 0.0596, - "step": 38150 - }, - { - "epoch": 0.9685239243558827, - "grad_norm": 0.8683078289031982, - "learning_rate": 1.354317383762745e-05, - "loss": 0.0482, - "step": 38155 - }, - { - "epoch": 0.9686508440157381, - "grad_norm": 0.49417757987976074, - "learning_rate": 1.3542327706561747e-05, - "loss": 0.0804, - "step": 38160 - }, - { - "epoch": 0.9687777636755933, - "grad_norm": 0.6594704389572144, - "learning_rate": 1.3541481575496045e-05, - "loss": 0.0476, - "step": 38165 - }, - { - "epoch": 0.9689046833354487, - "grad_norm": 1.1164093017578125, - "learning_rate": 1.3540635444430343e-05, - "loss": 0.0736, - "step": 38170 - }, - { - "epoch": 0.969031602995304, - "grad_norm": 0.657072126865387, - "learning_rate": 1.3539789313364642e-05, - "loss": 0.062, - "step": 38175 - }, - { - "epoch": 0.9691585226551593, - "grad_norm": 0.4030343294143677, - "learning_rate": 1.3538943182298938e-05, - "loss": 0.0653, - "step": 38180 - }, - { - "epoch": 0.9692854423150146, - "grad_norm": 0.35666584968566895, - "learning_rate": 1.3538097051233237e-05, - "loss": 0.0558, - "step": 38185 - }, - { - "epoch": 0.9694123619748699, - "grad_norm": 0.5898399353027344, - "learning_rate": 1.3537250920167535e-05, - "loss": 0.0723, - "step": 38190 - }, - { - "epoch": 0.9695392816347252, - "grad_norm": 0.5976544618606567, - "learning_rate": 1.3536404789101834e-05, - "loss": 0.0921, - "step": 38195 - }, - { - "epoch": 0.9696662012945805, - "grad_norm": 0.546617329120636, - "learning_rate": 1.3535558658036132e-05, - "loss": 0.0544, - "step": 38200 - }, - { - "epoch": 0.9697931209544358, - "grad_norm": 0.49620890617370605, - "learning_rate": 1.3534712526970429e-05, - "loss": 0.0667, - "step": 38205 - }, - { - "epoch": 0.9699200406142912, - "grad_norm": 0.3499632179737091, - "learning_rate": 1.3533866395904727e-05, - "loss": 0.0578, - "step": 38210 - }, - { - "epoch": 0.9700469602741465, - "grad_norm": 0.756793200969696, - "learning_rate": 1.3533020264839026e-05, - "loss": 0.0617, - "step": 38215 - }, - { - "epoch": 0.9701738799340017, - "grad_norm": 0.4743309020996094, - "learning_rate": 1.3532174133773324e-05, - "loss": 0.0691, - "step": 38220 - }, - { - "epoch": 0.9703007995938571, - "grad_norm": 0.8871796131134033, - "learning_rate": 1.353132800270762e-05, - "loss": 0.0595, - "step": 38225 - }, - { - "epoch": 0.9704277192537124, - "grad_norm": 0.47601020336151123, - "learning_rate": 1.3530481871641919e-05, - "loss": 0.0559, - "step": 38230 - }, - { - "epoch": 0.9705546389135677, - "grad_norm": 0.41190677881240845, - "learning_rate": 1.3529635740576218e-05, - "loss": 0.0564, - "step": 38235 - }, - { - "epoch": 0.970681558573423, - "grad_norm": 0.5991368293762207, - "learning_rate": 1.3528789609510516e-05, - "loss": 0.0746, - "step": 38240 - }, - { - "epoch": 0.9708084782332783, - "grad_norm": 0.4760580062866211, - "learning_rate": 1.3527943478444811e-05, - "loss": 0.0607, - "step": 38245 - }, - { - "epoch": 0.9709353978931337, - "grad_norm": 1.4410046339035034, - "learning_rate": 1.352709734737911e-05, - "loss": 0.0717, - "step": 38250 - }, - { - "epoch": 0.971062317552989, - "grad_norm": 0.4348889887332916, - "learning_rate": 1.3526251216313408e-05, - "loss": 0.0667, - "step": 38255 - }, - { - "epoch": 0.9711892372128442, - "grad_norm": 0.42963841557502747, - "learning_rate": 1.3525405085247708e-05, - "loss": 0.0462, - "step": 38260 - }, - { - "epoch": 0.9713161568726996, - "grad_norm": 0.567651629447937, - "learning_rate": 1.3524558954182003e-05, - "loss": 0.0594, - "step": 38265 - }, - { - "epoch": 0.9714430765325549, - "grad_norm": 0.6852742433547974, - "learning_rate": 1.3523712823116301e-05, - "loss": 0.0758, - "step": 38270 - }, - { - "epoch": 0.9715699961924102, - "grad_norm": 0.5047458410263062, - "learning_rate": 1.35228666920506e-05, - "loss": 0.0522, - "step": 38275 - }, - { - "epoch": 0.9716969158522655, - "grad_norm": 0.8526800274848938, - "learning_rate": 1.3522020560984898e-05, - "loss": 0.0562, - "step": 38280 - }, - { - "epoch": 0.9718238355121208, - "grad_norm": 0.6889795660972595, - "learning_rate": 1.3521174429919195e-05, - "loss": 0.0535, - "step": 38285 - }, - { - "epoch": 0.9719507551719762, - "grad_norm": 0.5194762349128723, - "learning_rate": 1.3520328298853493e-05, - "loss": 0.0647, - "step": 38290 - }, - { - "epoch": 0.9720776748318315, - "grad_norm": 0.7228438854217529, - "learning_rate": 1.3519482167787792e-05, - "loss": 0.0541, - "step": 38295 - }, - { - "epoch": 0.9722045944916867, - "grad_norm": 0.5768752098083496, - "learning_rate": 1.351863603672209e-05, - "loss": 0.0568, - "step": 38300 - }, - { - "epoch": 0.9723315141515421, - "grad_norm": 0.3087155818939209, - "learning_rate": 1.3517789905656387e-05, - "loss": 0.0462, - "step": 38305 - }, - { - "epoch": 0.9724584338113974, - "grad_norm": 0.42124035954475403, - "learning_rate": 1.3516943774590685e-05, - "loss": 0.0528, - "step": 38310 - }, - { - "epoch": 0.9725853534712527, - "grad_norm": 0.7143296599388123, - "learning_rate": 1.3516097643524983e-05, - "loss": 0.0603, - "step": 38315 - }, - { - "epoch": 0.972712273131108, - "grad_norm": 0.4008719325065613, - "learning_rate": 1.3515251512459282e-05, - "loss": 0.0612, - "step": 38320 - }, - { - "epoch": 0.9728391927909633, - "grad_norm": 0.527399480342865, - "learning_rate": 1.3514405381393579e-05, - "loss": 0.075, - "step": 38325 - }, - { - "epoch": 0.9729661124508187, - "grad_norm": 0.6165546774864197, - "learning_rate": 1.3513559250327877e-05, - "loss": 0.053, - "step": 38330 - }, - { - "epoch": 0.973093032110674, - "grad_norm": 0.49419671297073364, - "learning_rate": 1.3512713119262175e-05, - "loss": 0.0527, - "step": 38335 - }, - { - "epoch": 0.9732199517705292, - "grad_norm": 0.8450465798377991, - "learning_rate": 1.3511866988196474e-05, - "loss": 0.0461, - "step": 38340 - }, - { - "epoch": 0.9733468714303846, - "grad_norm": 0.47831329703330994, - "learning_rate": 1.351102085713077e-05, - "loss": 0.0516, - "step": 38345 - }, - { - "epoch": 0.9734737910902399, - "grad_norm": 0.5680994987487793, - "learning_rate": 1.3510174726065069e-05, - "loss": 0.0669, - "step": 38350 - }, - { - "epoch": 0.9736007107500952, - "grad_norm": 0.7097906470298767, - "learning_rate": 1.3509328594999367e-05, - "loss": 0.058, - "step": 38355 - }, - { - "epoch": 0.9737276304099505, - "grad_norm": 0.7027037143707275, - "learning_rate": 1.3508482463933666e-05, - "loss": 0.0649, - "step": 38360 - }, - { - "epoch": 0.9738545500698058, - "grad_norm": 0.696455180644989, - "learning_rate": 1.3507636332867962e-05, - "loss": 0.0589, - "step": 38365 - }, - { - "epoch": 0.9739814697296612, - "grad_norm": 1.1354196071624756, - "learning_rate": 1.350679020180226e-05, - "loss": 0.0548, - "step": 38370 - }, - { - "epoch": 0.9741083893895164, - "grad_norm": 0.6800327301025391, - "learning_rate": 1.350594407073656e-05, - "loss": 0.0554, - "step": 38375 - }, - { - "epoch": 0.9742353090493717, - "grad_norm": 0.4763775169849396, - "learning_rate": 1.3505097939670858e-05, - "loss": 0.0535, - "step": 38380 - }, - { - "epoch": 0.9743622287092271, - "grad_norm": 0.5962806940078735, - "learning_rate": 1.3504251808605153e-05, - "loss": 0.0579, - "step": 38385 - }, - { - "epoch": 0.9744891483690824, - "grad_norm": 0.3646998107433319, - "learning_rate": 1.3503405677539451e-05, - "loss": 0.0695, - "step": 38390 - }, - { - "epoch": 0.9746160680289376, - "grad_norm": 0.6711261868476868, - "learning_rate": 1.350255954647375e-05, - "loss": 0.0434, - "step": 38395 - }, - { - "epoch": 0.974742987688793, - "grad_norm": 0.321520060300827, - "learning_rate": 1.3501713415408048e-05, - "loss": 0.0432, - "step": 38400 - }, - { - "epoch": 0.9748699073486483, - "grad_norm": 0.5779749751091003, - "learning_rate": 1.3500867284342345e-05, - "loss": 0.0546, - "step": 38405 - }, - { - "epoch": 0.9749968270085037, - "grad_norm": 0.5296617150306702, - "learning_rate": 1.3500021153276643e-05, - "loss": 0.0734, - "step": 38410 - }, - { - "epoch": 0.9751237466683589, - "grad_norm": 0.5146998167037964, - "learning_rate": 1.3499175022210941e-05, - "loss": 0.0613, - "step": 38415 - }, - { - "epoch": 0.9752506663282142, - "grad_norm": 0.4075583815574646, - "learning_rate": 1.349832889114524e-05, - "loss": 0.0561, - "step": 38420 - }, - { - "epoch": 0.9753775859880696, - "grad_norm": 0.3966740071773529, - "learning_rate": 1.3497482760079536e-05, - "loss": 0.0653, - "step": 38425 - }, - { - "epoch": 0.9755045056479249, - "grad_norm": 0.4921891391277313, - "learning_rate": 1.3496636629013835e-05, - "loss": 0.0722, - "step": 38430 - }, - { - "epoch": 0.9756314253077801, - "grad_norm": 0.3672555088996887, - "learning_rate": 1.3495790497948133e-05, - "loss": 0.0586, - "step": 38435 - }, - { - "epoch": 0.9757583449676355, - "grad_norm": 0.38045433163642883, - "learning_rate": 1.3494944366882432e-05, - "loss": 0.06, - "step": 38440 - }, - { - "epoch": 0.9758852646274908, - "grad_norm": 0.946666419506073, - "learning_rate": 1.3494098235816728e-05, - "loss": 0.0662, - "step": 38445 - }, - { - "epoch": 0.9760121842873462, - "grad_norm": 0.3988424837589264, - "learning_rate": 1.3493252104751027e-05, - "loss": 0.0549, - "step": 38450 - }, - { - "epoch": 0.9761391039472014, - "grad_norm": 0.34339439868927, - "learning_rate": 1.3492405973685325e-05, - "loss": 0.0759, - "step": 38455 - }, - { - "epoch": 0.9762660236070567, - "grad_norm": 0.47789520025253296, - "learning_rate": 1.3491559842619624e-05, - "loss": 0.0702, - "step": 38460 - }, - { - "epoch": 0.9763929432669121, - "grad_norm": 0.4403241276741028, - "learning_rate": 1.349071371155392e-05, - "loss": 0.059, - "step": 38465 - }, - { - "epoch": 0.9765198629267674, - "grad_norm": 0.932727575302124, - "learning_rate": 1.3489867580488219e-05, - "loss": 0.0532, - "step": 38470 - }, - { - "epoch": 0.9766467825866226, - "grad_norm": 0.47536495327949524, - "learning_rate": 1.3489021449422517e-05, - "loss": 0.056, - "step": 38475 - }, - { - "epoch": 0.976773702246478, - "grad_norm": 0.3048267960548401, - "learning_rate": 1.3488175318356816e-05, - "loss": 0.0422, - "step": 38480 - }, - { - "epoch": 0.9769006219063333, - "grad_norm": 0.4209669232368469, - "learning_rate": 1.3487329187291112e-05, - "loss": 0.0627, - "step": 38485 - }, - { - "epoch": 0.9770275415661887, - "grad_norm": 0.7567689418792725, - "learning_rate": 1.348648305622541e-05, - "loss": 0.0626, - "step": 38490 - }, - { - "epoch": 0.9771544612260439, - "grad_norm": 0.5055646896362305, - "learning_rate": 1.3485636925159709e-05, - "loss": 0.0636, - "step": 38495 - }, - { - "epoch": 0.9772813808858992, - "grad_norm": 0.5473458170890808, - "learning_rate": 1.3484790794094007e-05, - "loss": 0.0691, - "step": 38500 - }, - { - "epoch": 0.9774083005457546, - "grad_norm": 0.3867489695549011, - "learning_rate": 1.3483944663028304e-05, - "loss": 0.0625, - "step": 38505 - }, - { - "epoch": 0.9775352202056099, - "grad_norm": 0.9968945980072021, - "learning_rate": 1.3483098531962603e-05, - "loss": 0.0669, - "step": 38510 - }, - { - "epoch": 0.9776621398654651, - "grad_norm": 0.6460256576538086, - "learning_rate": 1.3482252400896901e-05, - "loss": 0.0557, - "step": 38515 - }, - { - "epoch": 0.9777890595253205, - "grad_norm": 0.4439522325992584, - "learning_rate": 1.34814062698312e-05, - "loss": 0.0564, - "step": 38520 - }, - { - "epoch": 0.9779159791851758, - "grad_norm": 0.33931565284729004, - "learning_rate": 1.3480560138765494e-05, - "loss": 0.0561, - "step": 38525 - }, - { - "epoch": 0.9780428988450312, - "grad_norm": 0.4667336046695709, - "learning_rate": 1.3479714007699793e-05, - "loss": 0.0587, - "step": 38530 - }, - { - "epoch": 0.9781698185048864, - "grad_norm": 0.6280606985092163, - "learning_rate": 1.3478867876634091e-05, - "loss": 0.0615, - "step": 38535 - }, - { - "epoch": 0.9782967381647417, - "grad_norm": 0.5357491970062256, - "learning_rate": 1.347802174556839e-05, - "loss": 0.0652, - "step": 38540 - }, - { - "epoch": 0.9784236578245971, - "grad_norm": 0.24876882135868073, - "learning_rate": 1.3477175614502686e-05, - "loss": 0.0446, - "step": 38545 - }, - { - "epoch": 0.9785505774844523, - "grad_norm": 0.4280865490436554, - "learning_rate": 1.3476329483436985e-05, - "loss": 0.0559, - "step": 38550 - }, - { - "epoch": 0.9786774971443076, - "grad_norm": 0.49096032977104187, - "learning_rate": 1.3475483352371283e-05, - "loss": 0.0742, - "step": 38555 - }, - { - "epoch": 0.978804416804163, - "grad_norm": 0.5077487826347351, - "learning_rate": 1.3474637221305581e-05, - "loss": 0.0609, - "step": 38560 - }, - { - "epoch": 0.9789313364640183, - "grad_norm": 0.5790875554084778, - "learning_rate": 1.3473791090239878e-05, - "loss": 0.0597, - "step": 38565 - }, - { - "epoch": 0.9790582561238735, - "grad_norm": 0.45664262771606445, - "learning_rate": 1.3472944959174177e-05, - "loss": 0.0576, - "step": 38570 - }, - { - "epoch": 0.9791851757837289, - "grad_norm": 0.5568498969078064, - "learning_rate": 1.3472098828108475e-05, - "loss": 0.0716, - "step": 38575 - }, - { - "epoch": 0.9793120954435842, - "grad_norm": 0.4841797351837158, - "learning_rate": 1.3471252697042773e-05, - "loss": 0.059, - "step": 38580 - }, - { - "epoch": 0.9794390151034396, - "grad_norm": 0.3932197690010071, - "learning_rate": 1.347040656597707e-05, - "loss": 0.0586, - "step": 38585 - }, - { - "epoch": 0.9795659347632948, - "grad_norm": 0.5082854628562927, - "learning_rate": 1.3469560434911368e-05, - "loss": 0.0442, - "step": 38590 - }, - { - "epoch": 0.9796928544231501, - "grad_norm": 0.4127240777015686, - "learning_rate": 1.3468714303845667e-05, - "loss": 0.0398, - "step": 38595 - }, - { - "epoch": 0.9798197740830055, - "grad_norm": 0.4823914170265198, - "learning_rate": 1.3467868172779965e-05, - "loss": 0.0538, - "step": 38600 - }, - { - "epoch": 0.9799466937428608, - "grad_norm": 0.3836614787578583, - "learning_rate": 1.3467022041714262e-05, - "loss": 0.0448, - "step": 38605 - }, - { - "epoch": 0.980073613402716, - "grad_norm": 2.3650877475738525, - "learning_rate": 1.346617591064856e-05, - "loss": 0.0679, - "step": 38610 - }, - { - "epoch": 0.9802005330625714, - "grad_norm": 0.6285296082496643, - "learning_rate": 1.3465329779582859e-05, - "loss": 0.0744, - "step": 38615 - }, - { - "epoch": 0.9803274527224267, - "grad_norm": 0.6920507550239563, - "learning_rate": 1.3464483648517157e-05, - "loss": 0.0437, - "step": 38620 - }, - { - "epoch": 0.9804543723822821, - "grad_norm": 0.6150595545768738, - "learning_rate": 1.3463637517451454e-05, - "loss": 0.0702, - "step": 38625 - }, - { - "epoch": 0.9805812920421373, - "grad_norm": 0.5214827656745911, - "learning_rate": 1.3462791386385752e-05, - "loss": 0.0588, - "step": 38630 - }, - { - "epoch": 0.9807082117019926, - "grad_norm": 0.5018746256828308, - "learning_rate": 1.346194525532005e-05, - "loss": 0.0603, - "step": 38635 - }, - { - "epoch": 0.980835131361848, - "grad_norm": 0.40171152353286743, - "learning_rate": 1.3461099124254349e-05, - "loss": 0.0441, - "step": 38640 - }, - { - "epoch": 0.9809620510217033, - "grad_norm": 0.5690922737121582, - "learning_rate": 1.3460252993188644e-05, - "loss": 0.0682, - "step": 38645 - }, - { - "epoch": 0.9810889706815585, - "grad_norm": 1.0740294456481934, - "learning_rate": 1.3459406862122944e-05, - "loss": 0.087, - "step": 38650 - }, - { - "epoch": 0.9812158903414139, - "grad_norm": 0.34657803177833557, - "learning_rate": 1.3458560731057243e-05, - "loss": 0.0533, - "step": 38655 - }, - { - "epoch": 0.9813428100012692, - "grad_norm": 0.6289403438568115, - "learning_rate": 1.3457714599991541e-05, - "loss": 0.0858, - "step": 38660 - }, - { - "epoch": 0.9814697296611246, - "grad_norm": 0.6840898394584656, - "learning_rate": 1.3456868468925836e-05, - "loss": 0.0726, - "step": 38665 - }, - { - "epoch": 0.9815966493209798, - "grad_norm": 0.3440672755241394, - "learning_rate": 1.3456022337860134e-05, - "loss": 0.0503, - "step": 38670 - }, - { - "epoch": 0.9817235689808351, - "grad_norm": 0.3493899405002594, - "learning_rate": 1.3455176206794433e-05, - "loss": 0.0507, - "step": 38675 - }, - { - "epoch": 0.9818504886406905, - "grad_norm": 1.0021263360977173, - "learning_rate": 1.3454330075728731e-05, - "loss": 0.0591, - "step": 38680 - }, - { - "epoch": 0.9819774083005458, - "grad_norm": 1.1097482442855835, - "learning_rate": 1.3453483944663028e-05, - "loss": 0.0557, - "step": 38685 - }, - { - "epoch": 0.982104327960401, - "grad_norm": 0.4422120749950409, - "learning_rate": 1.3452637813597326e-05, - "loss": 0.0541, - "step": 38690 - }, - { - "epoch": 0.9822312476202564, - "grad_norm": 0.501522958278656, - "learning_rate": 1.3451791682531625e-05, - "loss": 0.0511, - "step": 38695 - }, - { - "epoch": 0.9823581672801117, - "grad_norm": 0.4648786783218384, - "learning_rate": 1.3450945551465923e-05, - "loss": 0.0467, - "step": 38700 - }, - { - "epoch": 0.9824850869399671, - "grad_norm": 0.5543327331542969, - "learning_rate": 1.345009942040022e-05, - "loss": 0.0518, - "step": 38705 - }, - { - "epoch": 0.9826120065998223, - "grad_norm": 0.6231045126914978, - "learning_rate": 1.3449253289334518e-05, - "loss": 0.0581, - "step": 38710 - }, - { - "epoch": 0.9827389262596776, - "grad_norm": 0.3379734754562378, - "learning_rate": 1.3448407158268817e-05, - "loss": 0.0457, - "step": 38715 - }, - { - "epoch": 0.982865845919533, - "grad_norm": 0.41919153928756714, - "learning_rate": 1.3447561027203115e-05, - "loss": 0.0532, - "step": 38720 - }, - { - "epoch": 0.9829927655793882, - "grad_norm": 0.44526728987693787, - "learning_rate": 1.3446714896137413e-05, - "loss": 0.06, - "step": 38725 - }, - { - "epoch": 0.9831196852392435, - "grad_norm": 0.8831859230995178, - "learning_rate": 1.344586876507171e-05, - "loss": 0.0574, - "step": 38730 - }, - { - "epoch": 0.9832466048990989, - "grad_norm": 0.3815108835697174, - "learning_rate": 1.3445022634006009e-05, - "loss": 0.0906, - "step": 38735 - }, - { - "epoch": 0.9833735245589542, - "grad_norm": 0.4133070707321167, - "learning_rate": 1.3444176502940307e-05, - "loss": 0.0457, - "step": 38740 - }, - { - "epoch": 0.9835004442188094, - "grad_norm": 0.3150346577167511, - "learning_rate": 1.3443330371874605e-05, - "loss": 0.0359, - "step": 38745 - }, - { - "epoch": 0.9836273638786648, - "grad_norm": 0.3698703348636627, - "learning_rate": 1.3442484240808902e-05, - "loss": 0.0643, - "step": 38750 - }, - { - "epoch": 0.9837542835385201, - "grad_norm": 0.39175474643707275, - "learning_rate": 1.34416381097432e-05, - "loss": 0.0614, - "step": 38755 - }, - { - "epoch": 0.9838812031983755, - "grad_norm": 0.3490341603755951, - "learning_rate": 1.3440791978677499e-05, - "loss": 0.0491, - "step": 38760 - }, - { - "epoch": 0.9840081228582307, - "grad_norm": 0.43810760974884033, - "learning_rate": 1.3439945847611797e-05, - "loss": 0.0478, - "step": 38765 - }, - { - "epoch": 0.984135042518086, - "grad_norm": 0.39300137758255005, - "learning_rate": 1.3439099716546094e-05, - "loss": 0.0372, - "step": 38770 - }, - { - "epoch": 0.9842619621779414, - "grad_norm": 0.4997010827064514, - "learning_rate": 1.3438253585480392e-05, - "loss": 0.0716, - "step": 38775 - }, - { - "epoch": 0.9843888818377967, - "grad_norm": 0.6360993385314941, - "learning_rate": 1.343740745441469e-05, - "loss": 0.0629, - "step": 38780 - }, - { - "epoch": 0.9845158014976519, - "grad_norm": 0.6396772861480713, - "learning_rate": 1.343656132334899e-05, - "loss": 0.0625, - "step": 38785 - }, - { - "epoch": 0.9846427211575073, - "grad_norm": 0.4295228123664856, - "learning_rate": 1.3435715192283286e-05, - "loss": 0.0459, - "step": 38790 - }, - { - "epoch": 0.9847696408173626, - "grad_norm": 0.40254446864128113, - "learning_rate": 1.3434869061217584e-05, - "loss": 0.0568, - "step": 38795 - }, - { - "epoch": 0.984896560477218, - "grad_norm": 0.4228493273258209, - "learning_rate": 1.3434022930151883e-05, - "loss": 0.0653, - "step": 38800 - }, - { - "epoch": 0.9850234801370732, - "grad_norm": 0.4520529806613922, - "learning_rate": 1.3433176799086181e-05, - "loss": 0.0594, - "step": 38805 - }, - { - "epoch": 0.9851503997969285, - "grad_norm": 0.5043602585792542, - "learning_rate": 1.3432330668020476e-05, - "loss": 0.0527, - "step": 38810 - }, - { - "epoch": 0.9852773194567839, - "grad_norm": 0.6375881433486938, - "learning_rate": 1.3431484536954775e-05, - "loss": 0.069, - "step": 38815 - }, - { - "epoch": 0.9854042391166392, - "grad_norm": 0.37149110436439514, - "learning_rate": 1.3430638405889073e-05, - "loss": 0.0548, - "step": 38820 - }, - { - "epoch": 0.9855311587764944, - "grad_norm": 0.33118200302124023, - "learning_rate": 1.3429792274823373e-05, - "loss": 0.0564, - "step": 38825 - }, - { - "epoch": 0.9856580784363498, - "grad_norm": 0.31722673773765564, - "learning_rate": 1.3428946143757668e-05, - "loss": 0.0626, - "step": 38830 - }, - { - "epoch": 0.9857849980962051, - "grad_norm": 0.45425495505332947, - "learning_rate": 1.3428100012691966e-05, - "loss": 0.052, - "step": 38835 - }, - { - "epoch": 0.9859119177560605, - "grad_norm": 0.3975805640220642, - "learning_rate": 1.3427253881626265e-05, - "loss": 0.0541, - "step": 38840 - }, - { - "epoch": 0.9860388374159157, - "grad_norm": 0.5004658699035645, - "learning_rate": 1.3426407750560563e-05, - "loss": 0.0686, - "step": 38845 - }, - { - "epoch": 0.986165757075771, - "grad_norm": 0.43407854437828064, - "learning_rate": 1.342556161949486e-05, - "loss": 0.0885, - "step": 38850 - }, - { - "epoch": 0.9862926767356264, - "grad_norm": 0.3772173821926117, - "learning_rate": 1.3424715488429158e-05, - "loss": 0.0499, - "step": 38855 - }, - { - "epoch": 0.9864195963954817, - "grad_norm": 0.5036830902099609, - "learning_rate": 1.3423869357363457e-05, - "loss": 0.0506, - "step": 38860 - }, - { - "epoch": 0.9865465160553369, - "grad_norm": 0.6410204768180847, - "learning_rate": 1.3423023226297755e-05, - "loss": 0.0471, - "step": 38865 - }, - { - "epoch": 0.9866734357151923, - "grad_norm": 0.6326226592063904, - "learning_rate": 1.3422177095232052e-05, - "loss": 0.0734, - "step": 38870 - }, - { - "epoch": 0.9868003553750476, - "grad_norm": 0.7660290002822876, - "learning_rate": 1.342133096416635e-05, - "loss": 0.0582, - "step": 38875 - }, - { - "epoch": 0.9869272750349029, - "grad_norm": 1.606743574142456, - "learning_rate": 1.3420484833100649e-05, - "loss": 0.0737, - "step": 38880 - }, - { - "epoch": 0.9870541946947582, - "grad_norm": 0.5039710402488708, - "learning_rate": 1.3419638702034947e-05, - "loss": 0.0525, - "step": 38885 - }, - { - "epoch": 0.9871811143546135, - "grad_norm": 0.5030731558799744, - "learning_rate": 1.3418792570969244e-05, - "loss": 0.0556, - "step": 38890 - }, - { - "epoch": 0.9873080340144689, - "grad_norm": 0.3629481792449951, - "learning_rate": 1.3417946439903542e-05, - "loss": 0.0554, - "step": 38895 - }, - { - "epoch": 0.9874349536743241, - "grad_norm": 0.7855660319328308, - "learning_rate": 1.341710030883784e-05, - "loss": 0.0534, - "step": 38900 - }, - { - "epoch": 0.9875618733341794, - "grad_norm": 0.5014315843582153, - "learning_rate": 1.3416254177772139e-05, - "loss": 0.0562, - "step": 38905 - }, - { - "epoch": 0.9876887929940348, - "grad_norm": 0.6386884450912476, - "learning_rate": 1.3415408046706436e-05, - "loss": 0.0522, - "step": 38910 - }, - { - "epoch": 0.9878157126538901, - "grad_norm": 0.447870671749115, - "learning_rate": 1.3414561915640734e-05, - "loss": 0.0503, - "step": 38915 - }, - { - "epoch": 0.9879426323137454, - "grad_norm": 0.3402958810329437, - "learning_rate": 1.3413715784575033e-05, - "loss": 0.0407, - "step": 38920 - }, - { - "epoch": 0.9880695519736007, - "grad_norm": 0.47613978385925293, - "learning_rate": 1.3412869653509331e-05, - "loss": 0.034, - "step": 38925 - }, - { - "epoch": 0.988196471633456, - "grad_norm": 0.49837619066238403, - "learning_rate": 1.3412023522443628e-05, - "loss": 0.0631, - "step": 38930 - }, - { - "epoch": 0.9883233912933114, - "grad_norm": 0.8446959257125854, - "learning_rate": 1.3411177391377926e-05, - "loss": 0.0577, - "step": 38935 - }, - { - "epoch": 0.9884503109531666, - "grad_norm": 0.4276309013366699, - "learning_rate": 1.3410331260312224e-05, - "loss": 0.0605, - "step": 38940 - }, - { - "epoch": 0.9885772306130219, - "grad_norm": 1.2470521926879883, - "learning_rate": 1.3409485129246523e-05, - "loss": 0.0526, - "step": 38945 - }, - { - "epoch": 0.9887041502728773, - "grad_norm": 0.6143015623092651, - "learning_rate": 1.3408638998180818e-05, - "loss": 0.0621, - "step": 38950 - }, - { - "epoch": 0.9888310699327326, - "grad_norm": 0.7445563673973083, - "learning_rate": 1.3407792867115116e-05, - "loss": 0.0593, - "step": 38955 - }, - { - "epoch": 0.9889579895925878, - "grad_norm": 0.4611375033855438, - "learning_rate": 1.3406946736049415e-05, - "loss": 0.0661, - "step": 38960 - }, - { - "epoch": 0.9890849092524432, - "grad_norm": 0.43681544065475464, - "learning_rate": 1.3406100604983713e-05, - "loss": 0.0643, - "step": 38965 - }, - { - "epoch": 0.9892118289122985, - "grad_norm": 0.6700069308280945, - "learning_rate": 1.340525447391801e-05, - "loss": 0.0406, - "step": 38970 - }, - { - "epoch": 0.9893387485721539, - "grad_norm": 0.6056674718856812, - "learning_rate": 1.3404408342852308e-05, - "loss": 0.0598, - "step": 38975 - }, - { - "epoch": 0.9894656682320091, - "grad_norm": 0.5145580768585205, - "learning_rate": 1.3403562211786607e-05, - "loss": 0.0453, - "step": 38980 - }, - { - "epoch": 0.9895925878918644, - "grad_norm": 0.30512458086013794, - "learning_rate": 1.3402716080720905e-05, - "loss": 0.0529, - "step": 38985 - }, - { - "epoch": 0.9897195075517198, - "grad_norm": 0.3917909562587738, - "learning_rate": 1.3401869949655202e-05, - "loss": 0.0429, - "step": 38990 - }, - { - "epoch": 0.9898464272115751, - "grad_norm": 0.450702041387558, - "learning_rate": 1.34010238185895e-05, - "loss": 0.0706, - "step": 38995 - }, - { - "epoch": 0.9899733468714303, - "grad_norm": 0.3172222971916199, - "learning_rate": 1.3400177687523798e-05, - "loss": 0.0548, - "step": 39000 - }, - { - "epoch": 0.9901002665312857, - "grad_norm": 0.6760785579681396, - "learning_rate": 1.3399331556458097e-05, - "loss": 0.065, - "step": 39005 - }, - { - "epoch": 0.990227186191141, - "grad_norm": 0.35869526863098145, - "learning_rate": 1.3398485425392394e-05, - "loss": 0.0573, - "step": 39010 - }, - { - "epoch": 0.9903541058509964, - "grad_norm": 0.3856920897960663, - "learning_rate": 1.3397639294326692e-05, - "loss": 0.0611, - "step": 39015 - }, - { - "epoch": 0.9904810255108516, - "grad_norm": 0.4707334339618683, - "learning_rate": 1.339679316326099e-05, - "loss": 0.0559, - "step": 39020 - }, - { - "epoch": 0.9906079451707069, - "grad_norm": 0.6052232980728149, - "learning_rate": 1.3395947032195289e-05, - "loss": 0.0482, - "step": 39025 - }, - { - "epoch": 0.9907348648305623, - "grad_norm": 0.5042586922645569, - "learning_rate": 1.3395100901129586e-05, - "loss": 0.068, - "step": 39030 - }, - { - "epoch": 0.9908617844904176, - "grad_norm": 0.978948712348938, - "learning_rate": 1.3394254770063884e-05, - "loss": 0.0679, - "step": 39035 - }, - { - "epoch": 0.9909887041502728, - "grad_norm": 0.5048556923866272, - "learning_rate": 1.3393408638998182e-05, - "loss": 0.0649, - "step": 39040 - }, - { - "epoch": 0.9911156238101282, - "grad_norm": 0.5127792954444885, - "learning_rate": 1.339256250793248e-05, - "loss": 0.0769, - "step": 39045 - }, - { - "epoch": 0.9912425434699835, - "grad_norm": 0.3436877131462097, - "learning_rate": 1.3391716376866777e-05, - "loss": 0.058, - "step": 39050 - }, - { - "epoch": 0.9913694631298388, - "grad_norm": 0.3652808964252472, - "learning_rate": 1.3390870245801076e-05, - "loss": 0.0668, - "step": 39055 - }, - { - "epoch": 0.9914963827896941, - "grad_norm": 0.41201040148735046, - "learning_rate": 1.3390024114735374e-05, - "loss": 0.0586, - "step": 39060 - }, - { - "epoch": 0.9916233024495494, - "grad_norm": 0.49784281849861145, - "learning_rate": 1.3389177983669673e-05, - "loss": 0.0491, - "step": 39065 - }, - { - "epoch": 0.9917502221094048, - "grad_norm": 0.6285993456840515, - "learning_rate": 1.3388331852603968e-05, - "loss": 0.0612, - "step": 39070 - }, - { - "epoch": 0.99187714176926, - "grad_norm": 0.37934672832489014, - "learning_rate": 1.3387485721538268e-05, - "loss": 0.0636, - "step": 39075 - }, - { - "epoch": 0.9920040614291153, - "grad_norm": 0.3757530450820923, - "learning_rate": 1.3386639590472566e-05, - "loss": 0.0521, - "step": 39080 - }, - { - "epoch": 0.9921309810889707, - "grad_norm": 0.5228649377822876, - "learning_rate": 1.3385793459406865e-05, - "loss": 0.0561, - "step": 39085 - }, - { - "epoch": 0.992257900748826, - "grad_norm": 0.33878207206726074, - "learning_rate": 1.338494732834116e-05, - "loss": 0.0406, - "step": 39090 - }, - { - "epoch": 0.9923848204086813, - "grad_norm": 0.9297897815704346, - "learning_rate": 1.3384101197275458e-05, - "loss": 0.0508, - "step": 39095 - }, - { - "epoch": 0.9925117400685366, - "grad_norm": 0.547353208065033, - "learning_rate": 1.3383255066209756e-05, - "loss": 0.0526, - "step": 39100 - }, - { - "epoch": 0.9926386597283919, - "grad_norm": 0.6131523847579956, - "learning_rate": 1.3382408935144055e-05, - "loss": 0.0766, - "step": 39105 - }, - { - "epoch": 0.9927655793882473, - "grad_norm": 1.209833025932312, - "learning_rate": 1.3381562804078351e-05, - "loss": 0.069, - "step": 39110 - }, - { - "epoch": 0.9928924990481025, - "grad_norm": 0.27545714378356934, - "learning_rate": 1.338071667301265e-05, - "loss": 0.0558, - "step": 39115 - }, - { - "epoch": 0.9930194187079578, - "grad_norm": 0.5497746467590332, - "learning_rate": 1.3379870541946948e-05, - "loss": 0.0625, - "step": 39120 - }, - { - "epoch": 0.9931463383678132, - "grad_norm": 0.7656990885734558, - "learning_rate": 1.3379024410881247e-05, - "loss": 0.0393, - "step": 39125 - }, - { - "epoch": 0.9932732580276685, - "grad_norm": 0.7311884760856628, - "learning_rate": 1.3378178279815543e-05, - "loss": 0.077, - "step": 39130 - }, - { - "epoch": 0.9934001776875238, - "grad_norm": 0.4651784896850586, - "learning_rate": 1.3377332148749842e-05, - "loss": 0.0584, - "step": 39135 - }, - { - "epoch": 0.9935270973473791, - "grad_norm": 0.6470368504524231, - "learning_rate": 1.337648601768414e-05, - "loss": 0.0642, - "step": 39140 - }, - { - "epoch": 0.9936540170072344, - "grad_norm": 0.4856240153312683, - "learning_rate": 1.3375639886618439e-05, - "loss": 0.0602, - "step": 39145 - }, - { - "epoch": 0.9937809366670898, - "grad_norm": 0.39443016052246094, - "learning_rate": 1.3374793755552735e-05, - "loss": 0.0633, - "step": 39150 - }, - { - "epoch": 0.993907856326945, - "grad_norm": 0.4412926435470581, - "learning_rate": 1.3373947624487034e-05, - "loss": 0.0737, - "step": 39155 - }, - { - "epoch": 0.9940347759868003, - "grad_norm": 0.44791311025619507, - "learning_rate": 1.3373101493421332e-05, - "loss": 0.0537, - "step": 39160 - }, - { - "epoch": 0.9941616956466557, - "grad_norm": 0.4660758078098297, - "learning_rate": 1.337225536235563e-05, - "loss": 0.0564, - "step": 39165 - }, - { - "epoch": 0.994288615306511, - "grad_norm": 0.5814573168754578, - "learning_rate": 1.3371409231289927e-05, - "loss": 0.0607, - "step": 39170 - }, - { - "epoch": 0.9944155349663663, - "grad_norm": 0.33174753189086914, - "learning_rate": 1.3370563100224226e-05, - "loss": 0.0508, - "step": 39175 - }, - { - "epoch": 0.9945424546262216, - "grad_norm": 0.43150410056114197, - "learning_rate": 1.3369716969158524e-05, - "loss": 0.0563, - "step": 39180 - }, - { - "epoch": 0.9946693742860769, - "grad_norm": 0.6634384989738464, - "learning_rate": 1.3368870838092822e-05, - "loss": 0.0676, - "step": 39185 - }, - { - "epoch": 0.9947962939459323, - "grad_norm": 0.41816264390945435, - "learning_rate": 1.3368024707027119e-05, - "loss": 0.0496, - "step": 39190 - }, - { - "epoch": 0.9949232136057875, - "grad_norm": 0.4776964485645294, - "learning_rate": 1.3367178575961418e-05, - "loss": 0.0648, - "step": 39195 - }, - { - "epoch": 0.9950501332656428, - "grad_norm": 0.8483793139457703, - "learning_rate": 1.3366332444895716e-05, - "loss": 0.0655, - "step": 39200 - }, - { - "epoch": 0.9951770529254982, - "grad_norm": 0.5192837715148926, - "learning_rate": 1.3365486313830014e-05, - "loss": 0.0634, - "step": 39205 - }, - { - "epoch": 0.9953039725853535, - "grad_norm": 0.36310064792633057, - "learning_rate": 1.336464018276431e-05, - "loss": 0.0812, - "step": 39210 - }, - { - "epoch": 0.9954308922452088, - "grad_norm": 0.4808320999145508, - "learning_rate": 1.336379405169861e-05, - "loss": 0.0647, - "step": 39215 - }, - { - "epoch": 0.9955578119050641, - "grad_norm": 0.5467652082443237, - "learning_rate": 1.3362947920632908e-05, - "loss": 0.0552, - "step": 39220 - }, - { - "epoch": 0.9956847315649194, - "grad_norm": 1.4123872518539429, - "learning_rate": 1.3362101789567206e-05, - "loss": 0.0472, - "step": 39225 - }, - { - "epoch": 0.9958116512247747, - "grad_norm": 1.5582306385040283, - "learning_rate": 1.3361255658501505e-05, - "loss": 0.0601, - "step": 39230 - }, - { - "epoch": 0.99593857088463, - "grad_norm": 0.6776049733161926, - "learning_rate": 1.33604095274358e-05, - "loss": 0.069, - "step": 39235 - }, - { - "epoch": 0.9960654905444853, - "grad_norm": 0.5653127431869507, - "learning_rate": 1.3359563396370098e-05, - "loss": 0.044, - "step": 39240 - }, - { - "epoch": 0.9961924102043407, - "grad_norm": 0.40255188941955566, - "learning_rate": 1.3358717265304396e-05, - "loss": 0.0465, - "step": 39245 - }, - { - "epoch": 0.9963193298641959, - "grad_norm": 0.4232846796512604, - "learning_rate": 1.3357871134238697e-05, - "loss": 0.076, - "step": 39250 - }, - { - "epoch": 0.9964462495240513, - "grad_norm": 0.5257855653762817, - "learning_rate": 1.3357025003172992e-05, - "loss": 0.0534, - "step": 39255 - }, - { - "epoch": 0.9965731691839066, - "grad_norm": 0.5537463426589966, - "learning_rate": 1.335617887210729e-05, - "loss": 0.0484, - "step": 39260 - }, - { - "epoch": 0.9967000888437619, - "grad_norm": 0.6680005788803101, - "learning_rate": 1.3355332741041588e-05, - "loss": 0.0568, - "step": 39265 - }, - { - "epoch": 0.9968270085036172, - "grad_norm": 0.4830329716205597, - "learning_rate": 1.3354486609975887e-05, - "loss": 0.0632, - "step": 39270 - }, - { - "epoch": 0.9969539281634725, - "grad_norm": 0.39686766266822815, - "learning_rate": 1.3353640478910184e-05, - "loss": 0.0634, - "step": 39275 - }, - { - "epoch": 0.9970808478233278, - "grad_norm": 0.6109327077865601, - "learning_rate": 1.3352794347844482e-05, - "loss": 0.0466, - "step": 39280 - }, - { - "epoch": 0.9972077674831832, - "grad_norm": 0.3406991958618164, - "learning_rate": 1.335194821677878e-05, - "loss": 0.0399, - "step": 39285 - }, - { - "epoch": 0.9973346871430384, - "grad_norm": 0.6797659397125244, - "learning_rate": 1.3351102085713079e-05, - "loss": 0.0492, - "step": 39290 - }, - { - "epoch": 0.9974616068028938, - "grad_norm": 0.7214959263801575, - "learning_rate": 1.3350255954647375e-05, - "loss": 0.0526, - "step": 39295 - }, - { - "epoch": 0.9975885264627491, - "grad_norm": 0.3396146595478058, - "learning_rate": 1.3349409823581674e-05, - "loss": 0.0591, - "step": 39300 - }, - { - "epoch": 0.9977154461226044, - "grad_norm": 0.34786108136177063, - "learning_rate": 1.3348563692515972e-05, - "loss": 0.0526, - "step": 39305 - }, - { - "epoch": 0.9978423657824597, - "grad_norm": 0.5502266883850098, - "learning_rate": 1.334771756145027e-05, - "loss": 0.0592, - "step": 39310 - }, - { - "epoch": 0.997969285442315, - "grad_norm": 0.5257900357246399, - "learning_rate": 1.3346871430384567e-05, - "loss": 0.0637, - "step": 39315 - }, - { - "epoch": 0.9980962051021703, - "grad_norm": 3.438800573348999, - "learning_rate": 1.3346025299318866e-05, - "loss": 0.0635, - "step": 39320 - }, - { - "epoch": 0.9982231247620257, - "grad_norm": 0.3913041949272156, - "learning_rate": 1.3345179168253164e-05, - "loss": 0.0353, - "step": 39325 - }, - { - "epoch": 0.9983500444218809, - "grad_norm": 0.24032923579216003, - "learning_rate": 1.3344333037187463e-05, - "loss": 0.0474, - "step": 39330 - }, - { - "epoch": 0.9984769640817363, - "grad_norm": 0.4586259126663208, - "learning_rate": 1.334348690612176e-05, - "loss": 0.064, - "step": 39335 - }, - { - "epoch": 0.9986038837415916, - "grad_norm": 0.6787373423576355, - "learning_rate": 1.3342640775056058e-05, - "loss": 0.0497, - "step": 39340 - }, - { - "epoch": 0.9987308034014469, - "grad_norm": 0.5593823790550232, - "learning_rate": 1.3341794643990356e-05, - "loss": 0.0849, - "step": 39345 - }, - { - "epoch": 0.9988577230613022, - "grad_norm": 0.3211411237716675, - "learning_rate": 1.3340948512924654e-05, - "loss": 0.051, - "step": 39350 - }, - { - "epoch": 0.9989846427211575, - "grad_norm": 0.8242077827453613, - "learning_rate": 1.3340102381858951e-05, - "loss": 0.0448, - "step": 39355 - }, - { - "epoch": 0.9991115623810128, - "grad_norm": 0.5855270028114319, - "learning_rate": 1.333925625079325e-05, - "loss": 0.058, - "step": 39360 - }, - { - "epoch": 0.9992384820408682, - "grad_norm": 0.5718579292297363, - "learning_rate": 1.3338410119727548e-05, - "loss": 0.0587, - "step": 39365 - }, - { - "epoch": 0.9993654017007234, - "grad_norm": 0.6407716870307922, - "learning_rate": 1.3337563988661846e-05, - "loss": 0.0614, - "step": 39370 - }, - { - "epoch": 0.9994923213605788, - "grad_norm": 0.4896385967731476, - "learning_rate": 1.3336717857596141e-05, - "loss": 0.0545, - "step": 39375 - }, - { - "epoch": 0.9996192410204341, - "grad_norm": 2.4203813076019287, - "learning_rate": 1.333587172653044e-05, - "loss": 0.0625, - "step": 39380 - }, - { - "epoch": 0.9997461606802894, - "grad_norm": 0.6581799387931824, - "learning_rate": 1.3335025595464738e-05, - "loss": 0.057, - "step": 39385 - }, - { - "epoch": 0.9998730803401447, - "grad_norm": 0.4827825725078583, - "learning_rate": 1.3334179464399037e-05, - "loss": 0.0517, - "step": 39390 - }, - { - "epoch": 1.0, - "grad_norm": 0.4584924578666687, - "learning_rate": 1.3333333333333333e-05, - "loss": 0.0623, - "step": 39395 - }, - { - "epoch": 1.0, - "eval_loss": 0.18828971683979034, - "eval_runtime": 1245.6756, - "eval_samples_per_second": 100.347, - "eval_steps_per_second": 6.272, - "step": 39395 - }, - { - "epoch": 1.0001269196598552, - "grad_norm": 0.5291444063186646, - "learning_rate": 1.3332487202267632e-05, - "loss": 0.0633, - "step": 39400 - }, - { - "epoch": 1.0002538393197107, - "grad_norm": 0.683459997177124, - "learning_rate": 1.333164107120193e-05, - "loss": 0.0387, - "step": 39405 - }, - { - "epoch": 1.000380758979566, - "grad_norm": 0.6832085847854614, - "learning_rate": 1.3330794940136229e-05, - "loss": 0.0526, - "step": 39410 - }, - { - "epoch": 1.0005076786394211, - "grad_norm": 0.988511323928833, - "learning_rate": 1.3329948809070525e-05, - "loss": 0.0691, - "step": 39415 - }, - { - "epoch": 1.0006345982992766, - "grad_norm": 0.4375636577606201, - "learning_rate": 1.3329102678004824e-05, - "loss": 0.0512, - "step": 39420 - }, - { - "epoch": 1.0007615179591318, - "grad_norm": 0.3503415286540985, - "learning_rate": 1.3328256546939122e-05, - "loss": 0.045, - "step": 39425 - }, - { - "epoch": 1.0008884376189873, - "grad_norm": 0.43245381116867065, - "learning_rate": 1.332741041587342e-05, - "loss": 0.0745, - "step": 39430 - }, - { - "epoch": 1.0010153572788425, - "grad_norm": 0.6838481426239014, - "learning_rate": 1.3326564284807717e-05, - "loss": 0.0603, - "step": 39435 - }, - { - "epoch": 1.0011422769386977, - "grad_norm": 0.38460788130760193, - "learning_rate": 1.3325718153742016e-05, - "loss": 0.0391, - "step": 39440 - }, - { - "epoch": 1.0012691965985532, - "grad_norm": 0.7734371423721313, - "learning_rate": 1.3324872022676314e-05, - "loss": 0.0668, - "step": 39445 - }, - { - "epoch": 1.0013961162584084, - "grad_norm": 0.9789554476737976, - "learning_rate": 1.3324025891610612e-05, - "loss": 0.048, - "step": 39450 - }, - { - "epoch": 1.0015230359182636, - "grad_norm": 0.4985215365886688, - "learning_rate": 1.3323179760544909e-05, - "loss": 0.0376, - "step": 39455 - }, - { - "epoch": 1.001649955578119, - "grad_norm": 0.7156290411949158, - "learning_rate": 1.3322333629479207e-05, - "loss": 0.0639, - "step": 39460 - }, - { - "epoch": 1.0017768752379743, - "grad_norm": 0.4138924181461334, - "learning_rate": 1.3321487498413506e-05, - "loss": 0.0688, - "step": 39465 - }, - { - "epoch": 1.0019037948978298, - "grad_norm": 0.428419291973114, - "learning_rate": 1.3320641367347804e-05, - "loss": 0.0584, - "step": 39470 - }, - { - "epoch": 1.002030714557685, - "grad_norm": 1.1869888305664062, - "learning_rate": 1.3319795236282101e-05, - "loss": 0.0572, - "step": 39475 - }, - { - "epoch": 1.0021576342175402, - "grad_norm": 0.4797225892543793, - "learning_rate": 1.33189491052164e-05, - "loss": 0.0555, - "step": 39480 - }, - { - "epoch": 1.0022845538773957, - "grad_norm": 0.7549352049827576, - "learning_rate": 1.3318102974150698e-05, - "loss": 0.0603, - "step": 39485 - }, - { - "epoch": 1.002411473537251, - "grad_norm": 0.2528965473175049, - "learning_rate": 1.3317256843084996e-05, - "loss": 0.0532, - "step": 39490 - }, - { - "epoch": 1.0025383931971061, - "grad_norm": 0.45933017134666443, - "learning_rate": 1.3316410712019293e-05, - "loss": 0.0537, - "step": 39495 - }, - { - "epoch": 1.0026653128569616, - "grad_norm": 0.6904463171958923, - "learning_rate": 1.3315564580953591e-05, - "loss": 0.0821, - "step": 39500 - }, - { - "epoch": 1.0027922325168168, - "grad_norm": 0.6833702325820923, - "learning_rate": 1.331471844988789e-05, - "loss": 0.0588, - "step": 39505 - }, - { - "epoch": 1.0029191521766723, - "grad_norm": 0.5821878910064697, - "learning_rate": 1.3313872318822188e-05, - "loss": 0.0467, - "step": 39510 - }, - { - "epoch": 1.0030460718365275, - "grad_norm": 0.2886406481266022, - "learning_rate": 1.3313026187756483e-05, - "loss": 0.0505, - "step": 39515 - }, - { - "epoch": 1.0031729914963827, - "grad_norm": 0.35526591539382935, - "learning_rate": 1.3312180056690781e-05, - "loss": 0.057, - "step": 39520 - }, - { - "epoch": 1.0032999111562382, - "grad_norm": 0.43005213141441345, - "learning_rate": 1.331133392562508e-05, - "loss": 0.0658, - "step": 39525 - }, - { - "epoch": 1.0034268308160934, - "grad_norm": 0.4449928402900696, - "learning_rate": 1.3310487794559378e-05, - "loss": 0.0652, - "step": 39530 - }, - { - "epoch": 1.0035537504759486, - "grad_norm": 0.4650647044181824, - "learning_rate": 1.3309641663493675e-05, - "loss": 0.053, - "step": 39535 - }, - { - "epoch": 1.003680670135804, - "grad_norm": 0.41189268231391907, - "learning_rate": 1.3308795532427973e-05, - "loss": 0.0647, - "step": 39540 - }, - { - "epoch": 1.0038075897956593, - "grad_norm": 0.7327568531036377, - "learning_rate": 1.3307949401362272e-05, - "loss": 0.0663, - "step": 39545 - }, - { - "epoch": 1.0039345094555148, - "grad_norm": 0.46255946159362793, - "learning_rate": 1.330710327029657e-05, - "loss": 0.0732, - "step": 39550 - }, - { - "epoch": 1.00406142911537, - "grad_norm": 0.38971132040023804, - "learning_rate": 1.3306257139230867e-05, - "loss": 0.0508, - "step": 39555 - }, - { - "epoch": 1.0041883487752252, - "grad_norm": 0.18436095118522644, - "learning_rate": 1.3305411008165165e-05, - "loss": 0.0373, - "step": 39560 - }, - { - "epoch": 1.0043152684350807, - "grad_norm": 0.4304863512516022, - "learning_rate": 1.3304564877099464e-05, - "loss": 0.0496, - "step": 39565 - }, - { - "epoch": 1.004442188094936, - "grad_norm": 0.3358505368232727, - "learning_rate": 1.3303718746033762e-05, - "loss": 0.0538, - "step": 39570 - }, - { - "epoch": 1.0045691077547911, - "grad_norm": 0.7366676926612854, - "learning_rate": 1.3302872614968059e-05, - "loss": 0.037, - "step": 39575 - }, - { - "epoch": 1.0046960274146466, - "grad_norm": 0.4109572470188141, - "learning_rate": 1.3302026483902357e-05, - "loss": 0.0612, - "step": 39580 - }, - { - "epoch": 1.0048229470745018, - "grad_norm": 0.48878926038742065, - "learning_rate": 1.3301180352836656e-05, - "loss": 0.0468, - "step": 39585 - }, - { - "epoch": 1.004949866734357, - "grad_norm": 0.47287458181381226, - "learning_rate": 1.3300334221770954e-05, - "loss": 0.0447, - "step": 39590 - }, - { - "epoch": 1.0050767863942125, - "grad_norm": 0.35383495688438416, - "learning_rate": 1.329948809070525e-05, - "loss": 0.0392, - "step": 39595 - }, - { - "epoch": 1.0052037060540677, - "grad_norm": 0.7758587598800659, - "learning_rate": 1.3298641959639549e-05, - "loss": 0.0672, - "step": 39600 - }, - { - "epoch": 1.0053306257139232, - "grad_norm": 0.36080053448677063, - "learning_rate": 1.3297795828573848e-05, - "loss": 0.0582, - "step": 39605 - }, - { - "epoch": 1.0054575453737784, - "grad_norm": 0.47241687774658203, - "learning_rate": 1.3296949697508146e-05, - "loss": 0.0524, - "step": 39610 - }, - { - "epoch": 1.0055844650336336, - "grad_norm": 0.6147326231002808, - "learning_rate": 1.3296103566442443e-05, - "loss": 0.0508, - "step": 39615 - }, - { - "epoch": 1.005711384693489, - "grad_norm": 0.49105629324913025, - "learning_rate": 1.3295257435376741e-05, - "loss": 0.065, - "step": 39620 - }, - { - "epoch": 1.0058383043533443, - "grad_norm": 0.4448331594467163, - "learning_rate": 1.329441130431104e-05, - "loss": 0.0434, - "step": 39625 - }, - { - "epoch": 1.0059652240131995, - "grad_norm": 0.5199721455574036, - "learning_rate": 1.3293565173245338e-05, - "loss": 0.0634, - "step": 39630 - }, - { - "epoch": 1.006092143673055, - "grad_norm": 0.502418577671051, - "learning_rate": 1.3292719042179633e-05, - "loss": 0.0708, - "step": 39635 - }, - { - "epoch": 1.0062190633329102, - "grad_norm": 0.5298988819122314, - "learning_rate": 1.3291872911113933e-05, - "loss": 0.0457, - "step": 39640 - }, - { - "epoch": 1.0063459829927657, - "grad_norm": 0.2110983282327652, - "learning_rate": 1.3291026780048231e-05, - "loss": 0.0585, - "step": 39645 - }, - { - "epoch": 1.006472902652621, - "grad_norm": 0.36951741576194763, - "learning_rate": 1.329018064898253e-05, - "loss": 0.0609, - "step": 39650 - }, - { - "epoch": 1.0065998223124761, - "grad_norm": 0.45700618624687195, - "learning_rate": 1.3289334517916825e-05, - "loss": 0.067, - "step": 39655 - }, - { - "epoch": 1.0067267419723316, - "grad_norm": 0.4440871477127075, - "learning_rate": 1.3288488386851123e-05, - "loss": 0.0324, - "step": 39660 - }, - { - "epoch": 1.0068536616321868, - "grad_norm": 0.6056874990463257, - "learning_rate": 1.3287642255785422e-05, - "loss": 0.055, - "step": 39665 - }, - { - "epoch": 1.006980581292042, - "grad_norm": 0.45994412899017334, - "learning_rate": 1.328679612471972e-05, - "loss": 0.0588, - "step": 39670 - }, - { - "epoch": 1.0071075009518975, - "grad_norm": 0.42776650190353394, - "learning_rate": 1.3285949993654017e-05, - "loss": 0.0612, - "step": 39675 - }, - { - "epoch": 1.0072344206117527, - "grad_norm": 0.5028538107872009, - "learning_rate": 1.3285103862588315e-05, - "loss": 0.0552, - "step": 39680 - }, - { - "epoch": 1.0073613402716082, - "grad_norm": 0.4270574748516083, - "learning_rate": 1.3284257731522614e-05, - "loss": 0.0517, - "step": 39685 - }, - { - "epoch": 1.0074882599314634, - "grad_norm": 0.5270164608955383, - "learning_rate": 1.3283411600456912e-05, - "loss": 0.0594, - "step": 39690 - }, - { - "epoch": 1.0076151795913186, - "grad_norm": 0.39982521533966064, - "learning_rate": 1.3282565469391209e-05, - "loss": 0.0628, - "step": 39695 - }, - { - "epoch": 1.007742099251174, - "grad_norm": 0.4009515941143036, - "learning_rate": 1.3281719338325507e-05, - "loss": 0.0644, - "step": 39700 - }, - { - "epoch": 1.0078690189110293, - "grad_norm": 0.3483113944530487, - "learning_rate": 1.3280873207259805e-05, - "loss": 0.0514, - "step": 39705 - }, - { - "epoch": 1.0079959385708845, - "grad_norm": 1.071771502494812, - "learning_rate": 1.3280027076194104e-05, - "loss": 0.0597, - "step": 39710 - }, - { - "epoch": 1.00812285823074, - "grad_norm": 0.68867027759552, - "learning_rate": 1.32791809451284e-05, - "loss": 0.0451, - "step": 39715 - }, - { - "epoch": 1.0082497778905952, - "grad_norm": 0.42373475432395935, - "learning_rate": 1.3278334814062699e-05, - "loss": 0.0584, - "step": 39720 - }, - { - "epoch": 1.0083766975504507, - "grad_norm": 0.595788836479187, - "learning_rate": 1.3277488682996997e-05, - "loss": 0.0782, - "step": 39725 - }, - { - "epoch": 1.008503617210306, - "grad_norm": 0.4089776277542114, - "learning_rate": 1.3276642551931296e-05, - "loss": 0.0395, - "step": 39730 - }, - { - "epoch": 1.0086305368701611, - "grad_norm": 0.9869831800460815, - "learning_rate": 1.3275796420865592e-05, - "loss": 0.0627, - "step": 39735 - }, - { - "epoch": 1.0087574565300166, - "grad_norm": 0.37842071056365967, - "learning_rate": 1.327495028979989e-05, - "loss": 0.059, - "step": 39740 - }, - { - "epoch": 1.0088843761898718, - "grad_norm": 0.9097848534584045, - "learning_rate": 1.327410415873419e-05, - "loss": 0.078, - "step": 39745 - }, - { - "epoch": 1.009011295849727, - "grad_norm": 0.3902098536491394, - "learning_rate": 1.3273258027668488e-05, - "loss": 0.0608, - "step": 39750 - }, - { - "epoch": 1.0091382155095825, - "grad_norm": 0.6572629809379578, - "learning_rate": 1.3272411896602786e-05, - "loss": 0.0716, - "step": 39755 - }, - { - "epoch": 1.0092651351694377, - "grad_norm": 0.41523417830467224, - "learning_rate": 1.3271565765537083e-05, - "loss": 0.0545, - "step": 39760 - }, - { - "epoch": 1.009392054829293, - "grad_norm": 0.5606778860092163, - "learning_rate": 1.3270719634471381e-05, - "loss": 0.0519, - "step": 39765 - }, - { - "epoch": 1.0095189744891484, - "grad_norm": 0.6888303160667419, - "learning_rate": 1.326987350340568e-05, - "loss": 0.0548, - "step": 39770 - }, - { - "epoch": 1.0096458941490036, - "grad_norm": 0.4859393537044525, - "learning_rate": 1.3269027372339978e-05, - "loss": 0.0542, - "step": 39775 - }, - { - "epoch": 1.009772813808859, - "grad_norm": 0.5766547918319702, - "learning_rate": 1.3268181241274275e-05, - "loss": 0.0479, - "step": 39780 - }, - { - "epoch": 1.0098997334687143, - "grad_norm": 0.5290656089782715, - "learning_rate": 1.3267335110208573e-05, - "loss": 0.0598, - "step": 39785 - }, - { - "epoch": 1.0100266531285695, - "grad_norm": 0.48722946643829346, - "learning_rate": 1.3266488979142871e-05, - "loss": 0.0636, - "step": 39790 - }, - { - "epoch": 1.010153572788425, - "grad_norm": 0.45424166321754456, - "learning_rate": 1.326564284807717e-05, - "loss": 0.0584, - "step": 39795 - }, - { - "epoch": 1.0102804924482802, - "grad_norm": 0.601348340511322, - "learning_rate": 1.3264796717011465e-05, - "loss": 0.0494, - "step": 39800 - }, - { - "epoch": 1.0104074121081354, - "grad_norm": 0.4926494359970093, - "learning_rate": 1.3263950585945763e-05, - "loss": 0.0516, - "step": 39805 - }, - { - "epoch": 1.010534331767991, - "grad_norm": 0.5752704739570618, - "learning_rate": 1.3263104454880062e-05, - "loss": 0.0557, - "step": 39810 - }, - { - "epoch": 1.0106612514278461, - "grad_norm": 0.2388419508934021, - "learning_rate": 1.3262258323814362e-05, - "loss": 0.0656, - "step": 39815 - }, - { - "epoch": 1.0107881710877016, - "grad_norm": 0.4340057969093323, - "learning_rate": 1.3261412192748657e-05, - "loss": 0.0484, - "step": 39820 - }, - { - "epoch": 1.0109150907475568, - "grad_norm": 0.17737525701522827, - "learning_rate": 1.3260566061682955e-05, - "loss": 0.0443, - "step": 39825 - }, - { - "epoch": 1.011042010407412, - "grad_norm": 1.1961146593093872, - "learning_rate": 1.3259719930617254e-05, - "loss": 0.0485, - "step": 39830 - }, - { - "epoch": 1.0111689300672675, - "grad_norm": 0.42776450514793396, - "learning_rate": 1.3258873799551552e-05, - "loss": 0.059, - "step": 39835 - }, - { - "epoch": 1.0112958497271227, - "grad_norm": 0.30065682530403137, - "learning_rate": 1.3258027668485849e-05, - "loss": 0.0545, - "step": 39840 - }, - { - "epoch": 1.011422769386978, - "grad_norm": 0.45888084173202515, - "learning_rate": 1.3257181537420147e-05, - "loss": 0.0642, - "step": 39845 - }, - { - "epoch": 1.0115496890468334, - "grad_norm": 0.5002809166908264, - "learning_rate": 1.3256335406354446e-05, - "loss": 0.0571, - "step": 39850 - }, - { - "epoch": 1.0116766087066886, - "grad_norm": 0.41622453927993774, - "learning_rate": 1.3255489275288744e-05, - "loss": 0.0627, - "step": 39855 - }, - { - "epoch": 1.011803528366544, - "grad_norm": 0.6980414390563965, - "learning_rate": 1.325464314422304e-05, - "loss": 0.0583, - "step": 39860 - }, - { - "epoch": 1.0119304480263993, - "grad_norm": 0.43752795457839966, - "learning_rate": 1.3253797013157339e-05, - "loss": 0.052, - "step": 39865 - }, - { - "epoch": 1.0120573676862545, - "grad_norm": 0.6000775694847107, - "learning_rate": 1.3252950882091637e-05, - "loss": 0.0572, - "step": 39870 - }, - { - "epoch": 1.01218428734611, - "grad_norm": 0.30171480774879456, - "learning_rate": 1.3252104751025936e-05, - "loss": 0.046, - "step": 39875 - }, - { - "epoch": 1.0123112070059652, - "grad_norm": 0.4245207607746124, - "learning_rate": 1.3251258619960233e-05, - "loss": 0.052, - "step": 39880 - }, - { - "epoch": 1.0124381266658204, - "grad_norm": 0.6799716949462891, - "learning_rate": 1.3250412488894531e-05, - "loss": 0.0683, - "step": 39885 - }, - { - "epoch": 1.012565046325676, - "grad_norm": 0.44057655334472656, - "learning_rate": 1.324956635782883e-05, - "loss": 0.0399, - "step": 39890 - }, - { - "epoch": 1.0126919659855311, - "grad_norm": 0.2919107675552368, - "learning_rate": 1.3248720226763128e-05, - "loss": 0.0479, - "step": 39895 - }, - { - "epoch": 1.0128188856453866, - "grad_norm": 0.7431767582893372, - "learning_rate": 1.3247874095697424e-05, - "loss": 0.0862, - "step": 39900 - }, - { - "epoch": 1.0129458053052418, - "grad_norm": 0.6669244766235352, - "learning_rate": 1.3247027964631723e-05, - "loss": 0.0469, - "step": 39905 - }, - { - "epoch": 1.013072724965097, - "grad_norm": 0.5128422379493713, - "learning_rate": 1.3246181833566021e-05, - "loss": 0.0427, - "step": 39910 - }, - { - "epoch": 1.0131996446249525, - "grad_norm": 0.5426151156425476, - "learning_rate": 1.324533570250032e-05, - "loss": 0.0591, - "step": 39915 - }, - { - "epoch": 1.0133265642848077, - "grad_norm": 0.3822177052497864, - "learning_rate": 1.3244489571434616e-05, - "loss": 0.0602, - "step": 39920 - }, - { - "epoch": 1.013453483944663, - "grad_norm": 0.39572834968566895, - "learning_rate": 1.3243643440368915e-05, - "loss": 0.0638, - "step": 39925 - }, - { - "epoch": 1.0135804036045184, - "grad_norm": 0.3796148896217346, - "learning_rate": 1.3242797309303213e-05, - "loss": 0.0541, - "step": 39930 - }, - { - "epoch": 1.0137073232643736, - "grad_norm": 0.4959101676940918, - "learning_rate": 1.3241951178237512e-05, - "loss": 0.0666, - "step": 39935 - }, - { - "epoch": 1.0138342429242289, - "grad_norm": 1.4045029878616333, - "learning_rate": 1.3241105047171807e-05, - "loss": 0.0404, - "step": 39940 - }, - { - "epoch": 1.0139611625840843, - "grad_norm": 0.42170336842536926, - "learning_rate": 1.3240258916106105e-05, - "loss": 0.0566, - "step": 39945 - }, - { - "epoch": 1.0140880822439395, - "grad_norm": 0.6134588718414307, - "learning_rate": 1.3239412785040403e-05, - "loss": 0.0615, - "step": 39950 - }, - { - "epoch": 1.014215001903795, - "grad_norm": 0.6735240817070007, - "learning_rate": 1.3238566653974702e-05, - "loss": 0.0547, - "step": 39955 - }, - { - "epoch": 1.0143419215636502, - "grad_norm": 0.5755873322486877, - "learning_rate": 1.3237720522908999e-05, - "loss": 0.0451, - "step": 39960 - }, - { - "epoch": 1.0144688412235054, - "grad_norm": 0.4751157760620117, - "learning_rate": 1.3236874391843297e-05, - "loss": 0.0586, - "step": 39965 - }, - { - "epoch": 1.014595760883361, - "grad_norm": 0.6171771287918091, - "learning_rate": 1.3236028260777595e-05, - "loss": 0.0358, - "step": 39970 - }, - { - "epoch": 1.0147226805432161, - "grad_norm": 0.7189069986343384, - "learning_rate": 1.3235182129711894e-05, - "loss": 0.085, - "step": 39975 - }, - { - "epoch": 1.0148496002030714, - "grad_norm": 0.31406351923942566, - "learning_rate": 1.323433599864619e-05, - "loss": 0.0637, - "step": 39980 - }, - { - "epoch": 1.0149765198629268, - "grad_norm": 0.5617179870605469, - "learning_rate": 1.3233489867580489e-05, - "loss": 0.0491, - "step": 39985 - }, - { - "epoch": 1.015103439522782, - "grad_norm": 0.41398271918296814, - "learning_rate": 1.3232643736514787e-05, - "loss": 0.0477, - "step": 39990 - }, - { - "epoch": 1.0152303591826375, - "grad_norm": 0.4426698684692383, - "learning_rate": 1.3231797605449086e-05, - "loss": 0.0732, - "step": 39995 - }, - { - "epoch": 1.0153572788424927, - "grad_norm": 0.6221765875816345, - "learning_rate": 1.3230951474383382e-05, - "loss": 0.0736, - "step": 40000 - }, - { - "epoch": 1.015484198502348, - "grad_norm": 3.715949535369873, - "learning_rate": 1.323010534331768e-05, - "loss": 0.0571, - "step": 40005 - }, - { - "epoch": 1.0156111181622034, - "grad_norm": 0.4172447621822357, - "learning_rate": 1.3229259212251979e-05, - "loss": 0.0574, - "step": 40010 - }, - { - "epoch": 1.0157380378220586, - "grad_norm": 0.4029063284397125, - "learning_rate": 1.3228413081186278e-05, - "loss": 0.0498, - "step": 40015 - }, - { - "epoch": 1.0158649574819139, - "grad_norm": 0.29804176092147827, - "learning_rate": 1.3227566950120574e-05, - "loss": 0.0551, - "step": 40020 - }, - { - "epoch": 1.0159918771417693, - "grad_norm": 0.4545416235923767, - "learning_rate": 1.3226720819054873e-05, - "loss": 0.0698, - "step": 40025 - }, - { - "epoch": 1.0161187968016245, - "grad_norm": 0.38006332516670227, - "learning_rate": 1.3225874687989171e-05, - "loss": 0.0477, - "step": 40030 - }, - { - "epoch": 1.01624571646148, - "grad_norm": 0.5668849349021912, - "learning_rate": 1.322502855692347e-05, - "loss": 0.0636, - "step": 40035 - }, - { - "epoch": 1.0163726361213352, - "grad_norm": 0.47415682673454285, - "learning_rate": 1.3224182425857766e-05, - "loss": 0.063, - "step": 40040 - }, - { - "epoch": 1.0164995557811904, - "grad_norm": 0.5188112854957581, - "learning_rate": 1.3223336294792065e-05, - "loss": 0.0621, - "step": 40045 - }, - { - "epoch": 1.016626475441046, - "grad_norm": 0.54042649269104, - "learning_rate": 1.3222490163726363e-05, - "loss": 0.0728, - "step": 40050 - }, - { - "epoch": 1.0167533951009011, - "grad_norm": 0.3710640072822571, - "learning_rate": 1.3221644032660661e-05, - "loss": 0.087, - "step": 40055 - }, - { - "epoch": 1.0168803147607564, - "grad_norm": 0.517937958240509, - "learning_rate": 1.3220797901594958e-05, - "loss": 0.0593, - "step": 40060 - }, - { - "epoch": 1.0170072344206118, - "grad_norm": 0.693785548210144, - "learning_rate": 1.3219951770529256e-05, - "loss": 0.0561, - "step": 40065 - }, - { - "epoch": 1.017134154080467, - "grad_norm": 0.35073375701904297, - "learning_rate": 1.3219105639463555e-05, - "loss": 0.0373, - "step": 40070 - }, - { - "epoch": 1.0172610737403223, - "grad_norm": 0.5563818216323853, - "learning_rate": 1.3218259508397853e-05, - "loss": 0.0584, - "step": 40075 - }, - { - "epoch": 1.0173879934001777, - "grad_norm": 0.28958022594451904, - "learning_rate": 1.3217413377332148e-05, - "loss": 0.0578, - "step": 40080 - }, - { - "epoch": 1.017514913060033, - "grad_norm": 0.5431613922119141, - "learning_rate": 1.3216567246266447e-05, - "loss": 0.059, - "step": 40085 - }, - { - "epoch": 1.0176418327198884, - "grad_norm": 0.43254074454307556, - "learning_rate": 1.3215721115200745e-05, - "loss": 0.0529, - "step": 40090 - }, - { - "epoch": 1.0177687523797436, - "grad_norm": 0.7585704326629639, - "learning_rate": 1.3214874984135044e-05, - "loss": 0.0824, - "step": 40095 - }, - { - "epoch": 1.0178956720395989, - "grad_norm": 0.42965167760849, - "learning_rate": 1.321402885306934e-05, - "loss": 0.0335, - "step": 40100 - }, - { - "epoch": 1.0180225916994543, - "grad_norm": 0.5238553285598755, - "learning_rate": 1.3213182722003639e-05, - "loss": 0.0578, - "step": 40105 - }, - { - "epoch": 1.0181495113593095, - "grad_norm": 0.4899773895740509, - "learning_rate": 1.3212336590937937e-05, - "loss": 0.0665, - "step": 40110 - }, - { - "epoch": 1.0182764310191648, - "grad_norm": 0.7925177812576294, - "learning_rate": 1.3211490459872235e-05, - "loss": 0.0611, - "step": 40115 - }, - { - "epoch": 1.0184033506790202, - "grad_norm": 0.27535685896873474, - "learning_rate": 1.3210644328806532e-05, - "loss": 0.0496, - "step": 40120 - }, - { - "epoch": 1.0185302703388754, - "grad_norm": 0.9689750671386719, - "learning_rate": 1.320979819774083e-05, - "loss": 0.0543, - "step": 40125 - }, - { - "epoch": 1.018657189998731, - "grad_norm": 0.7085169553756714, - "learning_rate": 1.3208952066675129e-05, - "loss": 0.0581, - "step": 40130 - }, - { - "epoch": 1.0187841096585861, - "grad_norm": 0.7547633647918701, - "learning_rate": 1.3208105935609427e-05, - "loss": 0.0586, - "step": 40135 - }, - { - "epoch": 1.0189110293184414, - "grad_norm": 0.5993574857711792, - "learning_rate": 1.3207259804543724e-05, - "loss": 0.0568, - "step": 40140 - }, - { - "epoch": 1.0190379489782968, - "grad_norm": 0.39146748185157776, - "learning_rate": 1.3206413673478022e-05, - "loss": 0.048, - "step": 40145 - }, - { - "epoch": 1.019164868638152, - "grad_norm": 0.34194138646125793, - "learning_rate": 1.3205567542412321e-05, - "loss": 0.0558, - "step": 40150 - }, - { - "epoch": 1.0192917882980073, - "grad_norm": 0.4659612476825714, - "learning_rate": 1.320472141134662e-05, - "loss": 0.0597, - "step": 40155 - }, - { - "epoch": 1.0194187079578627, - "grad_norm": 0.501904308795929, - "learning_rate": 1.3203875280280916e-05, - "loss": 0.0625, - "step": 40160 - }, - { - "epoch": 1.019545627617718, - "grad_norm": 0.4502047598361969, - "learning_rate": 1.3203029149215214e-05, - "loss": 0.0693, - "step": 40165 - }, - { - "epoch": 1.0196725472775734, - "grad_norm": 0.47188329696655273, - "learning_rate": 1.3202183018149513e-05, - "loss": 0.0622, - "step": 40170 - }, - { - "epoch": 1.0197994669374286, - "grad_norm": 0.5226172208786011, - "learning_rate": 1.3201336887083811e-05, - "loss": 0.0566, - "step": 40175 - }, - { - "epoch": 1.0199263865972839, - "grad_norm": 0.5645157694816589, - "learning_rate": 1.3200490756018108e-05, - "loss": 0.0542, - "step": 40180 - }, - { - "epoch": 1.0200533062571393, - "grad_norm": 0.43624719977378845, - "learning_rate": 1.3199644624952406e-05, - "loss": 0.0632, - "step": 40185 - }, - { - "epoch": 1.0201802259169945, - "grad_norm": 1.1869235038757324, - "learning_rate": 1.3198798493886705e-05, - "loss": 0.0579, - "step": 40190 - }, - { - "epoch": 1.0203071455768498, - "grad_norm": 0.34499451518058777, - "learning_rate": 1.3197952362821003e-05, - "loss": 0.0531, - "step": 40195 - }, - { - "epoch": 1.0204340652367052, - "grad_norm": 0.4393627345561981, - "learning_rate": 1.3197106231755298e-05, - "loss": 0.0399, - "step": 40200 - }, - { - "epoch": 1.0205609848965604, - "grad_norm": 1.2753499746322632, - "learning_rate": 1.3196260100689598e-05, - "loss": 0.0605, - "step": 40205 - }, - { - "epoch": 1.020687904556416, - "grad_norm": 0.39626288414001465, - "learning_rate": 1.3195413969623897e-05, - "loss": 0.0652, - "step": 40210 - }, - { - "epoch": 1.0208148242162711, - "grad_norm": 0.5607436895370483, - "learning_rate": 1.3194567838558195e-05, - "loss": 0.0474, - "step": 40215 - }, - { - "epoch": 1.0209417438761264, - "grad_norm": 0.3512641489505768, - "learning_rate": 1.319372170749249e-05, - "loss": 0.0558, - "step": 40220 - }, - { - "epoch": 1.0210686635359818, - "grad_norm": 0.4654415249824524, - "learning_rate": 1.3192875576426788e-05, - "loss": 0.0547, - "step": 40225 - }, - { - "epoch": 1.021195583195837, - "grad_norm": 0.4180464744567871, - "learning_rate": 1.3192029445361087e-05, - "loss": 0.0288, - "step": 40230 - }, - { - "epoch": 1.0213225028556923, - "grad_norm": 0.539047360420227, - "learning_rate": 1.3191183314295385e-05, - "loss": 0.0764, - "step": 40235 - }, - { - "epoch": 1.0214494225155477, - "grad_norm": 0.5122261643409729, - "learning_rate": 1.3190337183229682e-05, - "loss": 0.045, - "step": 40240 - }, - { - "epoch": 1.021576342175403, - "grad_norm": 0.7635321617126465, - "learning_rate": 1.318949105216398e-05, - "loss": 0.0785, - "step": 40245 - }, - { - "epoch": 1.0217032618352584, - "grad_norm": 0.39970117807388306, - "learning_rate": 1.3188644921098279e-05, - "loss": 0.0501, - "step": 40250 - }, - { - "epoch": 1.0218301814951136, - "grad_norm": 0.6380658149719238, - "learning_rate": 1.3187798790032577e-05, - "loss": 0.0553, - "step": 40255 - }, - { - "epoch": 1.0219571011549688, - "grad_norm": 0.4979270398616791, - "learning_rate": 1.3186952658966876e-05, - "loss": 0.0597, - "step": 40260 - }, - { - "epoch": 1.0220840208148243, - "grad_norm": 0.9343898892402649, - "learning_rate": 1.3186106527901172e-05, - "loss": 0.055, - "step": 40265 - }, - { - "epoch": 1.0222109404746795, - "grad_norm": 0.4481163024902344, - "learning_rate": 1.318526039683547e-05, - "loss": 0.0555, - "step": 40270 - }, - { - "epoch": 1.0223378601345348, - "grad_norm": 0.3293524980545044, - "learning_rate": 1.3184414265769769e-05, - "loss": 0.0547, - "step": 40275 - }, - { - "epoch": 1.0224647797943902, - "grad_norm": 0.40204760432243347, - "learning_rate": 1.3183568134704067e-05, - "loss": 0.0476, - "step": 40280 - }, - { - "epoch": 1.0225916994542454, - "grad_norm": 0.48302963376045227, - "learning_rate": 1.3182722003638364e-05, - "loss": 0.048, - "step": 40285 - }, - { - "epoch": 1.0227186191141007, - "grad_norm": 0.8360553979873657, - "learning_rate": 1.3181875872572663e-05, - "loss": 0.0644, - "step": 40290 - }, - { - "epoch": 1.0228455387739561, - "grad_norm": 0.4473345875740051, - "learning_rate": 1.3181029741506961e-05, - "loss": 0.0636, - "step": 40295 - }, - { - "epoch": 1.0229724584338113, - "grad_norm": 0.48262667655944824, - "learning_rate": 1.318018361044126e-05, - "loss": 0.0777, - "step": 40300 - }, - { - "epoch": 1.0230993780936668, - "grad_norm": 0.34904706478118896, - "learning_rate": 1.3179337479375556e-05, - "loss": 0.0594, - "step": 40305 - }, - { - "epoch": 1.023226297753522, - "grad_norm": 0.3099619150161743, - "learning_rate": 1.3178491348309854e-05, - "loss": 0.0618, - "step": 40310 - }, - { - "epoch": 1.0233532174133773, - "grad_norm": 0.5296275019645691, - "learning_rate": 1.3177645217244153e-05, - "loss": 0.065, - "step": 40315 - }, - { - "epoch": 1.0234801370732327, - "grad_norm": 0.3603265583515167, - "learning_rate": 1.3176799086178451e-05, - "loss": 0.0666, - "step": 40320 - }, - { - "epoch": 1.023607056733088, - "grad_norm": 0.4547267258167267, - "learning_rate": 1.3175952955112748e-05, - "loss": 0.0399, - "step": 40325 - }, - { - "epoch": 1.0237339763929432, - "grad_norm": 0.5154409408569336, - "learning_rate": 1.3175106824047046e-05, - "loss": 0.0447, - "step": 40330 - }, - { - "epoch": 1.0238608960527986, - "grad_norm": 0.4056219756603241, - "learning_rate": 1.3174260692981345e-05, - "loss": 0.0592, - "step": 40335 - }, - { - "epoch": 1.0239878157126538, - "grad_norm": 0.7250158190727234, - "learning_rate": 1.3173414561915643e-05, - "loss": 0.0586, - "step": 40340 - }, - { - "epoch": 1.0241147353725093, - "grad_norm": 0.49917295575141907, - "learning_rate": 1.317256843084994e-05, - "loss": 0.0489, - "step": 40345 - }, - { - "epoch": 1.0242416550323645, - "grad_norm": 0.5284388065338135, - "learning_rate": 1.3171722299784238e-05, - "loss": 0.057, - "step": 40350 - }, - { - "epoch": 1.0243685746922198, - "grad_norm": 0.4083913564682007, - "learning_rate": 1.3170876168718537e-05, - "loss": 0.0565, - "step": 40355 - }, - { - "epoch": 1.0244954943520752, - "grad_norm": 0.38513270020484924, - "learning_rate": 1.3170030037652835e-05, - "loss": 0.0407, - "step": 40360 - }, - { - "epoch": 1.0246224140119304, - "grad_norm": 0.593148410320282, - "learning_rate": 1.316918390658713e-05, - "loss": 0.0598, - "step": 40365 - }, - { - "epoch": 1.0247493336717857, - "grad_norm": 0.4632100462913513, - "learning_rate": 1.3168337775521429e-05, - "loss": 0.0508, - "step": 40370 - }, - { - "epoch": 1.0248762533316411, - "grad_norm": 0.6384402513504028, - "learning_rate": 1.3167491644455727e-05, - "loss": 0.0541, - "step": 40375 - }, - { - "epoch": 1.0250031729914963, - "grad_norm": 0.7770858407020569, - "learning_rate": 1.3166645513390027e-05, - "loss": 0.0481, - "step": 40380 - }, - { - "epoch": 1.0251300926513518, - "grad_norm": 0.4002634584903717, - "learning_rate": 1.3165799382324322e-05, - "loss": 0.0454, - "step": 40385 - }, - { - "epoch": 1.025257012311207, - "grad_norm": 0.6422095894813538, - "learning_rate": 1.316495325125862e-05, - "loss": 0.0433, - "step": 40390 - }, - { - "epoch": 1.0253839319710623, - "grad_norm": 0.6139222383499146, - "learning_rate": 1.3164107120192919e-05, - "loss": 0.0583, - "step": 40395 - }, - { - "epoch": 1.0255108516309177, - "grad_norm": 0.6438016295433044, - "learning_rate": 1.3163260989127217e-05, - "loss": 0.049, - "step": 40400 - }, - { - "epoch": 1.025637771290773, - "grad_norm": 0.48905715346336365, - "learning_rate": 1.3162414858061514e-05, - "loss": 0.0569, - "step": 40405 - }, - { - "epoch": 1.0257646909506282, - "grad_norm": 0.5039765238761902, - "learning_rate": 1.3161568726995812e-05, - "loss": 0.0472, - "step": 40410 - }, - { - "epoch": 1.0258916106104836, - "grad_norm": 0.5103529691696167, - "learning_rate": 1.316072259593011e-05, - "loss": 0.0493, - "step": 40415 - }, - { - "epoch": 1.0260185302703388, - "grad_norm": 0.7642925381660461, - "learning_rate": 1.3159876464864409e-05, - "loss": 0.0402, - "step": 40420 - }, - { - "epoch": 1.026145449930194, - "grad_norm": 0.5751630067825317, - "learning_rate": 1.3159030333798706e-05, - "loss": 0.0571, - "step": 40425 - }, - { - "epoch": 1.0262723695900495, - "grad_norm": 0.49819403886795044, - "learning_rate": 1.3158184202733004e-05, - "loss": 0.054, - "step": 40430 - }, - { - "epoch": 1.0263992892499048, - "grad_norm": 0.3899089992046356, - "learning_rate": 1.3157338071667303e-05, - "loss": 0.0279, - "step": 40435 - }, - { - "epoch": 1.0265262089097602, - "grad_norm": 0.7125265598297119, - "learning_rate": 1.3156491940601601e-05, - "loss": 0.0695, - "step": 40440 - }, - { - "epoch": 1.0266531285696154, - "grad_norm": 0.3489779829978943, - "learning_rate": 1.3155645809535898e-05, - "loss": 0.0448, - "step": 40445 - }, - { - "epoch": 1.0267800482294707, - "grad_norm": 0.4405103027820587, - "learning_rate": 1.3154799678470196e-05, - "loss": 0.045, - "step": 40450 - }, - { - "epoch": 1.0269069678893261, - "grad_norm": 0.5723069906234741, - "learning_rate": 1.3153953547404495e-05, - "loss": 0.0623, - "step": 40455 - }, - { - "epoch": 1.0270338875491813, - "grad_norm": 0.5829423666000366, - "learning_rate": 1.3153107416338793e-05, - "loss": 0.0522, - "step": 40460 - }, - { - "epoch": 1.0271608072090366, - "grad_norm": 0.48934951424598694, - "learning_rate": 1.315226128527309e-05, - "loss": 0.0694, - "step": 40465 - }, - { - "epoch": 1.027287726868892, - "grad_norm": 0.3294101357460022, - "learning_rate": 1.3151415154207388e-05, - "loss": 0.0485, - "step": 40470 - }, - { - "epoch": 1.0274146465287473, - "grad_norm": 0.8989230990409851, - "learning_rate": 1.3150569023141686e-05, - "loss": 0.064, - "step": 40475 - }, - { - "epoch": 1.0275415661886027, - "grad_norm": 0.5629283785820007, - "learning_rate": 1.3149722892075985e-05, - "loss": 0.0691, - "step": 40480 - }, - { - "epoch": 1.027668485848458, - "grad_norm": 0.5412095189094543, - "learning_rate": 1.3148876761010282e-05, - "loss": 0.0826, - "step": 40485 - }, - { - "epoch": 1.0277954055083132, - "grad_norm": 0.5287240147590637, - "learning_rate": 1.314803062994458e-05, - "loss": 0.053, - "step": 40490 - }, - { - "epoch": 1.0279223251681686, - "grad_norm": 0.8939244151115417, - "learning_rate": 1.3147184498878878e-05, - "loss": 0.0376, - "step": 40495 - }, - { - "epoch": 1.0280492448280238, - "grad_norm": 0.6483488082885742, - "learning_rate": 1.3146338367813177e-05, - "loss": 0.0728, - "step": 40500 - }, - { - "epoch": 1.028176164487879, - "grad_norm": 0.39960306882858276, - "learning_rate": 1.3145492236747472e-05, - "loss": 0.0515, - "step": 40505 - }, - { - "epoch": 1.0283030841477345, - "grad_norm": 0.5591258406639099, - "learning_rate": 1.314464610568177e-05, - "loss": 0.0358, - "step": 40510 - }, - { - "epoch": 1.0284300038075898, - "grad_norm": 1.578360676765442, - "learning_rate": 1.3143799974616069e-05, - "loss": 0.0764, - "step": 40515 - }, - { - "epoch": 1.0285569234674452, - "grad_norm": 0.31684452295303345, - "learning_rate": 1.3142953843550367e-05, - "loss": 0.045, - "step": 40520 - }, - { - "epoch": 1.0286838431273004, - "grad_norm": 1.068755030632019, - "learning_rate": 1.3142107712484664e-05, - "loss": 0.0614, - "step": 40525 - }, - { - "epoch": 1.0288107627871557, - "grad_norm": 1.271463394165039, - "learning_rate": 1.3141261581418962e-05, - "loss": 0.0493, - "step": 40530 - }, - { - "epoch": 1.0289376824470111, - "grad_norm": 0.636859118938446, - "learning_rate": 1.314041545035326e-05, - "loss": 0.0595, - "step": 40535 - }, - { - "epoch": 1.0290646021068663, - "grad_norm": 0.5615971088409424, - "learning_rate": 1.3139569319287559e-05, - "loss": 0.0529, - "step": 40540 - }, - { - "epoch": 1.0291915217667216, - "grad_norm": 0.6403688788414001, - "learning_rate": 1.3138723188221856e-05, - "loss": 0.0545, - "step": 40545 - }, - { - "epoch": 1.029318441426577, - "grad_norm": 0.4895595610141754, - "learning_rate": 1.3137877057156154e-05, - "loss": 0.0511, - "step": 40550 - }, - { - "epoch": 1.0294453610864323, - "grad_norm": 0.7288350462913513, - "learning_rate": 1.3137030926090452e-05, - "loss": 0.0435, - "step": 40555 - }, - { - "epoch": 1.0295722807462877, - "grad_norm": 0.7279900908470154, - "learning_rate": 1.3136184795024751e-05, - "loss": 0.0617, - "step": 40560 - }, - { - "epoch": 1.029699200406143, - "grad_norm": 0.39001378417015076, - "learning_rate": 1.3135338663959048e-05, - "loss": 0.0687, - "step": 40565 - }, - { - "epoch": 1.0298261200659982, - "grad_norm": 0.42589154839515686, - "learning_rate": 1.3134492532893346e-05, - "loss": 0.0614, - "step": 40570 - }, - { - "epoch": 1.0299530397258536, - "grad_norm": 0.41593098640441895, - "learning_rate": 1.3133646401827644e-05, - "loss": 0.0663, - "step": 40575 - }, - { - "epoch": 1.0300799593857088, - "grad_norm": 0.37593135237693787, - "learning_rate": 1.3132800270761943e-05, - "loss": 0.0537, - "step": 40580 - }, - { - "epoch": 1.030206879045564, - "grad_norm": 1.1812822818756104, - "learning_rate": 1.313195413969624e-05, - "loss": 0.0599, - "step": 40585 - }, - { - "epoch": 1.0303337987054195, - "grad_norm": 0.44667312502861023, - "learning_rate": 1.3131108008630538e-05, - "loss": 0.049, - "step": 40590 - }, - { - "epoch": 1.0304607183652748, - "grad_norm": 0.8215839862823486, - "learning_rate": 1.3130261877564836e-05, - "loss": 0.0568, - "step": 40595 - }, - { - "epoch": 1.0305876380251302, - "grad_norm": 0.5345288515090942, - "learning_rate": 1.3129415746499135e-05, - "loss": 0.0673, - "step": 40600 - }, - { - "epoch": 1.0307145576849854, - "grad_norm": 0.28709980845451355, - "learning_rate": 1.3128569615433431e-05, - "loss": 0.0429, - "step": 40605 - }, - { - "epoch": 1.0308414773448407, - "grad_norm": 0.6352481842041016, - "learning_rate": 1.312772348436773e-05, - "loss": 0.0535, - "step": 40610 - }, - { - "epoch": 1.0309683970046961, - "grad_norm": 0.3844361901283264, - "learning_rate": 1.3126877353302028e-05, - "loss": 0.0429, - "step": 40615 - }, - { - "epoch": 1.0310953166645513, - "grad_norm": 0.7225225567817688, - "learning_rate": 1.3126031222236327e-05, - "loss": 0.0672, - "step": 40620 - }, - { - "epoch": 1.0312222363244066, - "grad_norm": 1.085951566696167, - "learning_rate": 1.3125185091170622e-05, - "loss": 0.0627, - "step": 40625 - }, - { - "epoch": 1.031349155984262, - "grad_norm": 0.8149519562721252, - "learning_rate": 1.3124338960104922e-05, - "loss": 0.0586, - "step": 40630 - }, - { - "epoch": 1.0314760756441173, - "grad_norm": 0.5709579586982727, - "learning_rate": 1.312349282903922e-05, - "loss": 0.0605, - "step": 40635 - }, - { - "epoch": 1.0316029953039725, - "grad_norm": 0.4963061511516571, - "learning_rate": 1.3122646697973519e-05, - "loss": 0.0674, - "step": 40640 - }, - { - "epoch": 1.031729914963828, - "grad_norm": 0.31051504611968994, - "learning_rate": 1.3121800566907814e-05, - "loss": 0.045, - "step": 40645 - }, - { - "epoch": 1.0318568346236832, - "grad_norm": 0.5565371513366699, - "learning_rate": 1.3120954435842112e-05, - "loss": 0.062, - "step": 40650 - }, - { - "epoch": 1.0319837542835386, - "grad_norm": 0.4175560176372528, - "learning_rate": 1.312010830477641e-05, - "loss": 0.0419, - "step": 40655 - }, - { - "epoch": 1.0321106739433938, - "grad_norm": 0.3849213123321533, - "learning_rate": 1.3119262173710709e-05, - "loss": 0.0581, - "step": 40660 - }, - { - "epoch": 1.032237593603249, - "grad_norm": 0.6354408860206604, - "learning_rate": 1.3118416042645005e-05, - "loss": 0.0523, - "step": 40665 - }, - { - "epoch": 1.0323645132631045, - "grad_norm": 0.5572760701179504, - "learning_rate": 1.3117569911579304e-05, - "loss": 0.0688, - "step": 40670 - }, - { - "epoch": 1.0324914329229598, - "grad_norm": 0.47032082080841064, - "learning_rate": 1.3116723780513602e-05, - "loss": 0.0551, - "step": 40675 - }, - { - "epoch": 1.032618352582815, - "grad_norm": 0.4422062337398529, - "learning_rate": 1.31158776494479e-05, - "loss": 0.0414, - "step": 40680 - }, - { - "epoch": 1.0327452722426704, - "grad_norm": 0.3828607499599457, - "learning_rate": 1.3115031518382197e-05, - "loss": 0.0637, - "step": 40685 - }, - { - "epoch": 1.0328721919025257, - "grad_norm": 0.48387739062309265, - "learning_rate": 1.3114185387316496e-05, - "loss": 0.0548, - "step": 40690 - }, - { - "epoch": 1.0329991115623811, - "grad_norm": 0.6456672549247742, - "learning_rate": 1.3113339256250794e-05, - "loss": 0.0548, - "step": 40695 - }, - { - "epoch": 1.0331260312222363, - "grad_norm": 0.7888874411582947, - "learning_rate": 1.3112493125185093e-05, - "loss": 0.0711, - "step": 40700 - }, - { - "epoch": 1.0332529508820916, - "grad_norm": 0.43859273195266724, - "learning_rate": 1.311164699411939e-05, - "loss": 0.0681, - "step": 40705 - }, - { - "epoch": 1.033379870541947, - "grad_norm": 0.5580222010612488, - "learning_rate": 1.3110800863053688e-05, - "loss": 0.056, - "step": 40710 - }, - { - "epoch": 1.0335067902018023, - "grad_norm": 0.42131930589675903, - "learning_rate": 1.3109954731987986e-05, - "loss": 0.0626, - "step": 40715 - }, - { - "epoch": 1.0336337098616575, - "grad_norm": 0.3395465612411499, - "learning_rate": 1.3109108600922284e-05, - "loss": 0.0419, - "step": 40720 - }, - { - "epoch": 1.033760629521513, - "grad_norm": 0.5501566529273987, - "learning_rate": 1.3108262469856581e-05, - "loss": 0.068, - "step": 40725 - }, - { - "epoch": 1.0338875491813682, - "grad_norm": 0.5833454132080078, - "learning_rate": 1.310741633879088e-05, - "loss": 0.0686, - "step": 40730 - }, - { - "epoch": 1.0340144688412236, - "grad_norm": 0.3969499468803406, - "learning_rate": 1.3106570207725178e-05, - "loss": 0.0456, - "step": 40735 - }, - { - "epoch": 1.0341413885010788, - "grad_norm": 0.4157037138938904, - "learning_rate": 1.3105724076659476e-05, - "loss": 0.0538, - "step": 40740 - }, - { - "epoch": 1.034268308160934, - "grad_norm": 0.46883460879325867, - "learning_rate": 1.3104877945593773e-05, - "loss": 0.0506, - "step": 40745 - }, - { - "epoch": 1.0343952278207895, - "grad_norm": 0.36312970519065857, - "learning_rate": 1.3104031814528071e-05, - "loss": 0.0487, - "step": 40750 - }, - { - "epoch": 1.0345221474806447, - "grad_norm": 0.38549262285232544, - "learning_rate": 1.310318568346237e-05, - "loss": 0.0625, - "step": 40755 - }, - { - "epoch": 1.0346490671405, - "grad_norm": 0.5959295034408569, - "learning_rate": 1.3102339552396668e-05, - "loss": 0.0574, - "step": 40760 - }, - { - "epoch": 1.0347759868003554, - "grad_norm": 0.5131233930587769, - "learning_rate": 1.3101493421330963e-05, - "loss": 0.0609, - "step": 40765 - }, - { - "epoch": 1.0349029064602107, - "grad_norm": 0.25007039308547974, - "learning_rate": 1.3100647290265263e-05, - "loss": 0.0582, - "step": 40770 - }, - { - "epoch": 1.0350298261200659, - "grad_norm": 0.6663190126419067, - "learning_rate": 1.3099801159199562e-05, - "loss": 0.052, - "step": 40775 - }, - { - "epoch": 1.0351567457799213, - "grad_norm": 0.41600796580314636, - "learning_rate": 1.309895502813386e-05, - "loss": 0.045, - "step": 40780 - }, - { - "epoch": 1.0352836654397766, - "grad_norm": 0.6051861047744751, - "learning_rate": 1.3098108897068159e-05, - "loss": 0.0584, - "step": 40785 - }, - { - "epoch": 1.035410585099632, - "grad_norm": 0.3984309434890747, - "learning_rate": 1.3097262766002454e-05, - "loss": 0.05, - "step": 40790 - }, - { - "epoch": 1.0355375047594872, - "grad_norm": 0.6245915293693542, - "learning_rate": 1.3096416634936752e-05, - "loss": 0.0695, - "step": 40795 - }, - { - "epoch": 1.0356644244193425, - "grad_norm": 0.632346510887146, - "learning_rate": 1.309557050387105e-05, - "loss": 0.055, - "step": 40800 - }, - { - "epoch": 1.035791344079198, - "grad_norm": 0.36502668261528015, - "learning_rate": 1.309472437280535e-05, - "loss": 0.048, - "step": 40805 - }, - { - "epoch": 1.0359182637390532, - "grad_norm": 0.6485906839370728, - "learning_rate": 1.3093878241739646e-05, - "loss": 0.0558, - "step": 40810 - }, - { - "epoch": 1.0360451833989084, - "grad_norm": 0.5418567657470703, - "learning_rate": 1.3093032110673944e-05, - "loss": 0.0526, - "step": 40815 - }, - { - "epoch": 1.0361721030587638, - "grad_norm": 0.2968630790710449, - "learning_rate": 1.3092185979608242e-05, - "loss": 0.0591, - "step": 40820 - }, - { - "epoch": 1.036299022718619, - "grad_norm": 0.3394763171672821, - "learning_rate": 1.309133984854254e-05, - "loss": 0.0521, - "step": 40825 - }, - { - "epoch": 1.0364259423784745, - "grad_norm": 0.7563051581382751, - "learning_rate": 1.3090493717476837e-05, - "loss": 0.0435, - "step": 40830 - }, - { - "epoch": 1.0365528620383297, - "grad_norm": 0.4781455993652344, - "learning_rate": 1.3089647586411136e-05, - "loss": 0.0398, - "step": 40835 - }, - { - "epoch": 1.036679781698185, - "grad_norm": 0.47994834184646606, - "learning_rate": 1.3088801455345434e-05, - "loss": 0.0543, - "step": 40840 - }, - { - "epoch": 1.0368067013580404, - "grad_norm": 0.5322094559669495, - "learning_rate": 1.3087955324279733e-05, - "loss": 0.0468, - "step": 40845 - }, - { - "epoch": 1.0369336210178957, - "grad_norm": 0.43174508213996887, - "learning_rate": 1.308710919321403e-05, - "loss": 0.0406, - "step": 40850 - }, - { - "epoch": 1.0370605406777509, - "grad_norm": 0.3303143084049225, - "learning_rate": 1.3086263062148328e-05, - "loss": 0.052, - "step": 40855 - }, - { - "epoch": 1.0371874603376063, - "grad_norm": 0.4156996011734009, - "learning_rate": 1.3085416931082626e-05, - "loss": 0.0358, - "step": 40860 - }, - { - "epoch": 1.0373143799974616, - "grad_norm": 0.5931384563446045, - "learning_rate": 1.3084570800016925e-05, - "loss": 0.0729, - "step": 40865 - }, - { - "epoch": 1.037441299657317, - "grad_norm": 0.47024017572402954, - "learning_rate": 1.3083724668951221e-05, - "loss": 0.0553, - "step": 40870 - }, - { - "epoch": 1.0375682193171722, - "grad_norm": 0.4050208032131195, - "learning_rate": 1.308287853788552e-05, - "loss": 0.0618, - "step": 40875 - }, - { - "epoch": 1.0376951389770275, - "grad_norm": 0.4300154149532318, - "learning_rate": 1.3082032406819818e-05, - "loss": 0.0761, - "step": 40880 - }, - { - "epoch": 1.037822058636883, - "grad_norm": 0.44024965167045593, - "learning_rate": 1.3081186275754116e-05, - "loss": 0.0393, - "step": 40885 - }, - { - "epoch": 1.0379489782967382, - "grad_norm": 0.47961392998695374, - "learning_rate": 1.3080340144688413e-05, - "loss": 0.0654, - "step": 40890 - }, - { - "epoch": 1.0380758979565934, - "grad_norm": 0.5136916637420654, - "learning_rate": 1.3079494013622712e-05, - "loss": 0.069, - "step": 40895 - }, - { - "epoch": 1.0382028176164488, - "grad_norm": 0.4905608296394348, - "learning_rate": 1.307864788255701e-05, - "loss": 0.0532, - "step": 40900 - }, - { - "epoch": 1.038329737276304, - "grad_norm": 0.36637407541275024, - "learning_rate": 1.3077801751491308e-05, - "loss": 0.05, - "step": 40905 - }, - { - "epoch": 1.0384566569361595, - "grad_norm": 0.49538102746009827, - "learning_rate": 1.3076955620425605e-05, - "loss": 0.0628, - "step": 40910 - }, - { - "epoch": 1.0385835765960147, - "grad_norm": 0.832433819770813, - "learning_rate": 1.3076109489359904e-05, - "loss": 0.0524, - "step": 40915 - }, - { - "epoch": 1.03871049625587, - "grad_norm": 0.5088241100311279, - "learning_rate": 1.3075263358294202e-05, - "loss": 0.0613, - "step": 40920 - }, - { - "epoch": 1.0388374159157254, - "grad_norm": 0.5898182392120361, - "learning_rate": 1.30744172272285e-05, - "loss": 0.0584, - "step": 40925 - }, - { - "epoch": 1.0389643355755807, - "grad_norm": 0.7681825160980225, - "learning_rate": 1.3073571096162795e-05, - "loss": 0.0587, - "step": 40930 - }, - { - "epoch": 1.0390912552354359, - "grad_norm": 0.44188955426216125, - "learning_rate": 1.3072724965097094e-05, - "loss": 0.0539, - "step": 40935 - }, - { - "epoch": 1.0392181748952913, - "grad_norm": 0.6205862164497375, - "learning_rate": 1.3071878834031392e-05, - "loss": 0.0461, - "step": 40940 - }, - { - "epoch": 1.0393450945551466, - "grad_norm": 0.5045483112335205, - "learning_rate": 1.307103270296569e-05, - "loss": 0.0385, - "step": 40945 - }, - { - "epoch": 1.039472014215002, - "grad_norm": 0.47149357199668884, - "learning_rate": 1.3070186571899987e-05, - "loss": 0.0602, - "step": 40950 - }, - { - "epoch": 1.0395989338748572, - "grad_norm": 0.4306657910346985, - "learning_rate": 1.3069340440834286e-05, - "loss": 0.0446, - "step": 40955 - }, - { - "epoch": 1.0397258535347125, - "grad_norm": 0.3988128900527954, - "learning_rate": 1.3068494309768584e-05, - "loss": 0.0373, - "step": 40960 - }, - { - "epoch": 1.039852773194568, - "grad_norm": 0.6555421352386475, - "learning_rate": 1.3067648178702882e-05, - "loss": 0.0567, - "step": 40965 - }, - { - "epoch": 1.0399796928544232, - "grad_norm": 0.3707217574119568, - "learning_rate": 1.306680204763718e-05, - "loss": 0.0622, - "step": 40970 - }, - { - "epoch": 1.0401066125142784, - "grad_norm": 0.6872326731681824, - "learning_rate": 1.3065955916571478e-05, - "loss": 0.0709, - "step": 40975 - }, - { - "epoch": 1.0402335321741338, - "grad_norm": 0.36929142475128174, - "learning_rate": 1.3065109785505776e-05, - "loss": 0.0536, - "step": 40980 - }, - { - "epoch": 1.040360451833989, - "grad_norm": 0.3172878324985504, - "learning_rate": 1.3064263654440074e-05, - "loss": 0.0605, - "step": 40985 - }, - { - "epoch": 1.0404873714938443, - "grad_norm": 0.5189022421836853, - "learning_rate": 1.3063417523374371e-05, - "loss": 0.0545, - "step": 40990 - }, - { - "epoch": 1.0406142911536997, - "grad_norm": 0.5013735890388489, - "learning_rate": 1.306257139230867e-05, - "loss": 0.0602, - "step": 40995 - }, - { - "epoch": 1.040741210813555, - "grad_norm": 0.5132792592048645, - "learning_rate": 1.3061725261242968e-05, - "loss": 0.0488, - "step": 41000 - }, - { - "epoch": 1.0408681304734104, - "grad_norm": 0.3402225375175476, - "learning_rate": 1.3060879130177266e-05, - "loss": 0.066, - "step": 41005 - }, - { - "epoch": 1.0409950501332657, - "grad_norm": 0.3965432941913605, - "learning_rate": 1.3060032999111563e-05, - "loss": 0.0577, - "step": 41010 - }, - { - "epoch": 1.0411219697931209, - "grad_norm": 0.40247949957847595, - "learning_rate": 1.3059186868045861e-05, - "loss": 0.0662, - "step": 41015 - }, - { - "epoch": 1.0412488894529763, - "grad_norm": 0.4573756754398346, - "learning_rate": 1.305834073698016e-05, - "loss": 0.0543, - "step": 41020 - }, - { - "epoch": 1.0413758091128316, - "grad_norm": 1.226308822631836, - "learning_rate": 1.3057494605914458e-05, - "loss": 0.0648, - "step": 41025 - }, - { - "epoch": 1.0415027287726868, - "grad_norm": 0.5421380996704102, - "learning_rate": 1.3056648474848755e-05, - "loss": 0.0535, - "step": 41030 - }, - { - "epoch": 1.0416296484325422, - "grad_norm": 0.4929865002632141, - "learning_rate": 1.3055802343783053e-05, - "loss": 0.0533, - "step": 41035 - }, - { - "epoch": 1.0417565680923975, - "grad_norm": 0.3096861243247986, - "learning_rate": 1.3054956212717352e-05, - "loss": 0.0706, - "step": 41040 - }, - { - "epoch": 1.041883487752253, - "grad_norm": 0.7679807543754578, - "learning_rate": 1.305411008165165e-05, - "loss": 0.0481, - "step": 41045 - }, - { - "epoch": 1.0420104074121082, - "grad_norm": 2.0490658283233643, - "learning_rate": 1.3053263950585947e-05, - "loss": 0.0377, - "step": 41050 - }, - { - "epoch": 1.0421373270719634, - "grad_norm": 0.5012702941894531, - "learning_rate": 1.3052417819520245e-05, - "loss": 0.0468, - "step": 41055 - }, - { - "epoch": 1.0422642467318188, - "grad_norm": 0.4662017524242401, - "learning_rate": 1.3051571688454544e-05, - "loss": 0.051, - "step": 41060 - }, - { - "epoch": 1.042391166391674, - "grad_norm": 0.7585615515708923, - "learning_rate": 1.3050725557388842e-05, - "loss": 0.0523, - "step": 41065 - }, - { - "epoch": 1.0425180860515293, - "grad_norm": 0.5473172664642334, - "learning_rate": 1.3049879426323137e-05, - "loss": 0.0417, - "step": 41070 - }, - { - "epoch": 1.0426450057113847, - "grad_norm": 0.4049126207828522, - "learning_rate": 1.3049033295257435e-05, - "loss": 0.0471, - "step": 41075 - }, - { - "epoch": 1.04277192537124, - "grad_norm": 0.8342042565345764, - "learning_rate": 1.3048187164191734e-05, - "loss": 0.0517, - "step": 41080 - }, - { - "epoch": 1.0428988450310954, - "grad_norm": 0.350705087184906, - "learning_rate": 1.3047341033126032e-05, - "loss": 0.0597, - "step": 41085 - }, - { - "epoch": 1.0430257646909507, - "grad_norm": 0.33364424109458923, - "learning_rate": 1.3046494902060329e-05, - "loss": 0.0562, - "step": 41090 - }, - { - "epoch": 1.0431526843508059, - "grad_norm": 0.31018775701522827, - "learning_rate": 1.3045648770994627e-05, - "loss": 0.0506, - "step": 41095 - }, - { - "epoch": 1.0432796040106613, - "grad_norm": 0.8837998509407043, - "learning_rate": 1.3044802639928926e-05, - "loss": 0.0581, - "step": 41100 - }, - { - "epoch": 1.0434065236705166, - "grad_norm": 0.2773641049861908, - "learning_rate": 1.3043956508863224e-05, - "loss": 0.0689, - "step": 41105 - }, - { - "epoch": 1.0435334433303718, - "grad_norm": 0.9898240566253662, - "learning_rate": 1.3043110377797521e-05, - "loss": 0.0688, - "step": 41110 - }, - { - "epoch": 1.0436603629902272, - "grad_norm": 0.4480316936969757, - "learning_rate": 1.304226424673182e-05, - "loss": 0.0453, - "step": 41115 - }, - { - "epoch": 1.0437872826500825, - "grad_norm": 0.4220537543296814, - "learning_rate": 1.3041418115666118e-05, - "loss": 0.0632, - "step": 41120 - }, - { - "epoch": 1.0439142023099377, - "grad_norm": 1.0142982006072998, - "learning_rate": 1.3040571984600416e-05, - "loss": 0.0552, - "step": 41125 - }, - { - "epoch": 1.0440411219697932, - "grad_norm": 0.8832979202270508, - "learning_rate": 1.3039725853534713e-05, - "loss": 0.069, - "step": 41130 - }, - { - "epoch": 1.0441680416296484, - "grad_norm": 0.8887441158294678, - "learning_rate": 1.3038879722469011e-05, - "loss": 0.0537, - "step": 41135 - }, - { - "epoch": 1.0442949612895038, - "grad_norm": 0.7114430665969849, - "learning_rate": 1.303803359140331e-05, - "loss": 0.0543, - "step": 41140 - }, - { - "epoch": 1.044421880949359, - "grad_norm": 0.4194078743457794, - "learning_rate": 1.3037187460337608e-05, - "loss": 0.0607, - "step": 41145 - }, - { - "epoch": 1.0445488006092143, - "grad_norm": 0.4281783699989319, - "learning_rate": 1.3036341329271905e-05, - "loss": 0.0492, - "step": 41150 - }, - { - "epoch": 1.0446757202690697, - "grad_norm": 0.4258081614971161, - "learning_rate": 1.3035495198206203e-05, - "loss": 0.0742, - "step": 41155 - }, - { - "epoch": 1.044802639928925, - "grad_norm": 0.4109033942222595, - "learning_rate": 1.3034649067140501e-05, - "loss": 0.0534, - "step": 41160 - }, - { - "epoch": 1.0449295595887802, - "grad_norm": 0.4237222373485565, - "learning_rate": 1.30338029360748e-05, - "loss": 0.0476, - "step": 41165 - }, - { - "epoch": 1.0450564792486357, - "grad_norm": 0.5842612981796265, - "learning_rate": 1.3032956805009097e-05, - "loss": 0.0626, - "step": 41170 - }, - { - "epoch": 1.0451833989084909, - "grad_norm": 0.36833956837654114, - "learning_rate": 1.3032110673943395e-05, - "loss": 0.0716, - "step": 41175 - }, - { - "epoch": 1.0453103185683463, - "grad_norm": 0.42371806502342224, - "learning_rate": 1.3031264542877693e-05, - "loss": 0.0438, - "step": 41180 - }, - { - "epoch": 1.0454372382282016, - "grad_norm": 0.5331552624702454, - "learning_rate": 1.3030418411811992e-05, - "loss": 0.0465, - "step": 41185 - }, - { - "epoch": 1.0455641578880568, - "grad_norm": 0.4843643307685852, - "learning_rate": 1.3029572280746287e-05, - "loss": 0.0554, - "step": 41190 - }, - { - "epoch": 1.0456910775479122, - "grad_norm": 0.45012545585632324, - "learning_rate": 1.3028726149680587e-05, - "loss": 0.0491, - "step": 41195 - }, - { - "epoch": 1.0458179972077675, - "grad_norm": 0.6564866900444031, - "learning_rate": 1.3027880018614885e-05, - "loss": 0.0566, - "step": 41200 - }, - { - "epoch": 1.0459449168676227, - "grad_norm": 0.46404221653938293, - "learning_rate": 1.3027033887549184e-05, - "loss": 0.0472, - "step": 41205 - }, - { - "epoch": 1.0460718365274781, - "grad_norm": 0.37626105546951294, - "learning_rate": 1.3026187756483479e-05, - "loss": 0.059, - "step": 41210 - }, - { - "epoch": 1.0461987561873334, - "grad_norm": 0.4091545045375824, - "learning_rate": 1.3025341625417777e-05, - "loss": 0.0668, - "step": 41215 - }, - { - "epoch": 1.0463256758471888, - "grad_norm": 0.438631147146225, - "learning_rate": 1.3024495494352076e-05, - "loss": 0.0422, - "step": 41220 - }, - { - "epoch": 1.046452595507044, - "grad_norm": 0.39954641461372375, - "learning_rate": 1.3023649363286374e-05, - "loss": 0.0578, - "step": 41225 - }, - { - "epoch": 1.0465795151668993, - "grad_norm": 0.4953702688217163, - "learning_rate": 1.302280323222067e-05, - "loss": 0.0648, - "step": 41230 - }, - { - "epoch": 1.0467064348267547, - "grad_norm": 0.4113217890262604, - "learning_rate": 1.3021957101154969e-05, - "loss": 0.0585, - "step": 41235 - }, - { - "epoch": 1.04683335448661, - "grad_norm": 0.5556795597076416, - "learning_rate": 1.3021110970089267e-05, - "loss": 0.0561, - "step": 41240 - }, - { - "epoch": 1.0469602741464652, - "grad_norm": 0.3978025019168854, - "learning_rate": 1.3020264839023566e-05, - "loss": 0.0455, - "step": 41245 - }, - { - "epoch": 1.0470871938063206, - "grad_norm": 0.4425811767578125, - "learning_rate": 1.3019418707957863e-05, - "loss": 0.0551, - "step": 41250 - }, - { - "epoch": 1.0472141134661759, - "grad_norm": 0.4763433039188385, - "learning_rate": 1.3018572576892161e-05, - "loss": 0.0555, - "step": 41255 - }, - { - "epoch": 1.0473410331260313, - "grad_norm": 0.49364784359931946, - "learning_rate": 1.301772644582646e-05, - "loss": 0.0402, - "step": 41260 - }, - { - "epoch": 1.0474679527858866, - "grad_norm": 0.47031286358833313, - "learning_rate": 1.3016880314760758e-05, - "loss": 0.033, - "step": 41265 - }, - { - "epoch": 1.0475948724457418, - "grad_norm": 0.35705599188804626, - "learning_rate": 1.3016034183695054e-05, - "loss": 0.0455, - "step": 41270 - }, - { - "epoch": 1.0477217921055972, - "grad_norm": 0.3683982789516449, - "learning_rate": 1.3015188052629353e-05, - "loss": 0.0494, - "step": 41275 - }, - { - "epoch": 1.0478487117654525, - "grad_norm": 0.5447392463684082, - "learning_rate": 1.3014341921563651e-05, - "loss": 0.0447, - "step": 41280 - }, - { - "epoch": 1.0479756314253077, - "grad_norm": 0.4229360818862915, - "learning_rate": 1.301349579049795e-05, - "loss": 0.0542, - "step": 41285 - }, - { - "epoch": 1.0481025510851631, - "grad_norm": 0.3762662708759308, - "learning_rate": 1.3012649659432248e-05, - "loss": 0.0724, - "step": 41290 - }, - { - "epoch": 1.0482294707450184, - "grad_norm": 0.5307736992835999, - "learning_rate": 1.3011803528366545e-05, - "loss": 0.0776, - "step": 41295 - }, - { - "epoch": 1.0483563904048738, - "grad_norm": 0.49386435747146606, - "learning_rate": 1.3010957397300843e-05, - "loss": 0.051, - "step": 41300 - }, - { - "epoch": 1.048483310064729, - "grad_norm": 0.4532535672187805, - "learning_rate": 1.3010111266235142e-05, - "loss": 0.0499, - "step": 41305 - }, - { - "epoch": 1.0486102297245843, - "grad_norm": 0.7903286218643188, - "learning_rate": 1.300926513516944e-05, - "loss": 0.0519, - "step": 41310 - }, - { - "epoch": 1.0487371493844397, - "grad_norm": 0.696512758731842, - "learning_rate": 1.3008419004103737e-05, - "loss": 0.0716, - "step": 41315 - }, - { - "epoch": 1.048864069044295, - "grad_norm": 0.5634885430335999, - "learning_rate": 1.3007572873038035e-05, - "loss": 0.0597, - "step": 41320 - }, - { - "epoch": 1.0489909887041502, - "grad_norm": 0.382367879152298, - "learning_rate": 1.3006726741972334e-05, - "loss": 0.0446, - "step": 41325 - }, - { - "epoch": 1.0491179083640056, - "grad_norm": 0.44363489747047424, - "learning_rate": 1.3005880610906632e-05, - "loss": 0.0422, - "step": 41330 - }, - { - "epoch": 1.0492448280238609, - "grad_norm": 0.6158618330955505, - "learning_rate": 1.3005034479840929e-05, - "loss": 0.0525, - "step": 41335 - }, - { - "epoch": 1.049371747683716, - "grad_norm": 0.6332354545593262, - "learning_rate": 1.3004188348775227e-05, - "loss": 0.0542, - "step": 41340 - }, - { - "epoch": 1.0494986673435716, - "grad_norm": 0.5567050576210022, - "learning_rate": 1.3003342217709525e-05, - "loss": 0.044, - "step": 41345 - }, - { - "epoch": 1.0496255870034268, - "grad_norm": 0.6058274507522583, - "learning_rate": 1.3002496086643824e-05, - "loss": 0.0571, - "step": 41350 - }, - { - "epoch": 1.0497525066632822, - "grad_norm": 0.4363894462585449, - "learning_rate": 1.3001649955578119e-05, - "loss": 0.0692, - "step": 41355 - }, - { - "epoch": 1.0498794263231375, - "grad_norm": 0.4741010367870331, - "learning_rate": 1.3000803824512417e-05, - "loss": 0.0627, - "step": 41360 - }, - { - "epoch": 1.0500063459829927, - "grad_norm": 0.7157001495361328, - "learning_rate": 1.2999957693446716e-05, - "loss": 0.0657, - "step": 41365 - }, - { - "epoch": 1.0501332656428481, - "grad_norm": 0.4229941964149475, - "learning_rate": 1.2999111562381016e-05, - "loss": 0.0596, - "step": 41370 - }, - { - "epoch": 1.0502601853027034, - "grad_norm": 0.6348133683204651, - "learning_rate": 1.299826543131531e-05, - "loss": 0.0537, - "step": 41375 - }, - { - "epoch": 1.0503871049625586, - "grad_norm": 0.39400652050971985, - "learning_rate": 1.299741930024961e-05, - "loss": 0.07, - "step": 41380 - }, - { - "epoch": 1.050514024622414, - "grad_norm": 1.6450833082199097, - "learning_rate": 1.2996573169183908e-05, - "loss": 0.0479, - "step": 41385 - }, - { - "epoch": 1.0506409442822693, - "grad_norm": 0.4368126094341278, - "learning_rate": 1.2995727038118206e-05, - "loss": 0.0687, - "step": 41390 - }, - { - "epoch": 1.0507678639421247, - "grad_norm": 0.532996416091919, - "learning_rate": 1.2994880907052503e-05, - "loss": 0.0607, - "step": 41395 - }, - { - "epoch": 1.05089478360198, - "grad_norm": 0.4050813615322113, - "learning_rate": 1.2994034775986801e-05, - "loss": 0.0522, - "step": 41400 - }, - { - "epoch": 1.0510217032618352, - "grad_norm": 1.8196989297866821, - "learning_rate": 1.29931886449211e-05, - "loss": 0.0769, - "step": 41405 - }, - { - "epoch": 1.0511486229216906, - "grad_norm": 0.9790627956390381, - "learning_rate": 1.2992342513855398e-05, - "loss": 0.0513, - "step": 41410 - }, - { - "epoch": 1.0512755425815459, - "grad_norm": 0.3614395558834076, - "learning_rate": 1.2991496382789695e-05, - "loss": 0.0523, - "step": 41415 - }, - { - "epoch": 1.051402462241401, - "grad_norm": 1.5521413087844849, - "learning_rate": 1.2990650251723993e-05, - "loss": 0.06, - "step": 41420 - }, - { - "epoch": 1.0515293819012566, - "grad_norm": 0.616723358631134, - "learning_rate": 1.2989804120658291e-05, - "loss": 0.0606, - "step": 41425 - }, - { - "epoch": 1.0516563015611118, - "grad_norm": 0.37726232409477234, - "learning_rate": 1.298895798959259e-05, - "loss": 0.049, - "step": 41430 - }, - { - "epoch": 1.0517832212209672, - "grad_norm": 0.5190566778182983, - "learning_rate": 1.2988111858526886e-05, - "loss": 0.0439, - "step": 41435 - }, - { - "epoch": 1.0519101408808225, - "grad_norm": 0.45911914110183716, - "learning_rate": 1.2987265727461185e-05, - "loss": 0.0465, - "step": 41440 - }, - { - "epoch": 1.0520370605406777, - "grad_norm": 0.5101099014282227, - "learning_rate": 1.2986419596395483e-05, - "loss": 0.0381, - "step": 41445 - }, - { - "epoch": 1.0521639802005331, - "grad_norm": 0.5083696842193604, - "learning_rate": 1.2985573465329782e-05, - "loss": 0.0696, - "step": 41450 - }, - { - "epoch": 1.0522908998603884, - "grad_norm": 0.5795021653175354, - "learning_rate": 1.2984727334264078e-05, - "loss": 0.0549, - "step": 41455 - }, - { - "epoch": 1.0524178195202436, - "grad_norm": 0.6830499172210693, - "learning_rate": 1.2983881203198377e-05, - "loss": 0.0488, - "step": 41460 - }, - { - "epoch": 1.052544739180099, - "grad_norm": 0.5565662384033203, - "learning_rate": 1.2983035072132675e-05, - "loss": 0.0703, - "step": 41465 - }, - { - "epoch": 1.0526716588399543, - "grad_norm": 0.5908958911895752, - "learning_rate": 1.2982188941066974e-05, - "loss": 0.0618, - "step": 41470 - }, - { - "epoch": 1.0527985784998095, - "grad_norm": 0.4641123414039612, - "learning_rate": 1.298134281000127e-05, - "loss": 0.0847, - "step": 41475 - }, - { - "epoch": 1.052925498159665, - "grad_norm": 0.4429587721824646, - "learning_rate": 1.2980496678935569e-05, - "loss": 0.0413, - "step": 41480 - }, - { - "epoch": 1.0530524178195202, - "grad_norm": 1.0130102634429932, - "learning_rate": 1.2979650547869867e-05, - "loss": 0.0517, - "step": 41485 - }, - { - "epoch": 1.0531793374793756, - "grad_norm": 0.677467405796051, - "learning_rate": 1.2978804416804166e-05, - "loss": 0.0575, - "step": 41490 - }, - { - "epoch": 1.0533062571392309, - "grad_norm": 0.5158794522285461, - "learning_rate": 1.297795828573846e-05, - "loss": 0.0437, - "step": 41495 - }, - { - "epoch": 1.053433176799086, - "grad_norm": 0.34845227003097534, - "learning_rate": 1.2977112154672759e-05, - "loss": 0.0494, - "step": 41500 - }, - { - "epoch": 1.0535600964589416, - "grad_norm": 0.41721633076667786, - "learning_rate": 1.2976266023607057e-05, - "loss": 0.0327, - "step": 41505 - }, - { - "epoch": 1.0536870161187968, - "grad_norm": 0.41037458181381226, - "learning_rate": 1.2975419892541356e-05, - "loss": 0.0653, - "step": 41510 - }, - { - "epoch": 1.053813935778652, - "grad_norm": 0.5133470296859741, - "learning_rate": 1.2974573761475652e-05, - "loss": 0.0554, - "step": 41515 - }, - { - "epoch": 1.0539408554385075, - "grad_norm": 0.44084250926971436, - "learning_rate": 1.2973727630409951e-05, - "loss": 0.0499, - "step": 41520 - }, - { - "epoch": 1.0540677750983627, - "grad_norm": 0.5562013983726501, - "learning_rate": 1.297288149934425e-05, - "loss": 0.0605, - "step": 41525 - }, - { - "epoch": 1.0541946947582181, - "grad_norm": 0.39417538046836853, - "learning_rate": 1.2972035368278548e-05, - "loss": 0.043, - "step": 41530 - }, - { - "epoch": 1.0543216144180734, - "grad_norm": 0.38509324193000793, - "learning_rate": 1.2971189237212844e-05, - "loss": 0.0611, - "step": 41535 - }, - { - "epoch": 1.0544485340779286, - "grad_norm": 0.6170629262924194, - "learning_rate": 1.2970343106147143e-05, - "loss": 0.0555, - "step": 41540 - }, - { - "epoch": 1.054575453737784, - "grad_norm": 0.6523053050041199, - "learning_rate": 1.2969496975081441e-05, - "loss": 0.0482, - "step": 41545 - }, - { - "epoch": 1.0547023733976393, - "grad_norm": 0.576227605342865, - "learning_rate": 1.296865084401574e-05, - "loss": 0.0511, - "step": 41550 - }, - { - "epoch": 1.0548292930574945, - "grad_norm": 0.531326949596405, - "learning_rate": 1.2967804712950036e-05, - "loss": 0.0705, - "step": 41555 - }, - { - "epoch": 1.05495621271735, - "grad_norm": 0.3677254319190979, - "learning_rate": 1.2966958581884335e-05, - "loss": 0.0661, - "step": 41560 - }, - { - "epoch": 1.0550831323772052, - "grad_norm": 0.3382275700569153, - "learning_rate": 1.2966112450818633e-05, - "loss": 0.0716, - "step": 41565 - }, - { - "epoch": 1.0552100520370606, - "grad_norm": 1.0489662885665894, - "learning_rate": 1.2965266319752931e-05, - "loss": 0.0525, - "step": 41570 - }, - { - "epoch": 1.0553369716969159, - "grad_norm": 0.6365177035331726, - "learning_rate": 1.2964420188687228e-05, - "loss": 0.0661, - "step": 41575 - }, - { - "epoch": 1.055463891356771, - "grad_norm": 0.4041413366794586, - "learning_rate": 1.2963574057621527e-05, - "loss": 0.0752, - "step": 41580 - }, - { - "epoch": 1.0555908110166266, - "grad_norm": 0.5446473360061646, - "learning_rate": 1.2962727926555825e-05, - "loss": 0.0646, - "step": 41585 - }, - { - "epoch": 1.0557177306764818, - "grad_norm": 0.4041541814804077, - "learning_rate": 1.2961881795490123e-05, - "loss": 0.0481, - "step": 41590 - }, - { - "epoch": 1.055844650336337, - "grad_norm": 0.35527533292770386, - "learning_rate": 1.296103566442442e-05, - "loss": 0.0719, - "step": 41595 - }, - { - "epoch": 1.0559715699961925, - "grad_norm": 2.0991220474243164, - "learning_rate": 1.2960189533358719e-05, - "loss": 0.0577, - "step": 41600 - }, - { - "epoch": 1.0560984896560477, - "grad_norm": 0.645781934261322, - "learning_rate": 1.2959343402293017e-05, - "loss": 0.0661, - "step": 41605 - }, - { - "epoch": 1.056225409315903, - "grad_norm": 0.6069595217704773, - "learning_rate": 1.2958497271227315e-05, - "loss": 0.0518, - "step": 41610 - }, - { - "epoch": 1.0563523289757584, - "grad_norm": 0.4326424300670624, - "learning_rate": 1.2957651140161612e-05, - "loss": 0.0663, - "step": 41615 - }, - { - "epoch": 1.0564792486356136, - "grad_norm": 0.6016061902046204, - "learning_rate": 1.295680500909591e-05, - "loss": 0.0638, - "step": 41620 - }, - { - "epoch": 1.056606168295469, - "grad_norm": 0.44909918308258057, - "learning_rate": 1.2955958878030209e-05, - "loss": 0.051, - "step": 41625 - }, - { - "epoch": 1.0567330879553243, - "grad_norm": 0.5703963041305542, - "learning_rate": 1.2955112746964507e-05, - "loss": 0.0584, - "step": 41630 - }, - { - "epoch": 1.0568600076151795, - "grad_norm": 0.4296400249004364, - "learning_rate": 1.2954266615898802e-05, - "loss": 0.0541, - "step": 41635 - }, - { - "epoch": 1.056986927275035, - "grad_norm": 0.3648775815963745, - "learning_rate": 1.29534204848331e-05, - "loss": 0.0737, - "step": 41640 - }, - { - "epoch": 1.0571138469348902, - "grad_norm": 0.43253615498542786, - "learning_rate": 1.2952574353767399e-05, - "loss": 0.0434, - "step": 41645 - }, - { - "epoch": 1.0572407665947456, - "grad_norm": 0.5994099974632263, - "learning_rate": 1.2951728222701697e-05, - "loss": 0.0783, - "step": 41650 - }, - { - "epoch": 1.0573676862546009, - "grad_norm": 0.3573848605155945, - "learning_rate": 1.2950882091635994e-05, - "loss": 0.0411, - "step": 41655 - }, - { - "epoch": 1.057494605914456, - "grad_norm": 0.33666154742240906, - "learning_rate": 1.2950035960570293e-05, - "loss": 0.0442, - "step": 41660 - }, - { - "epoch": 1.0576215255743115, - "grad_norm": 0.4949266314506531, - "learning_rate": 1.2949189829504591e-05, - "loss": 0.0476, - "step": 41665 - }, - { - "epoch": 1.0577484452341668, - "grad_norm": 0.512859582901001, - "learning_rate": 1.294834369843889e-05, - "loss": 0.0618, - "step": 41670 - }, - { - "epoch": 1.057875364894022, - "grad_norm": 0.41474372148513794, - "learning_rate": 1.2947497567373186e-05, - "loss": 0.0336, - "step": 41675 - }, - { - "epoch": 1.0580022845538775, - "grad_norm": 0.4341334104537964, - "learning_rate": 1.2946651436307484e-05, - "loss": 0.0659, - "step": 41680 - }, - { - "epoch": 1.0581292042137327, - "grad_norm": 0.4263514578342438, - "learning_rate": 1.2945805305241783e-05, - "loss": 0.0453, - "step": 41685 - }, - { - "epoch": 1.058256123873588, - "grad_norm": 0.4721200466156006, - "learning_rate": 1.2944959174176081e-05, - "loss": 0.0544, - "step": 41690 - }, - { - "epoch": 1.0583830435334434, - "grad_norm": 1.2367373704910278, - "learning_rate": 1.2944113043110378e-05, - "loss": 0.0684, - "step": 41695 - }, - { - "epoch": 1.0585099631932986, - "grad_norm": 0.48991626501083374, - "learning_rate": 1.2943266912044676e-05, - "loss": 0.0663, - "step": 41700 - }, - { - "epoch": 1.058636882853154, - "grad_norm": 0.35208696126937866, - "learning_rate": 1.2942420780978975e-05, - "loss": 0.0446, - "step": 41705 - }, - { - "epoch": 1.0587638025130093, - "grad_norm": 0.5334458947181702, - "learning_rate": 1.2941574649913273e-05, - "loss": 0.0568, - "step": 41710 - }, - { - "epoch": 1.0588907221728645, - "grad_norm": 1.460250973701477, - "learning_rate": 1.294072851884757e-05, - "loss": 0.0566, - "step": 41715 - }, - { - "epoch": 1.05901764183272, - "grad_norm": 0.6258809566497803, - "learning_rate": 1.2939882387781868e-05, - "loss": 0.0562, - "step": 41720 - }, - { - "epoch": 1.0591445614925752, - "grad_norm": 0.40572983026504517, - "learning_rate": 1.2939036256716167e-05, - "loss": 0.0625, - "step": 41725 - }, - { - "epoch": 1.0592714811524304, - "grad_norm": 0.35097536444664, - "learning_rate": 1.2938190125650465e-05, - "loss": 0.0517, - "step": 41730 - }, - { - "epoch": 1.0593984008122859, - "grad_norm": 0.5061612129211426, - "learning_rate": 1.2937343994584762e-05, - "loss": 0.0386, - "step": 41735 - }, - { - "epoch": 1.059525320472141, - "grad_norm": 0.5154628753662109, - "learning_rate": 1.293649786351906e-05, - "loss": 0.0556, - "step": 41740 - }, - { - "epoch": 1.0596522401319965, - "grad_norm": 0.38843926787376404, - "learning_rate": 1.2935651732453359e-05, - "loss": 0.0668, - "step": 41745 - }, - { - "epoch": 1.0597791597918518, - "grad_norm": 0.6814700961112976, - "learning_rate": 1.2934805601387657e-05, - "loss": 0.0519, - "step": 41750 - }, - { - "epoch": 1.059906079451707, - "grad_norm": 0.5098558068275452, - "learning_rate": 1.2933959470321952e-05, - "loss": 0.0594, - "step": 41755 - }, - { - "epoch": 1.0600329991115625, - "grad_norm": 0.3660115599632263, - "learning_rate": 1.2933113339256252e-05, - "loss": 0.0643, - "step": 41760 - }, - { - "epoch": 1.0601599187714177, - "grad_norm": 0.4940687119960785, - "learning_rate": 1.293226720819055e-05, - "loss": 0.0616, - "step": 41765 - }, - { - "epoch": 1.060286838431273, - "grad_norm": 0.4434888958930969, - "learning_rate": 1.2931421077124849e-05, - "loss": 0.0641, - "step": 41770 - }, - { - "epoch": 1.0604137580911284, - "grad_norm": 0.43244388699531555, - "learning_rate": 1.2930574946059144e-05, - "loss": 0.063, - "step": 41775 - }, - { - "epoch": 1.0605406777509836, - "grad_norm": 0.689229428768158, - "learning_rate": 1.2929728814993442e-05, - "loss": 0.048, - "step": 41780 - }, - { - "epoch": 1.060667597410839, - "grad_norm": 0.2657982110977173, - "learning_rate": 1.292888268392774e-05, - "loss": 0.0419, - "step": 41785 - }, - { - "epoch": 1.0607945170706943, - "grad_norm": 0.3324444890022278, - "learning_rate": 1.292803655286204e-05, - "loss": 0.0602, - "step": 41790 - }, - { - "epoch": 1.0609214367305495, - "grad_norm": 0.5876323580741882, - "learning_rate": 1.2927190421796336e-05, - "loss": 0.0673, - "step": 41795 - }, - { - "epoch": 1.061048356390405, - "grad_norm": 0.5054888725280762, - "learning_rate": 1.2926344290730634e-05, - "loss": 0.0667, - "step": 41800 - }, - { - "epoch": 1.0611752760502602, - "grad_norm": 0.8232479691505432, - "learning_rate": 1.2925498159664933e-05, - "loss": 0.0847, - "step": 41805 - }, - { - "epoch": 1.0613021957101154, - "grad_norm": 1.2482364177703857, - "learning_rate": 1.2924652028599231e-05, - "loss": 0.0467, - "step": 41810 - }, - { - "epoch": 1.0614291153699709, - "grad_norm": 0.4875280261039734, - "learning_rate": 1.292380589753353e-05, - "loss": 0.0563, - "step": 41815 - }, - { - "epoch": 1.061556035029826, - "grad_norm": 0.40900927782058716, - "learning_rate": 1.2922959766467826e-05, - "loss": 0.0508, - "step": 41820 - }, - { - "epoch": 1.0616829546896813, - "grad_norm": 0.7086466550827026, - "learning_rate": 1.2922113635402125e-05, - "loss": 0.0576, - "step": 41825 - }, - { - "epoch": 1.0618098743495368, - "grad_norm": 0.5868103504180908, - "learning_rate": 1.2921267504336423e-05, - "loss": 0.0516, - "step": 41830 - }, - { - "epoch": 1.061936794009392, - "grad_norm": 0.5205690264701843, - "learning_rate": 1.2920421373270721e-05, - "loss": 0.0564, - "step": 41835 - }, - { - "epoch": 1.0620637136692475, - "grad_norm": 0.9364803433418274, - "learning_rate": 1.2919575242205018e-05, - "loss": 0.0714, - "step": 41840 - }, - { - "epoch": 1.0621906333291027, - "grad_norm": 0.6369641423225403, - "learning_rate": 1.2918729111139316e-05, - "loss": 0.0491, - "step": 41845 - }, - { - "epoch": 1.062317552988958, - "grad_norm": 0.4906681478023529, - "learning_rate": 1.2917882980073615e-05, - "loss": 0.0624, - "step": 41850 - }, - { - "epoch": 1.0624444726488134, - "grad_norm": 0.5375103950500488, - "learning_rate": 1.2917036849007913e-05, - "loss": 0.0816, - "step": 41855 - }, - { - "epoch": 1.0625713923086686, - "grad_norm": 0.5704138278961182, - "learning_rate": 1.291619071794221e-05, - "loss": 0.0553, - "step": 41860 - }, - { - "epoch": 1.0626983119685238, - "grad_norm": 0.4132044315338135, - "learning_rate": 1.2915344586876508e-05, - "loss": 0.0553, - "step": 41865 - }, - { - "epoch": 1.0628252316283793, - "grad_norm": 0.336789608001709, - "learning_rate": 1.2914498455810807e-05, - "loss": 0.0531, - "step": 41870 - }, - { - "epoch": 1.0629521512882345, - "grad_norm": 0.4472302496433258, - "learning_rate": 1.2913652324745105e-05, - "loss": 0.0629, - "step": 41875 - }, - { - "epoch": 1.06307907094809, - "grad_norm": 0.2860460877418518, - "learning_rate": 1.2912806193679402e-05, - "loss": 0.045, - "step": 41880 - }, - { - "epoch": 1.0632059906079452, - "grad_norm": 0.5858103036880493, - "learning_rate": 1.29119600626137e-05, - "loss": 0.06, - "step": 41885 - }, - { - "epoch": 1.0633329102678004, - "grad_norm": 0.38002532720565796, - "learning_rate": 1.2911113931547999e-05, - "loss": 0.0542, - "step": 41890 - }, - { - "epoch": 1.0634598299276559, - "grad_norm": 0.38793277740478516, - "learning_rate": 1.2910267800482297e-05, - "loss": 0.0568, - "step": 41895 - }, - { - "epoch": 1.063586749587511, - "grad_norm": 0.4240960478782654, - "learning_rate": 1.2909421669416594e-05, - "loss": 0.0497, - "step": 41900 - }, - { - "epoch": 1.0637136692473663, - "grad_norm": 0.44176360964775085, - "learning_rate": 1.2908575538350892e-05, - "loss": 0.0551, - "step": 41905 - }, - { - "epoch": 1.0638405889072218, - "grad_norm": 0.4909718632698059, - "learning_rate": 1.290772940728519e-05, - "loss": 0.0326, - "step": 41910 - }, - { - "epoch": 1.063967508567077, - "grad_norm": 0.7133340239524841, - "learning_rate": 1.2906883276219489e-05, - "loss": 0.0611, - "step": 41915 - }, - { - "epoch": 1.0640944282269325, - "grad_norm": 0.47759154438972473, - "learning_rate": 1.2906037145153784e-05, - "loss": 0.0592, - "step": 41920 - }, - { - "epoch": 1.0642213478867877, - "grad_norm": 0.6294342875480652, - "learning_rate": 1.2905191014088082e-05, - "loss": 0.0782, - "step": 41925 - }, - { - "epoch": 1.064348267546643, - "grad_norm": 0.5822111368179321, - "learning_rate": 1.2904344883022381e-05, - "loss": 0.059, - "step": 41930 - }, - { - "epoch": 1.0644751872064984, - "grad_norm": 0.5212265849113464, - "learning_rate": 1.2903498751956681e-05, - "loss": 0.0695, - "step": 41935 - }, - { - "epoch": 1.0646021068663536, - "grad_norm": 0.5031025409698486, - "learning_rate": 1.2902652620890976e-05, - "loss": 0.0546, - "step": 41940 - }, - { - "epoch": 1.0647290265262088, - "grad_norm": 0.36730527877807617, - "learning_rate": 1.2901806489825274e-05, - "loss": 0.0596, - "step": 41945 - }, - { - "epoch": 1.0648559461860643, - "grad_norm": 0.5336204171180725, - "learning_rate": 1.2900960358759573e-05, - "loss": 0.0538, - "step": 41950 - }, - { - "epoch": 1.0649828658459195, - "grad_norm": 0.36350736021995544, - "learning_rate": 1.2900114227693871e-05, - "loss": 0.0527, - "step": 41955 - }, - { - "epoch": 1.0651097855057747, - "grad_norm": 0.5829561352729797, - "learning_rate": 1.2899268096628168e-05, - "loss": 0.0474, - "step": 41960 - }, - { - "epoch": 1.0652367051656302, - "grad_norm": 0.5711713433265686, - "learning_rate": 1.2898421965562466e-05, - "loss": 0.0469, - "step": 41965 - }, - { - "epoch": 1.0653636248254854, - "grad_norm": 0.5037083625793457, - "learning_rate": 1.2897575834496765e-05, - "loss": 0.0322, - "step": 41970 - }, - { - "epoch": 1.0654905444853409, - "grad_norm": 0.6177434325218201, - "learning_rate": 1.2896729703431063e-05, - "loss": 0.0786, - "step": 41975 - }, - { - "epoch": 1.065617464145196, - "grad_norm": 0.3645039200782776, - "learning_rate": 1.289588357236536e-05, - "loss": 0.041, - "step": 41980 - }, - { - "epoch": 1.0657443838050513, - "grad_norm": 0.3139374554157257, - "learning_rate": 1.2895037441299658e-05, - "loss": 0.0473, - "step": 41985 - }, - { - "epoch": 1.0658713034649068, - "grad_norm": 0.2567952275276184, - "learning_rate": 1.2894191310233957e-05, - "loss": 0.0594, - "step": 41990 - }, - { - "epoch": 1.065998223124762, - "grad_norm": 0.5599599480628967, - "learning_rate": 1.2893345179168255e-05, - "loss": 0.0714, - "step": 41995 - }, - { - "epoch": 1.0661251427846175, - "grad_norm": 0.39078518748283386, - "learning_rate": 1.2892499048102552e-05, - "loss": 0.0418, - "step": 42000 - }, - { - "epoch": 1.0662520624444727, - "grad_norm": 0.6535588502883911, - "learning_rate": 1.289165291703685e-05, - "loss": 0.0644, - "step": 42005 - }, - { - "epoch": 1.066378982104328, - "grad_norm": 0.4336315989494324, - "learning_rate": 1.2890806785971149e-05, - "loss": 0.0456, - "step": 42010 - }, - { - "epoch": 1.0665059017641834, - "grad_norm": 0.41762563586235046, - "learning_rate": 1.2889960654905447e-05, - "loss": 0.0442, - "step": 42015 - }, - { - "epoch": 1.0666328214240386, - "grad_norm": 0.4195692241191864, - "learning_rate": 1.2889114523839744e-05, - "loss": 0.0612, - "step": 42020 - }, - { - "epoch": 1.0667597410838938, - "grad_norm": 0.5571006536483765, - "learning_rate": 1.2888268392774042e-05, - "loss": 0.0514, - "step": 42025 - }, - { - "epoch": 1.0668866607437493, - "grad_norm": 0.5563896894454956, - "learning_rate": 1.288742226170834e-05, - "loss": 0.0848, - "step": 42030 - }, - { - "epoch": 1.0670135804036045, - "grad_norm": 0.34803032875061035, - "learning_rate": 1.2886576130642639e-05, - "loss": 0.0346, - "step": 42035 - }, - { - "epoch": 1.0671405000634597, - "grad_norm": 0.533142626285553, - "learning_rate": 1.2885729999576936e-05, - "loss": 0.0598, - "step": 42040 - }, - { - "epoch": 1.0672674197233152, - "grad_norm": 0.374454140663147, - "learning_rate": 1.2884883868511234e-05, - "loss": 0.0369, - "step": 42045 - }, - { - "epoch": 1.0673943393831704, - "grad_norm": 0.6367083787918091, - "learning_rate": 1.2884037737445532e-05, - "loss": 0.0518, - "step": 42050 - }, - { - "epoch": 1.0675212590430259, - "grad_norm": 0.4829799234867096, - "learning_rate": 1.288319160637983e-05, - "loss": 0.0417, - "step": 42055 - }, - { - "epoch": 1.067648178702881, - "grad_norm": 0.5866217613220215, - "learning_rate": 1.2882345475314126e-05, - "loss": 0.0441, - "step": 42060 - }, - { - "epoch": 1.0677750983627363, - "grad_norm": 0.43783724308013916, - "learning_rate": 1.2881499344248424e-05, - "loss": 0.0429, - "step": 42065 - }, - { - "epoch": 1.0679020180225918, - "grad_norm": 0.5638310313224792, - "learning_rate": 1.2880653213182723e-05, - "loss": 0.0668, - "step": 42070 - }, - { - "epoch": 1.068028937682447, - "grad_norm": 0.30877119302749634, - "learning_rate": 1.2879807082117021e-05, - "loss": 0.0433, - "step": 42075 - }, - { - "epoch": 1.0681558573423022, - "grad_norm": 0.4412735402584076, - "learning_rate": 1.2878960951051318e-05, - "loss": 0.0671, - "step": 42080 - }, - { - "epoch": 1.0682827770021577, - "grad_norm": 0.73014897108078, - "learning_rate": 1.2878114819985616e-05, - "loss": 0.0793, - "step": 42085 - }, - { - "epoch": 1.068409696662013, - "grad_norm": 0.6568683981895447, - "learning_rate": 1.2877268688919914e-05, - "loss": 0.054, - "step": 42090 - }, - { - "epoch": 1.0685366163218684, - "grad_norm": 0.9664768576622009, - "learning_rate": 1.2876422557854213e-05, - "loss": 0.0484, - "step": 42095 - }, - { - "epoch": 1.0686635359817236, - "grad_norm": 0.5709855556488037, - "learning_rate": 1.287557642678851e-05, - "loss": 0.0507, - "step": 42100 - }, - { - "epoch": 1.0687904556415788, - "grad_norm": 0.49944478273391724, - "learning_rate": 1.2874730295722808e-05, - "loss": 0.0806, - "step": 42105 - }, - { - "epoch": 1.0689173753014343, - "grad_norm": 0.48905467987060547, - "learning_rate": 1.2873884164657106e-05, - "loss": 0.0456, - "step": 42110 - }, - { - "epoch": 1.0690442949612895, - "grad_norm": 0.3490868806838989, - "learning_rate": 1.2873038033591405e-05, - "loss": 0.0664, - "step": 42115 - }, - { - "epoch": 1.0691712146211447, - "grad_norm": 0.43491068482398987, - "learning_rate": 1.2872191902525702e-05, - "loss": 0.0491, - "step": 42120 - }, - { - "epoch": 1.0692981342810002, - "grad_norm": 0.5942682027816772, - "learning_rate": 1.287134577146e-05, - "loss": 0.0626, - "step": 42125 - }, - { - "epoch": 1.0694250539408554, - "grad_norm": 0.33345967531204224, - "learning_rate": 1.2870499640394298e-05, - "loss": 0.0714, - "step": 42130 - }, - { - "epoch": 1.0695519736007109, - "grad_norm": 0.541355550289154, - "learning_rate": 1.2869653509328597e-05, - "loss": 0.0486, - "step": 42135 - }, - { - "epoch": 1.069678893260566, - "grad_norm": 0.41073912382125854, - "learning_rate": 1.2868807378262893e-05, - "loss": 0.0465, - "step": 42140 - }, - { - "epoch": 1.0698058129204213, - "grad_norm": 0.6094735264778137, - "learning_rate": 1.2867961247197192e-05, - "loss": 0.0465, - "step": 42145 - }, - { - "epoch": 1.0699327325802768, - "grad_norm": 0.7818964123725891, - "learning_rate": 1.286711511613149e-05, - "loss": 0.0879, - "step": 42150 - }, - { - "epoch": 1.070059652240132, - "grad_norm": 0.6388485431671143, - "learning_rate": 1.2866268985065789e-05, - "loss": 0.0629, - "step": 42155 - }, - { - "epoch": 1.0701865718999872, - "grad_norm": 0.5929128527641296, - "learning_rate": 1.2865422854000085e-05, - "loss": 0.0504, - "step": 42160 - }, - { - "epoch": 1.0703134915598427, - "grad_norm": 0.4144948720932007, - "learning_rate": 1.2864576722934384e-05, - "loss": 0.0543, - "step": 42165 - }, - { - "epoch": 1.070440411219698, - "grad_norm": 0.6530867218971252, - "learning_rate": 1.2863730591868682e-05, - "loss": 0.063, - "step": 42170 - }, - { - "epoch": 1.0705673308795531, - "grad_norm": 0.49089914560317993, - "learning_rate": 1.286288446080298e-05, - "loss": 0.0605, - "step": 42175 - }, - { - "epoch": 1.0706942505394086, - "grad_norm": 0.6176713109016418, - "learning_rate": 1.2862038329737276e-05, - "loss": 0.0627, - "step": 42180 - }, - { - "epoch": 1.0708211701992638, - "grad_norm": 0.6201483011245728, - "learning_rate": 1.2861192198671576e-05, - "loss": 0.0872, - "step": 42185 - }, - { - "epoch": 1.0709480898591193, - "grad_norm": 2.072798728942871, - "learning_rate": 1.2860346067605874e-05, - "loss": 0.0491, - "step": 42190 - }, - { - "epoch": 1.0710750095189745, - "grad_norm": 0.47410643100738525, - "learning_rate": 1.2859499936540172e-05, - "loss": 0.0527, - "step": 42195 - }, - { - "epoch": 1.0712019291788297, - "grad_norm": 0.38300082087516785, - "learning_rate": 1.2858653805474467e-05, - "loss": 0.0437, - "step": 42200 - }, - { - "epoch": 1.0713288488386852, - "grad_norm": 0.46066656708717346, - "learning_rate": 1.2857807674408766e-05, - "loss": 0.0628, - "step": 42205 - }, - { - "epoch": 1.0714557684985404, - "grad_norm": 0.3184831738471985, - "learning_rate": 1.2856961543343064e-05, - "loss": 0.0564, - "step": 42210 - }, - { - "epoch": 1.0715826881583956, - "grad_norm": 0.6285569667816162, - "learning_rate": 1.2856115412277363e-05, - "loss": 0.0696, - "step": 42215 - }, - { - "epoch": 1.071709607818251, - "grad_norm": 0.4707241952419281, - "learning_rate": 1.285526928121166e-05, - "loss": 0.0782, - "step": 42220 - }, - { - "epoch": 1.0718365274781063, - "grad_norm": 0.6177979707717896, - "learning_rate": 1.2854423150145958e-05, - "loss": 0.0647, - "step": 42225 - }, - { - "epoch": 1.0719634471379618, - "grad_norm": 0.41661927103996277, - "learning_rate": 1.2853577019080256e-05, - "loss": 0.059, - "step": 42230 - }, - { - "epoch": 1.072090366797817, - "grad_norm": 0.6836217641830444, - "learning_rate": 1.2852730888014555e-05, - "loss": 0.0675, - "step": 42235 - }, - { - "epoch": 1.0722172864576722, - "grad_norm": 0.537560224533081, - "learning_rate": 1.2851884756948851e-05, - "loss": 0.0639, - "step": 42240 - }, - { - "epoch": 1.0723442061175277, - "grad_norm": 0.3887097239494324, - "learning_rate": 1.285103862588315e-05, - "loss": 0.0591, - "step": 42245 - }, - { - "epoch": 1.072471125777383, - "grad_norm": 0.9634504318237305, - "learning_rate": 1.2850192494817448e-05, - "loss": 0.0584, - "step": 42250 - }, - { - "epoch": 1.0725980454372381, - "grad_norm": 0.5893450379371643, - "learning_rate": 1.2849346363751746e-05, - "loss": 0.0432, - "step": 42255 - }, - { - "epoch": 1.0727249650970936, - "grad_norm": 0.5385972857475281, - "learning_rate": 1.2848500232686043e-05, - "loss": 0.0549, - "step": 42260 - }, - { - "epoch": 1.0728518847569488, - "grad_norm": 0.37132585048675537, - "learning_rate": 1.2847654101620342e-05, - "loss": 0.0493, - "step": 42265 - }, - { - "epoch": 1.0729788044168043, - "grad_norm": 0.3869856297969818, - "learning_rate": 1.284680797055464e-05, - "loss": 0.0613, - "step": 42270 - }, - { - "epoch": 1.0731057240766595, - "grad_norm": 0.4861757457256317, - "learning_rate": 1.2845961839488938e-05, - "loss": 0.0401, - "step": 42275 - }, - { - "epoch": 1.0732326437365147, - "grad_norm": 0.6169375777244568, - "learning_rate": 1.2845115708423235e-05, - "loss": 0.0757, - "step": 42280 - }, - { - "epoch": 1.0733595633963702, - "grad_norm": 0.6234797239303589, - "learning_rate": 1.2844269577357534e-05, - "loss": 0.0676, - "step": 42285 - }, - { - "epoch": 1.0734864830562254, - "grad_norm": 0.3604176640510559, - "learning_rate": 1.2843423446291832e-05, - "loss": 0.057, - "step": 42290 - }, - { - "epoch": 1.0736134027160806, - "grad_norm": 0.36979833245277405, - "learning_rate": 1.284257731522613e-05, - "loss": 0.0508, - "step": 42295 - }, - { - "epoch": 1.073740322375936, - "grad_norm": 0.7542506456375122, - "learning_rate": 1.2841731184160427e-05, - "loss": 0.0781, - "step": 42300 - }, - { - "epoch": 1.0738672420357913, - "grad_norm": 0.5016604065895081, - "learning_rate": 1.2840885053094725e-05, - "loss": 0.0549, - "step": 42305 - }, - { - "epoch": 1.0739941616956465, - "grad_norm": 0.6273196339607239, - "learning_rate": 1.2840038922029024e-05, - "loss": 0.0486, - "step": 42310 - }, - { - "epoch": 1.074121081355502, - "grad_norm": 0.4035051167011261, - "learning_rate": 1.2839192790963322e-05, - "loss": 0.0543, - "step": 42315 - }, - { - "epoch": 1.0742480010153572, - "grad_norm": 0.3551696240901947, - "learning_rate": 1.283834665989762e-05, - "loss": 0.0739, - "step": 42320 - }, - { - "epoch": 1.0743749206752127, - "grad_norm": 0.6555771827697754, - "learning_rate": 1.2837500528831917e-05, - "loss": 0.0517, - "step": 42325 - }, - { - "epoch": 1.074501840335068, - "grad_norm": 0.4476749300956726, - "learning_rate": 1.2836654397766216e-05, - "loss": 0.0486, - "step": 42330 - }, - { - "epoch": 1.0746287599949231, - "grad_norm": 0.5338792204856873, - "learning_rate": 1.2835808266700514e-05, - "loss": 0.0567, - "step": 42335 - }, - { - "epoch": 1.0747556796547786, - "grad_norm": 0.44095146656036377, - "learning_rate": 1.2834962135634813e-05, - "loss": 0.0533, - "step": 42340 - }, - { - "epoch": 1.0748825993146338, - "grad_norm": 0.27932852506637573, - "learning_rate": 1.2834116004569108e-05, - "loss": 0.0535, - "step": 42345 - }, - { - "epoch": 1.0750095189744893, - "grad_norm": 0.6481393575668335, - "learning_rate": 1.2833269873503406e-05, - "loss": 0.0545, - "step": 42350 - }, - { - "epoch": 1.0751364386343445, - "grad_norm": 0.352495938539505, - "learning_rate": 1.2832423742437704e-05, - "loss": 0.0595, - "step": 42355 - }, - { - "epoch": 1.0752633582941997, - "grad_norm": 0.47731807827949524, - "learning_rate": 1.2831577611372004e-05, - "loss": 0.0459, - "step": 42360 - }, - { - "epoch": 1.0753902779540552, - "grad_norm": 0.5317132472991943, - "learning_rate": 1.28307314803063e-05, - "loss": 0.0441, - "step": 42365 - }, - { - "epoch": 1.0755171976139104, - "grad_norm": 0.27412548661231995, - "learning_rate": 1.2829885349240598e-05, - "loss": 0.0471, - "step": 42370 - }, - { - "epoch": 1.0756441172737656, - "grad_norm": 0.6142472624778748, - "learning_rate": 1.2829039218174896e-05, - "loss": 0.066, - "step": 42375 - }, - { - "epoch": 1.075771036933621, - "grad_norm": 0.3918299973011017, - "learning_rate": 1.2828193087109195e-05, - "loss": 0.0623, - "step": 42380 - }, - { - "epoch": 1.0758979565934763, - "grad_norm": 0.4413627088069916, - "learning_rate": 1.2827346956043491e-05, - "loss": 0.0531, - "step": 42385 - }, - { - "epoch": 1.0760248762533315, - "grad_norm": 0.7155771851539612, - "learning_rate": 1.282650082497779e-05, - "loss": 0.0671, - "step": 42390 - }, - { - "epoch": 1.076151795913187, - "grad_norm": 0.39816445112228394, - "learning_rate": 1.2825654693912088e-05, - "loss": 0.0472, - "step": 42395 - }, - { - "epoch": 1.0762787155730422, - "grad_norm": 0.5632269978523254, - "learning_rate": 1.2824808562846387e-05, - "loss": 0.0489, - "step": 42400 - }, - { - "epoch": 1.0764056352328977, - "grad_norm": 0.845872700214386, - "learning_rate": 1.2823962431780683e-05, - "loss": 0.0678, - "step": 42405 - }, - { - "epoch": 1.076532554892753, - "grad_norm": 0.4471699297428131, - "learning_rate": 1.2823116300714982e-05, - "loss": 0.0524, - "step": 42410 - }, - { - "epoch": 1.0766594745526081, - "grad_norm": 0.44744130969047546, - "learning_rate": 1.282227016964928e-05, - "loss": 0.0629, - "step": 42415 - }, - { - "epoch": 1.0767863942124636, - "grad_norm": 0.6550050377845764, - "learning_rate": 1.2821424038583579e-05, - "loss": 0.0539, - "step": 42420 - }, - { - "epoch": 1.0769133138723188, - "grad_norm": 0.8646288514137268, - "learning_rate": 1.2820577907517875e-05, - "loss": 0.061, - "step": 42425 - }, - { - "epoch": 1.077040233532174, - "grad_norm": 0.49837127327919006, - "learning_rate": 1.2819731776452174e-05, - "loss": 0.0511, - "step": 42430 - }, - { - "epoch": 1.0771671531920295, - "grad_norm": 0.499048113822937, - "learning_rate": 1.2818885645386472e-05, - "loss": 0.0595, - "step": 42435 - }, - { - "epoch": 1.0772940728518847, - "grad_norm": 0.3862304985523224, - "learning_rate": 1.281803951432077e-05, - "loss": 0.0638, - "step": 42440 - }, - { - "epoch": 1.0774209925117402, - "grad_norm": 0.597767174243927, - "learning_rate": 1.2817193383255067e-05, - "loss": 0.0713, - "step": 42445 - }, - { - "epoch": 1.0775479121715954, - "grad_norm": 0.6871424913406372, - "learning_rate": 1.2816347252189366e-05, - "loss": 0.0457, - "step": 42450 - }, - { - "epoch": 1.0776748318314506, - "grad_norm": 0.6373463869094849, - "learning_rate": 1.2815501121123664e-05, - "loss": 0.0461, - "step": 42455 - }, - { - "epoch": 1.077801751491306, - "grad_norm": 0.49066251516342163, - "learning_rate": 1.2814654990057962e-05, - "loss": 0.0525, - "step": 42460 - }, - { - "epoch": 1.0779286711511613, - "grad_norm": 0.7323153614997864, - "learning_rate": 1.2813808858992259e-05, - "loss": 0.0905, - "step": 42465 - }, - { - "epoch": 1.0780555908110165, - "grad_norm": 0.4293615221977234, - "learning_rate": 1.2812962727926557e-05, - "loss": 0.0375, - "step": 42470 - }, - { - "epoch": 1.078182510470872, - "grad_norm": 0.43721145391464233, - "learning_rate": 1.2812116596860856e-05, - "loss": 0.0614, - "step": 42475 - }, - { - "epoch": 1.0783094301307272, - "grad_norm": 0.36503246426582336, - "learning_rate": 1.2811270465795154e-05, - "loss": 0.0538, - "step": 42480 - }, - { - "epoch": 1.0784363497905827, - "grad_norm": 0.45524054765701294, - "learning_rate": 1.281042433472945e-05, - "loss": 0.041, - "step": 42485 - }, - { - "epoch": 1.078563269450438, - "grad_norm": 0.3024946451187134, - "learning_rate": 1.2809578203663748e-05, - "loss": 0.0455, - "step": 42490 - }, - { - "epoch": 1.0786901891102931, - "grad_norm": 0.3941533863544464, - "learning_rate": 1.2808732072598046e-05, - "loss": 0.0577, - "step": 42495 - }, - { - "epoch": 1.0788171087701486, - "grad_norm": 0.6439189910888672, - "learning_rate": 1.2807885941532344e-05, - "loss": 0.0581, - "step": 42500 - }, - { - "epoch": 1.0789440284300038, - "grad_norm": 0.5617189407348633, - "learning_rate": 1.2807039810466641e-05, - "loss": 0.053, - "step": 42505 - }, - { - "epoch": 1.079070948089859, - "grad_norm": 0.38844531774520874, - "learning_rate": 1.280619367940094e-05, - "loss": 0.0491, - "step": 42510 - }, - { - "epoch": 1.0791978677497145, - "grad_norm": 1.1876835823059082, - "learning_rate": 1.2805347548335238e-05, - "loss": 0.0642, - "step": 42515 - }, - { - "epoch": 1.0793247874095697, - "grad_norm": 0.5467357635498047, - "learning_rate": 1.2804501417269536e-05, - "loss": 0.0781, - "step": 42520 - }, - { - "epoch": 1.079451707069425, - "grad_norm": 0.4129079282283783, - "learning_rate": 1.2803655286203833e-05, - "loss": 0.0569, - "step": 42525 - }, - { - "epoch": 1.0795786267292804, - "grad_norm": 0.5820169448852539, - "learning_rate": 1.2802809155138132e-05, - "loss": 0.0445, - "step": 42530 - }, - { - "epoch": 1.0797055463891356, - "grad_norm": 0.5011926889419556, - "learning_rate": 1.280196302407243e-05, - "loss": 0.0614, - "step": 42535 - }, - { - "epoch": 1.079832466048991, - "grad_norm": 0.42138761281967163, - "learning_rate": 1.2801116893006728e-05, - "loss": 0.068, - "step": 42540 - }, - { - "epoch": 1.0799593857088463, - "grad_norm": 0.45025181770324707, - "learning_rate": 1.2800270761941025e-05, - "loss": 0.0467, - "step": 42545 - }, - { - "epoch": 1.0800863053687015, - "grad_norm": 0.3557569682598114, - "learning_rate": 1.2799424630875323e-05, - "loss": 0.0558, - "step": 42550 - }, - { - "epoch": 1.080213225028557, - "grad_norm": 0.6005776524543762, - "learning_rate": 1.2798578499809622e-05, - "loss": 0.0754, - "step": 42555 - }, - { - "epoch": 1.0803401446884122, - "grad_norm": 1.0029523372650146, - "learning_rate": 1.279773236874392e-05, - "loss": 0.0517, - "step": 42560 - }, - { - "epoch": 1.0804670643482674, - "grad_norm": 0.41421568393707275, - "learning_rate": 1.2796886237678217e-05, - "loss": 0.0665, - "step": 42565 - }, - { - "epoch": 1.080593984008123, - "grad_norm": 0.4156436026096344, - "learning_rate": 1.2796040106612515e-05, - "loss": 0.0612, - "step": 42570 - }, - { - "epoch": 1.0807209036679781, - "grad_norm": 0.2845958471298218, - "learning_rate": 1.2795193975546814e-05, - "loss": 0.0523, - "step": 42575 - }, - { - "epoch": 1.0808478233278336, - "grad_norm": 0.46573325991630554, - "learning_rate": 1.2794347844481112e-05, - "loss": 0.059, - "step": 42580 - }, - { - "epoch": 1.0809747429876888, - "grad_norm": 0.6172319054603577, - "learning_rate": 1.2793501713415409e-05, - "loss": 0.0501, - "step": 42585 - }, - { - "epoch": 1.081101662647544, - "grad_norm": 0.5540859699249268, - "learning_rate": 1.2792655582349707e-05, - "loss": 0.0601, - "step": 42590 - }, - { - "epoch": 1.0812285823073995, - "grad_norm": 0.358424574136734, - "learning_rate": 1.2791809451284006e-05, - "loss": 0.0352, - "step": 42595 - }, - { - "epoch": 1.0813555019672547, - "grad_norm": 0.4208126962184906, - "learning_rate": 1.2790963320218304e-05, - "loss": 0.0643, - "step": 42600 - }, - { - "epoch": 1.08148242162711, - "grad_norm": 0.36235520243644714, - "learning_rate": 1.27901171891526e-05, - "loss": 0.0499, - "step": 42605 - }, - { - "epoch": 1.0816093412869654, - "grad_norm": 0.6017557978630066, - "learning_rate": 1.27892710580869e-05, - "loss": 0.0603, - "step": 42610 - }, - { - "epoch": 1.0817362609468206, - "grad_norm": 1.0586178302764893, - "learning_rate": 1.2788424927021198e-05, - "loss": 0.0503, - "step": 42615 - }, - { - "epoch": 1.081863180606676, - "grad_norm": 0.3980655372142792, - "learning_rate": 1.2787578795955496e-05, - "loss": 0.0385, - "step": 42620 - }, - { - "epoch": 1.0819901002665313, - "grad_norm": 0.5789939761161804, - "learning_rate": 1.2786732664889791e-05, - "loss": 0.0546, - "step": 42625 - }, - { - "epoch": 1.0821170199263865, - "grad_norm": 0.3946102261543274, - "learning_rate": 1.278588653382409e-05, - "loss": 0.0688, - "step": 42630 - }, - { - "epoch": 1.082243939586242, - "grad_norm": 0.35584691166877747, - "learning_rate": 1.2785040402758388e-05, - "loss": 0.044, - "step": 42635 - }, - { - "epoch": 1.0823708592460972, - "grad_norm": 0.4576937258243561, - "learning_rate": 1.2784194271692686e-05, - "loss": 0.0438, - "step": 42640 - }, - { - "epoch": 1.0824977789059524, - "grad_norm": 0.8676387071609497, - "learning_rate": 1.2783348140626983e-05, - "loss": 0.0712, - "step": 42645 - }, - { - "epoch": 1.082624698565808, - "grad_norm": 0.5995132327079773, - "learning_rate": 1.2782502009561281e-05, - "loss": 0.0413, - "step": 42650 - }, - { - "epoch": 1.0827516182256631, - "grad_norm": 0.3364960551261902, - "learning_rate": 1.278165587849558e-05, - "loss": 0.0464, - "step": 42655 - }, - { - "epoch": 1.0828785378855184, - "grad_norm": 0.44223538041114807, - "learning_rate": 1.2780809747429878e-05, - "loss": 0.0474, - "step": 42660 - }, - { - "epoch": 1.0830054575453738, - "grad_norm": 0.4569545388221741, - "learning_rate": 1.2779963616364175e-05, - "loss": 0.0484, - "step": 42665 - }, - { - "epoch": 1.083132377205229, - "grad_norm": 0.45899510383605957, - "learning_rate": 1.2779117485298473e-05, - "loss": 0.0683, - "step": 42670 - }, - { - "epoch": 1.0832592968650845, - "grad_norm": 0.5498926043510437, - "learning_rate": 1.2778271354232772e-05, - "loss": 0.0692, - "step": 42675 - }, - { - "epoch": 1.0833862165249397, - "grad_norm": 0.3870448172092438, - "learning_rate": 1.277742522316707e-05, - "loss": 0.0465, - "step": 42680 - }, - { - "epoch": 1.083513136184795, - "grad_norm": 0.9311301708221436, - "learning_rate": 1.2776579092101367e-05, - "loss": 0.0501, - "step": 42685 - }, - { - "epoch": 1.0836400558446504, - "grad_norm": 0.7427285313606262, - "learning_rate": 1.2775732961035665e-05, - "loss": 0.0482, - "step": 42690 - }, - { - "epoch": 1.0837669755045056, - "grad_norm": 0.3884924054145813, - "learning_rate": 1.2774886829969964e-05, - "loss": 0.0747, - "step": 42695 - }, - { - "epoch": 1.083893895164361, - "grad_norm": 0.6752228736877441, - "learning_rate": 1.2774040698904262e-05, - "loss": 0.0439, - "step": 42700 - }, - { - "epoch": 1.0840208148242163, - "grad_norm": 0.5687264800071716, - "learning_rate": 1.2773194567838559e-05, - "loss": 0.0566, - "step": 42705 - }, - { - "epoch": 1.0841477344840715, - "grad_norm": 0.42419424653053284, - "learning_rate": 1.2772348436772857e-05, - "loss": 0.0836, - "step": 42710 - }, - { - "epoch": 1.084274654143927, - "grad_norm": 0.22522971034049988, - "learning_rate": 1.2771502305707155e-05, - "loss": 0.0515, - "step": 42715 - }, - { - "epoch": 1.0844015738037822, - "grad_norm": 0.38171452283859253, - "learning_rate": 1.2770656174641454e-05, - "loss": 0.0298, - "step": 42720 - }, - { - "epoch": 1.0845284934636374, - "grad_norm": 0.5964037179946899, - "learning_rate": 1.276981004357575e-05, - "loss": 0.0515, - "step": 42725 - }, - { - "epoch": 1.084655413123493, - "grad_norm": 0.41871803998947144, - "learning_rate": 1.2768963912510049e-05, - "loss": 0.0585, - "step": 42730 - }, - { - "epoch": 1.0847823327833481, - "grad_norm": 0.4088152348995209, - "learning_rate": 1.2768117781444347e-05, - "loss": 0.0531, - "step": 42735 - }, - { - "epoch": 1.0849092524432034, - "grad_norm": 0.5454362630844116, - "learning_rate": 1.2767271650378646e-05, - "loss": 0.0617, - "step": 42740 - }, - { - "epoch": 1.0850361721030588, - "grad_norm": 0.4611736834049225, - "learning_rate": 1.276642551931294e-05, - "loss": 0.0436, - "step": 42745 - }, - { - "epoch": 1.085163091762914, - "grad_norm": 0.574993371963501, - "learning_rate": 1.2765579388247241e-05, - "loss": 0.0422, - "step": 42750 - }, - { - "epoch": 1.0852900114227695, - "grad_norm": 1.054211139678955, - "learning_rate": 1.276473325718154e-05, - "loss": 0.0624, - "step": 42755 - }, - { - "epoch": 1.0854169310826247, - "grad_norm": 0.6965357661247253, - "learning_rate": 1.2763887126115838e-05, - "loss": 0.0621, - "step": 42760 - }, - { - "epoch": 1.08554385074248, - "grad_norm": 0.527485728263855, - "learning_rate": 1.2763040995050133e-05, - "loss": 0.0535, - "step": 42765 - }, - { - "epoch": 1.0856707704023354, - "grad_norm": 0.5525309443473816, - "learning_rate": 1.2762194863984431e-05, - "loss": 0.0663, - "step": 42770 - }, - { - "epoch": 1.0857976900621906, - "grad_norm": 0.5517336130142212, - "learning_rate": 1.276134873291873e-05, - "loss": 0.0599, - "step": 42775 - }, - { - "epoch": 1.0859246097220459, - "grad_norm": 0.42181098461151123, - "learning_rate": 1.2760502601853028e-05, - "loss": 0.0435, - "step": 42780 - }, - { - "epoch": 1.0860515293819013, - "grad_norm": 0.6212353706359863, - "learning_rate": 1.2759656470787325e-05, - "loss": 0.0458, - "step": 42785 - }, - { - "epoch": 1.0861784490417565, - "grad_norm": 0.8007168769836426, - "learning_rate": 1.2758810339721623e-05, - "loss": 0.0439, - "step": 42790 - }, - { - "epoch": 1.0863053687016118, - "grad_norm": 0.43116363883018494, - "learning_rate": 1.2757964208655921e-05, - "loss": 0.0567, - "step": 42795 - }, - { - "epoch": 1.0864322883614672, - "grad_norm": 0.351953387260437, - "learning_rate": 1.275711807759022e-05, - "loss": 0.0463, - "step": 42800 - }, - { - "epoch": 1.0865592080213224, - "grad_norm": 0.6619280576705933, - "learning_rate": 1.2756271946524517e-05, - "loss": 0.06, - "step": 42805 - }, - { - "epoch": 1.086686127681178, - "grad_norm": 0.6280010342597961, - "learning_rate": 1.2755425815458815e-05, - "loss": 0.0462, - "step": 42810 - }, - { - "epoch": 1.0868130473410331, - "grad_norm": 0.70237797498703, - "learning_rate": 1.2754579684393113e-05, - "loss": 0.0579, - "step": 42815 - }, - { - "epoch": 1.0869399670008884, - "grad_norm": 0.7083315849304199, - "learning_rate": 1.2753733553327412e-05, - "loss": 0.0677, - "step": 42820 - }, - { - "epoch": 1.0870668866607438, - "grad_norm": 0.6413589715957642, - "learning_rate": 1.2752887422261708e-05, - "loss": 0.0628, - "step": 42825 - }, - { - "epoch": 1.087193806320599, - "grad_norm": 0.5767530202865601, - "learning_rate": 1.2752041291196007e-05, - "loss": 0.046, - "step": 42830 - }, - { - "epoch": 1.0873207259804545, - "grad_norm": 1.4461615085601807, - "learning_rate": 1.2751195160130305e-05, - "loss": 0.0631, - "step": 42835 - }, - { - "epoch": 1.0874476456403097, - "grad_norm": 0.2793924808502197, - "learning_rate": 1.2750349029064604e-05, - "loss": 0.0473, - "step": 42840 - }, - { - "epoch": 1.087574565300165, - "grad_norm": 0.6172242164611816, - "learning_rate": 1.2749502897998902e-05, - "loss": 0.0515, - "step": 42845 - }, - { - "epoch": 1.0877014849600204, - "grad_norm": 0.6208865642547607, - "learning_rate": 1.2748656766933199e-05, - "loss": 0.0532, - "step": 42850 - }, - { - "epoch": 1.0878284046198756, - "grad_norm": 0.41955476999282837, - "learning_rate": 1.2747810635867497e-05, - "loss": 0.0553, - "step": 42855 - }, - { - "epoch": 1.0879553242797309, - "grad_norm": 0.3226391673088074, - "learning_rate": 1.2746964504801796e-05, - "loss": 0.0677, - "step": 42860 - }, - { - "epoch": 1.0880822439395863, - "grad_norm": 0.691662073135376, - "learning_rate": 1.2746118373736094e-05, - "loss": 0.05, - "step": 42865 - }, - { - "epoch": 1.0882091635994415, - "grad_norm": 1.0402472019195557, - "learning_rate": 1.274527224267039e-05, - "loss": 0.065, - "step": 42870 - }, - { - "epoch": 1.0883360832592968, - "grad_norm": 0.331019788980484, - "learning_rate": 1.2744426111604689e-05, - "loss": 0.0532, - "step": 42875 - }, - { - "epoch": 1.0884630029191522, - "grad_norm": 0.5550357103347778, - "learning_rate": 1.2743579980538987e-05, - "loss": 0.0557, - "step": 42880 - }, - { - "epoch": 1.0885899225790074, - "grad_norm": 0.5075092911720276, - "learning_rate": 1.2742733849473286e-05, - "loss": 0.0463, - "step": 42885 - }, - { - "epoch": 1.088716842238863, - "grad_norm": 0.8576421141624451, - "learning_rate": 1.2741887718407583e-05, - "loss": 0.0711, - "step": 42890 - }, - { - "epoch": 1.0888437618987181, - "grad_norm": 0.7788500785827637, - "learning_rate": 1.2741041587341881e-05, - "loss": 0.0457, - "step": 42895 - }, - { - "epoch": 1.0889706815585734, - "grad_norm": 0.750716507434845, - "learning_rate": 1.274019545627618e-05, - "loss": 0.063, - "step": 42900 - }, - { - "epoch": 1.0890976012184288, - "grad_norm": 0.4234044551849365, - "learning_rate": 1.2739349325210478e-05, - "loss": 0.0512, - "step": 42905 - }, - { - "epoch": 1.089224520878284, - "grad_norm": 0.5040553212165833, - "learning_rate": 1.2738503194144773e-05, - "loss": 0.0467, - "step": 42910 - }, - { - "epoch": 1.0893514405381393, - "grad_norm": 0.3420347273349762, - "learning_rate": 1.2737657063079071e-05, - "loss": 0.0632, - "step": 42915 - }, - { - "epoch": 1.0894783601979947, - "grad_norm": 0.49469098448753357, - "learning_rate": 1.273681093201337e-05, - "loss": 0.0474, - "step": 42920 - }, - { - "epoch": 1.08960527985785, - "grad_norm": 0.5089055299758911, - "learning_rate": 1.273596480094767e-05, - "loss": 0.0571, - "step": 42925 - }, - { - "epoch": 1.0897321995177054, - "grad_norm": 0.2505168616771698, - "learning_rate": 1.2735118669881965e-05, - "loss": 0.0535, - "step": 42930 - }, - { - "epoch": 1.0898591191775606, - "grad_norm": 0.623157262802124, - "learning_rate": 1.2734272538816263e-05, - "loss": 0.0544, - "step": 42935 - }, - { - "epoch": 1.0899860388374158, - "grad_norm": 0.5219603180885315, - "learning_rate": 1.2733426407750562e-05, - "loss": 0.0687, - "step": 42940 - }, - { - "epoch": 1.0901129584972713, - "grad_norm": 0.646746039390564, - "learning_rate": 1.273258027668486e-05, - "loss": 0.0594, - "step": 42945 - }, - { - "epoch": 1.0902398781571265, - "grad_norm": 0.36872977018356323, - "learning_rate": 1.2731734145619157e-05, - "loss": 0.054, - "step": 42950 - }, - { - "epoch": 1.0903667978169818, - "grad_norm": 0.4411577582359314, - "learning_rate": 1.2730888014553455e-05, - "loss": 0.0606, - "step": 42955 - }, - { - "epoch": 1.0904937174768372, - "grad_norm": 0.3765312135219574, - "learning_rate": 1.2730041883487753e-05, - "loss": 0.0577, - "step": 42960 - }, - { - "epoch": 1.0906206371366924, - "grad_norm": 0.6534539461135864, - "learning_rate": 1.2729195752422052e-05, - "loss": 0.0538, - "step": 42965 - }, - { - "epoch": 1.090747556796548, - "grad_norm": 0.674556314945221, - "learning_rate": 1.2728349621356349e-05, - "loss": 0.0617, - "step": 42970 - }, - { - "epoch": 1.0908744764564031, - "grad_norm": 1.5514971017837524, - "learning_rate": 1.2727503490290647e-05, - "loss": 0.0662, - "step": 42975 - }, - { - "epoch": 1.0910013961162583, - "grad_norm": 0.43168479204177856, - "learning_rate": 1.2726657359224945e-05, - "loss": 0.0488, - "step": 42980 - }, - { - "epoch": 1.0911283157761138, - "grad_norm": 0.3800572156906128, - "learning_rate": 1.2725811228159244e-05, - "loss": 0.0538, - "step": 42985 - }, - { - "epoch": 1.091255235435969, - "grad_norm": 0.5922189354896545, - "learning_rate": 1.272496509709354e-05, - "loss": 0.0423, - "step": 42990 - }, - { - "epoch": 1.0913821550958243, - "grad_norm": 0.2429990917444229, - "learning_rate": 1.2724118966027839e-05, - "loss": 0.0482, - "step": 42995 - }, - { - "epoch": 1.0915090747556797, - "grad_norm": 0.3378002643585205, - "learning_rate": 1.2723272834962137e-05, - "loss": 0.0574, - "step": 43000 - }, - { - "epoch": 1.091635994415535, - "grad_norm": 0.5604490041732788, - "learning_rate": 1.2722426703896436e-05, - "loss": 0.0406, - "step": 43005 - }, - { - "epoch": 1.0917629140753902, - "grad_norm": 0.4520503580570221, - "learning_rate": 1.2721580572830732e-05, - "loss": 0.0498, - "step": 43010 - }, - { - "epoch": 1.0918898337352456, - "grad_norm": 0.6386215090751648, - "learning_rate": 1.272073444176503e-05, - "loss": 0.053, - "step": 43015 - }, - { - "epoch": 1.0920167533951008, - "grad_norm": 0.4633835554122925, - "learning_rate": 1.271988831069933e-05, - "loss": 0.0479, - "step": 43020 - }, - { - "epoch": 1.0921436730549563, - "grad_norm": 0.46136966347694397, - "learning_rate": 1.2719042179633628e-05, - "loss": 0.0485, - "step": 43025 - }, - { - "epoch": 1.0922705927148115, - "grad_norm": 0.3297191262245178, - "learning_rate": 1.2718196048567924e-05, - "loss": 0.0412, - "step": 43030 - }, - { - "epoch": 1.0923975123746668, - "grad_norm": 0.3036384880542755, - "learning_rate": 1.2717349917502223e-05, - "loss": 0.0461, - "step": 43035 - }, - { - "epoch": 1.0925244320345222, - "grad_norm": 0.3970770835876465, - "learning_rate": 1.2716503786436521e-05, - "loss": 0.0738, - "step": 43040 - }, - { - "epoch": 1.0926513516943774, - "grad_norm": 0.4975200891494751, - "learning_rate": 1.271565765537082e-05, - "loss": 0.0408, - "step": 43045 - }, - { - "epoch": 1.0927782713542329, - "grad_norm": 0.45593923330307007, - "learning_rate": 1.2714811524305114e-05, - "loss": 0.0587, - "step": 43050 - }, - { - "epoch": 1.0929051910140881, - "grad_norm": 0.23416857421398163, - "learning_rate": 1.2713965393239413e-05, - "loss": 0.0373, - "step": 43055 - }, - { - "epoch": 1.0930321106739433, - "grad_norm": 0.5985181331634521, - "learning_rate": 1.2713119262173711e-05, - "loss": 0.0468, - "step": 43060 - }, - { - "epoch": 1.0931590303337988, - "grad_norm": 0.45502978563308716, - "learning_rate": 1.271227313110801e-05, - "loss": 0.0527, - "step": 43065 - }, - { - "epoch": 1.093285949993654, - "grad_norm": 0.43053919076919556, - "learning_rate": 1.2711427000042306e-05, - "loss": 0.0501, - "step": 43070 - }, - { - "epoch": 1.0934128696535093, - "grad_norm": 0.528597354888916, - "learning_rate": 1.2710580868976605e-05, - "loss": 0.037, - "step": 43075 - }, - { - "epoch": 1.0935397893133647, - "grad_norm": 0.5302170515060425, - "learning_rate": 1.2709734737910903e-05, - "loss": 0.0565, - "step": 43080 - }, - { - "epoch": 1.09366670897322, - "grad_norm": 0.5084816813468933, - "learning_rate": 1.2708888606845202e-05, - "loss": 0.0667, - "step": 43085 - }, - { - "epoch": 1.0937936286330752, - "grad_norm": 0.521334707736969, - "learning_rate": 1.2708042475779498e-05, - "loss": 0.057, - "step": 43090 - }, - { - "epoch": 1.0939205482929306, - "grad_norm": 1.4776794910430908, - "learning_rate": 1.2707196344713797e-05, - "loss": 0.0456, - "step": 43095 - }, - { - "epoch": 1.0940474679527858, - "grad_norm": 0.6335093379020691, - "learning_rate": 1.2706350213648095e-05, - "loss": 0.0463, - "step": 43100 - }, - { - "epoch": 1.0941743876126413, - "grad_norm": 0.7041542530059814, - "learning_rate": 1.2705504082582394e-05, - "loss": 0.0526, - "step": 43105 - }, - { - "epoch": 1.0943013072724965, - "grad_norm": 0.9015908241271973, - "learning_rate": 1.270465795151669e-05, - "loss": 0.0542, - "step": 43110 - }, - { - "epoch": 1.0944282269323518, - "grad_norm": 0.2784233093261719, - "learning_rate": 1.2703811820450989e-05, - "loss": 0.0849, - "step": 43115 - }, - { - "epoch": 1.0945551465922072, - "grad_norm": 0.4062497019767761, - "learning_rate": 1.2702965689385287e-05, - "loss": 0.0501, - "step": 43120 - }, - { - "epoch": 1.0946820662520624, - "grad_norm": 0.44600212574005127, - "learning_rate": 1.2702119558319585e-05, - "loss": 0.0509, - "step": 43125 - }, - { - "epoch": 1.0948089859119177, - "grad_norm": 1.2634656429290771, - "learning_rate": 1.2701273427253882e-05, - "loss": 0.072, - "step": 43130 - }, - { - "epoch": 1.0949359055717731, - "grad_norm": 0.5298677086830139, - "learning_rate": 1.270042729618818e-05, - "loss": 0.0537, - "step": 43135 - }, - { - "epoch": 1.0950628252316283, - "grad_norm": 0.6869231462478638, - "learning_rate": 1.2699581165122479e-05, - "loss": 0.0577, - "step": 43140 - }, - { - "epoch": 1.0951897448914836, - "grad_norm": 0.6668592095375061, - "learning_rate": 1.2698735034056777e-05, - "loss": 0.0751, - "step": 43145 - }, - { - "epoch": 1.095316664551339, - "grad_norm": 0.7661460638046265, - "learning_rate": 1.2697888902991074e-05, - "loss": 0.0472, - "step": 43150 - }, - { - "epoch": 1.0954435842111943, - "grad_norm": 0.33462899923324585, - "learning_rate": 1.2697042771925372e-05, - "loss": 0.0441, - "step": 43155 - }, - { - "epoch": 1.0955705038710497, - "grad_norm": 0.6177101731300354, - "learning_rate": 1.2696196640859671e-05, - "loss": 0.0649, - "step": 43160 - }, - { - "epoch": 1.095697423530905, - "grad_norm": 0.4382210969924927, - "learning_rate": 1.269535050979397e-05, - "loss": 0.0678, - "step": 43165 - }, - { - "epoch": 1.0958243431907602, - "grad_norm": 0.4558710753917694, - "learning_rate": 1.2694504378728266e-05, - "loss": 0.0651, - "step": 43170 - }, - { - "epoch": 1.0959512628506156, - "grad_norm": 0.5106247067451477, - "learning_rate": 1.2693658247662564e-05, - "loss": 0.0595, - "step": 43175 - }, - { - "epoch": 1.0960781825104708, - "grad_norm": 0.3680804669857025, - "learning_rate": 1.2692812116596863e-05, - "loss": 0.0542, - "step": 43180 - }, - { - "epoch": 1.0962051021703263, - "grad_norm": 0.4874250590801239, - "learning_rate": 1.2691965985531161e-05, - "loss": 0.0742, - "step": 43185 - }, - { - "epoch": 1.0963320218301815, - "grad_norm": 0.6860079765319824, - "learning_rate": 1.2691119854465456e-05, - "loss": 0.077, - "step": 43190 - }, - { - "epoch": 1.0964589414900368, - "grad_norm": 0.4452047049999237, - "learning_rate": 1.2690273723399755e-05, - "loss": 0.0648, - "step": 43195 - }, - { - "epoch": 1.0965858611498922, - "grad_norm": 0.6008821129798889, - "learning_rate": 1.2689427592334053e-05, - "loss": 0.0546, - "step": 43200 - }, - { - "epoch": 1.0967127808097474, - "grad_norm": 0.43728822469711304, - "learning_rate": 1.2688581461268351e-05, - "loss": 0.0521, - "step": 43205 - }, - { - "epoch": 1.0968397004696027, - "grad_norm": 0.5676054954528809, - "learning_rate": 1.2687735330202648e-05, - "loss": 0.0716, - "step": 43210 - }, - { - "epoch": 1.0969666201294581, - "grad_norm": 0.6722614169120789, - "learning_rate": 1.2686889199136947e-05, - "loss": 0.0545, - "step": 43215 - }, - { - "epoch": 1.0970935397893133, - "grad_norm": 0.8931460976600647, - "learning_rate": 1.2686043068071245e-05, - "loss": 0.052, - "step": 43220 - }, - { - "epoch": 1.0972204594491686, - "grad_norm": 0.46017706394195557, - "learning_rate": 1.2685196937005543e-05, - "loss": 0.0399, - "step": 43225 - }, - { - "epoch": 1.097347379109024, - "grad_norm": 0.9307083487510681, - "learning_rate": 1.268435080593984e-05, - "loss": 0.0571, - "step": 43230 - }, - { - "epoch": 1.0974742987688793, - "grad_norm": 0.545055627822876, - "learning_rate": 1.2683504674874138e-05, - "loss": 0.0571, - "step": 43235 - }, - { - "epoch": 1.0976012184287347, - "grad_norm": 0.3731945753097534, - "learning_rate": 1.2682658543808437e-05, - "loss": 0.0427, - "step": 43240 - }, - { - "epoch": 1.09772813808859, - "grad_norm": 0.5009360313415527, - "learning_rate": 1.2681812412742735e-05, - "loss": 0.0568, - "step": 43245 - }, - { - "epoch": 1.0978550577484452, - "grad_norm": 0.6181452870368958, - "learning_rate": 1.2680966281677032e-05, - "loss": 0.0597, - "step": 43250 - }, - { - "epoch": 1.0979819774083006, - "grad_norm": 0.39705681800842285, - "learning_rate": 1.268012015061133e-05, - "loss": 0.0362, - "step": 43255 - }, - { - "epoch": 1.0981088970681558, - "grad_norm": 0.5278443098068237, - "learning_rate": 1.2679274019545629e-05, - "loss": 0.0513, - "step": 43260 - }, - { - "epoch": 1.098235816728011, - "grad_norm": 0.8361928462982178, - "learning_rate": 1.2678427888479927e-05, - "loss": 0.0677, - "step": 43265 - }, - { - "epoch": 1.0983627363878665, - "grad_norm": 0.6105250716209412, - "learning_rate": 1.2677581757414224e-05, - "loss": 0.0643, - "step": 43270 - }, - { - "epoch": 1.0984896560477218, - "grad_norm": 0.27495014667510986, - "learning_rate": 1.2676735626348522e-05, - "loss": 0.0479, - "step": 43275 - }, - { - "epoch": 1.0986165757075772, - "grad_norm": 0.35950738191604614, - "learning_rate": 1.267588949528282e-05, - "loss": 0.0717, - "step": 43280 - }, - { - "epoch": 1.0987434953674324, - "grad_norm": 0.48001107573509216, - "learning_rate": 1.2675043364217119e-05, - "loss": 0.0526, - "step": 43285 - }, - { - "epoch": 1.0988704150272877, - "grad_norm": 0.5502468347549438, - "learning_rate": 1.2674197233151416e-05, - "loss": 0.0599, - "step": 43290 - }, - { - "epoch": 1.0989973346871431, - "grad_norm": 0.4317964017391205, - "learning_rate": 1.2673351102085714e-05, - "loss": 0.0511, - "step": 43295 - }, - { - "epoch": 1.0991242543469983, - "grad_norm": 0.6130411624908447, - "learning_rate": 1.2672504971020013e-05, - "loss": 0.0454, - "step": 43300 - }, - { - "epoch": 1.0992511740068536, - "grad_norm": 1.9165648221969604, - "learning_rate": 1.2671658839954311e-05, - "loss": 0.0491, - "step": 43305 - }, - { - "epoch": 1.099378093666709, - "grad_norm": 0.3179037272930145, - "learning_rate": 1.2670812708888606e-05, - "loss": 0.0406, - "step": 43310 - }, - { - "epoch": 1.0995050133265643, - "grad_norm": 0.3773842453956604, - "learning_rate": 1.2669966577822906e-05, - "loss": 0.0399, - "step": 43315 - }, - { - "epoch": 1.0996319329864197, - "grad_norm": 1.4410313367843628, - "learning_rate": 1.2669120446757204e-05, - "loss": 0.0482, - "step": 43320 - }, - { - "epoch": 1.099758852646275, - "grad_norm": 0.4465900659561157, - "learning_rate": 1.2668274315691503e-05, - "loss": 0.0475, - "step": 43325 - }, - { - "epoch": 1.0998857723061302, - "grad_norm": 0.3889230191707611, - "learning_rate": 1.2667428184625798e-05, - "loss": 0.0532, - "step": 43330 - }, - { - "epoch": 1.1000126919659856, - "grad_norm": 0.4456382095813751, - "learning_rate": 1.2666582053560096e-05, - "loss": 0.0519, - "step": 43335 - }, - { - "epoch": 1.1001396116258408, - "grad_norm": 0.43935176730155945, - "learning_rate": 1.2665735922494395e-05, - "loss": 0.0725, - "step": 43340 - }, - { - "epoch": 1.100266531285696, - "grad_norm": 0.5282996296882629, - "learning_rate": 1.2664889791428693e-05, - "loss": 0.0452, - "step": 43345 - }, - { - "epoch": 1.1003934509455515, - "grad_norm": 0.3641092777252197, - "learning_rate": 1.266404366036299e-05, - "loss": 0.055, - "step": 43350 - }, - { - "epoch": 1.1005203706054068, - "grad_norm": 0.537857711315155, - "learning_rate": 1.2663197529297288e-05, - "loss": 0.0677, - "step": 43355 - }, - { - "epoch": 1.100647290265262, - "grad_norm": 0.2938585877418518, - "learning_rate": 1.2662351398231587e-05, - "loss": 0.0602, - "step": 43360 - }, - { - "epoch": 1.1007742099251174, - "grad_norm": 0.8500050902366638, - "learning_rate": 1.2661505267165885e-05, - "loss": 0.0718, - "step": 43365 - }, - { - "epoch": 1.1009011295849727, - "grad_norm": 0.5758900046348572, - "learning_rate": 1.2660659136100183e-05, - "loss": 0.0398, - "step": 43370 - }, - { - "epoch": 1.101028049244828, - "grad_norm": 1.4076330661773682, - "learning_rate": 1.265981300503448e-05, - "loss": 0.0627, - "step": 43375 - }, - { - "epoch": 1.1011549689046833, - "grad_norm": 0.440023273229599, - "learning_rate": 1.2658966873968779e-05, - "loss": 0.0466, - "step": 43380 - }, - { - "epoch": 1.1012818885645386, - "grad_norm": 0.6822456121444702, - "learning_rate": 1.2658120742903077e-05, - "loss": 0.0664, - "step": 43385 - }, - { - "epoch": 1.101408808224394, - "grad_norm": 0.4510314166545868, - "learning_rate": 1.2657274611837375e-05, - "loss": 0.0499, - "step": 43390 - }, - { - "epoch": 1.1015357278842492, - "grad_norm": 0.6842350363731384, - "learning_rate": 1.2656428480771672e-05, - "loss": 0.0607, - "step": 43395 - }, - { - "epoch": 1.1016626475441045, - "grad_norm": 0.5944797992706299, - "learning_rate": 1.265558234970597e-05, - "loss": 0.0484, - "step": 43400 - }, - { - "epoch": 1.10178956720396, - "grad_norm": 0.8767343759536743, - "learning_rate": 1.2654736218640269e-05, - "loss": 0.0705, - "step": 43405 - }, - { - "epoch": 1.1019164868638152, - "grad_norm": 0.4723272919654846, - "learning_rate": 1.2653890087574567e-05, - "loss": 0.0465, - "step": 43410 - }, - { - "epoch": 1.1020434065236706, - "grad_norm": 0.599750280380249, - "learning_rate": 1.2653043956508864e-05, - "loss": 0.07, - "step": 43415 - }, - { - "epoch": 1.1021703261835258, - "grad_norm": 0.41753557324409485, - "learning_rate": 1.2652197825443162e-05, - "loss": 0.0393, - "step": 43420 - }, - { - "epoch": 1.102297245843381, - "grad_norm": 0.6172342300415039, - "learning_rate": 1.265135169437746e-05, - "loss": 0.0494, - "step": 43425 - }, - { - "epoch": 1.1024241655032365, - "grad_norm": 0.4631459712982178, - "learning_rate": 1.265050556331176e-05, - "loss": 0.07, - "step": 43430 - }, - { - "epoch": 1.1025510851630917, - "grad_norm": 0.44799497723579407, - "learning_rate": 1.2649659432246056e-05, - "loss": 0.0606, - "step": 43435 - }, - { - "epoch": 1.102678004822947, - "grad_norm": 0.4862203001976013, - "learning_rate": 1.2648813301180354e-05, - "loss": 0.0552, - "step": 43440 - }, - { - "epoch": 1.1028049244828024, - "grad_norm": 0.629924476146698, - "learning_rate": 1.2647967170114653e-05, - "loss": 0.0516, - "step": 43445 - }, - { - "epoch": 1.1029318441426577, - "grad_norm": 0.4508002698421478, - "learning_rate": 1.2647121039048951e-05, - "loss": 0.048, - "step": 43450 - }, - { - "epoch": 1.103058763802513, - "grad_norm": 0.39667943120002747, - "learning_rate": 1.2646274907983248e-05, - "loss": 0.0482, - "step": 43455 - }, - { - "epoch": 1.1031856834623683, - "grad_norm": 0.7152621746063232, - "learning_rate": 1.2645428776917546e-05, - "loss": 0.0574, - "step": 43460 - }, - { - "epoch": 1.1033126031222236, - "grad_norm": 0.684857189655304, - "learning_rate": 1.2644582645851845e-05, - "loss": 0.0649, - "step": 43465 - }, - { - "epoch": 1.103439522782079, - "grad_norm": 0.5542153716087341, - "learning_rate": 1.2643736514786143e-05, - "loss": 0.0452, - "step": 43470 - }, - { - "epoch": 1.1035664424419342, - "grad_norm": 1.2927578687667847, - "learning_rate": 1.2642890383720438e-05, - "loss": 0.0503, - "step": 43475 - }, - { - "epoch": 1.1036933621017895, - "grad_norm": 0.37921789288520813, - "learning_rate": 1.2642044252654736e-05, - "loss": 0.0561, - "step": 43480 - }, - { - "epoch": 1.103820281761645, - "grad_norm": 0.4734017550945282, - "learning_rate": 1.2641198121589035e-05, - "loss": 0.0382, - "step": 43485 - }, - { - "epoch": 1.1039472014215002, - "grad_norm": 0.4440435469150543, - "learning_rate": 1.2640351990523335e-05, - "loss": 0.0501, - "step": 43490 - }, - { - "epoch": 1.1040741210813554, - "grad_norm": 0.5940810441970825, - "learning_rate": 1.263950585945763e-05, - "loss": 0.0477, - "step": 43495 - }, - { - "epoch": 1.1042010407412108, - "grad_norm": 0.48502492904663086, - "learning_rate": 1.2638659728391928e-05, - "loss": 0.046, - "step": 43500 - }, - { - "epoch": 1.104327960401066, - "grad_norm": 0.22388999164104462, - "learning_rate": 1.2637813597326227e-05, - "loss": 0.0713, - "step": 43505 - }, - { - "epoch": 1.1044548800609215, - "grad_norm": 0.47475922107696533, - "learning_rate": 1.2636967466260525e-05, - "loss": 0.0624, - "step": 43510 - }, - { - "epoch": 1.1045817997207767, - "grad_norm": 0.4245753288269043, - "learning_rate": 1.2636121335194822e-05, - "loss": 0.0343, - "step": 43515 - }, - { - "epoch": 1.104708719380632, - "grad_norm": 0.48156219720840454, - "learning_rate": 1.263527520412912e-05, - "loss": 0.0443, - "step": 43520 - }, - { - "epoch": 1.1048356390404874, - "grad_norm": 0.3724766671657562, - "learning_rate": 1.2634429073063419e-05, - "loss": 0.0463, - "step": 43525 - }, - { - "epoch": 1.1049625587003427, - "grad_norm": 0.3570571541786194, - "learning_rate": 1.2633582941997717e-05, - "loss": 0.0592, - "step": 43530 - }, - { - "epoch": 1.105089478360198, - "grad_norm": 0.8485097289085388, - "learning_rate": 1.2632736810932014e-05, - "loss": 0.0562, - "step": 43535 - }, - { - "epoch": 1.1052163980200533, - "grad_norm": 0.3111598491668701, - "learning_rate": 1.2631890679866312e-05, - "loss": 0.0473, - "step": 43540 - }, - { - "epoch": 1.1053433176799086, - "grad_norm": 0.291252464056015, - "learning_rate": 1.263104454880061e-05, - "loss": 0.0418, - "step": 43545 - }, - { - "epoch": 1.105470237339764, - "grad_norm": 0.3402673304080963, - "learning_rate": 1.2630198417734909e-05, - "loss": 0.0539, - "step": 43550 - }, - { - "epoch": 1.1055971569996192, - "grad_norm": 0.7206306457519531, - "learning_rate": 1.2629352286669206e-05, - "loss": 0.0577, - "step": 43555 - }, - { - "epoch": 1.1057240766594745, - "grad_norm": 0.5750927329063416, - "learning_rate": 1.2628506155603504e-05, - "loss": 0.0739, - "step": 43560 - }, - { - "epoch": 1.10585099631933, - "grad_norm": 0.2595106363296509, - "learning_rate": 1.2627660024537802e-05, - "loss": 0.0564, - "step": 43565 - }, - { - "epoch": 1.1059779159791852, - "grad_norm": 0.5153880715370178, - "learning_rate": 1.2626813893472101e-05, - "loss": 0.0395, - "step": 43570 - }, - { - "epoch": 1.1061048356390404, - "grad_norm": 0.5051741600036621, - "learning_rate": 1.2625967762406398e-05, - "loss": 0.0507, - "step": 43575 - }, - { - "epoch": 1.1062317552988958, - "grad_norm": 0.3593094050884247, - "learning_rate": 1.2625121631340696e-05, - "loss": 0.0508, - "step": 43580 - }, - { - "epoch": 1.106358674958751, - "grad_norm": 0.48539555072784424, - "learning_rate": 1.2624275500274994e-05, - "loss": 0.0448, - "step": 43585 - }, - { - "epoch": 1.1064855946186065, - "grad_norm": 0.6815574765205383, - "learning_rate": 1.2623429369209293e-05, - "loss": 0.0464, - "step": 43590 - }, - { - "epoch": 1.1066125142784617, - "grad_norm": 0.5537622570991516, - "learning_rate": 1.262258323814359e-05, - "loss": 0.0389, - "step": 43595 - }, - { - "epoch": 1.106739433938317, - "grad_norm": 0.469745010137558, - "learning_rate": 1.2621737107077888e-05, - "loss": 0.0382, - "step": 43600 - }, - { - "epoch": 1.1068663535981724, - "grad_norm": 0.6139605045318604, - "learning_rate": 1.2620890976012186e-05, - "loss": 0.0576, - "step": 43605 - }, - { - "epoch": 1.1069932732580277, - "grad_norm": 0.5346916317939758, - "learning_rate": 1.2620044844946485e-05, - "loss": 0.0574, - "step": 43610 - }, - { - "epoch": 1.1071201929178829, - "grad_norm": 0.5219326615333557, - "learning_rate": 1.261919871388078e-05, - "loss": 0.0498, - "step": 43615 - }, - { - "epoch": 1.1072471125777383, - "grad_norm": 0.3434651792049408, - "learning_rate": 1.2618352582815078e-05, - "loss": 0.0404, - "step": 43620 - }, - { - "epoch": 1.1073740322375936, - "grad_norm": 0.49155381321907043, - "learning_rate": 1.2617506451749377e-05, - "loss": 0.0371, - "step": 43625 - }, - { - "epoch": 1.107500951897449, - "grad_norm": 0.46253183484077454, - "learning_rate": 1.2616660320683675e-05, - "loss": 0.0488, - "step": 43630 - }, - { - "epoch": 1.1076278715573042, - "grad_norm": 0.4149441421031952, - "learning_rate": 1.2615814189617972e-05, - "loss": 0.0429, - "step": 43635 - }, - { - "epoch": 1.1077547912171595, - "grad_norm": 0.9162107110023499, - "learning_rate": 1.261496805855227e-05, - "loss": 0.0578, - "step": 43640 - }, - { - "epoch": 1.107881710877015, - "grad_norm": 0.26620617508888245, - "learning_rate": 1.2614121927486568e-05, - "loss": 0.0493, - "step": 43645 - }, - { - "epoch": 1.1080086305368702, - "grad_norm": 0.37828657031059265, - "learning_rate": 1.2613275796420867e-05, - "loss": 0.0564, - "step": 43650 - }, - { - "epoch": 1.1081355501967254, - "grad_norm": 0.36389321088790894, - "learning_rate": 1.2612429665355164e-05, - "loss": 0.0423, - "step": 43655 - }, - { - "epoch": 1.1082624698565808, - "grad_norm": 0.37057819962501526, - "learning_rate": 1.2611583534289462e-05, - "loss": 0.0438, - "step": 43660 - }, - { - "epoch": 1.108389389516436, - "grad_norm": 0.4144640862941742, - "learning_rate": 1.261073740322376e-05, - "loss": 0.0493, - "step": 43665 - }, - { - "epoch": 1.1085163091762915, - "grad_norm": 0.3773114085197449, - "learning_rate": 1.2609891272158059e-05, - "loss": 0.0501, - "step": 43670 - }, - { - "epoch": 1.1086432288361467, - "grad_norm": 0.3933560848236084, - "learning_rate": 1.2609045141092355e-05, - "loss": 0.0519, - "step": 43675 - }, - { - "epoch": 1.108770148496002, - "grad_norm": 0.6292005777359009, - "learning_rate": 1.2608199010026654e-05, - "loss": 0.0628, - "step": 43680 - }, - { - "epoch": 1.1088970681558574, - "grad_norm": 0.6834890246391296, - "learning_rate": 1.2607352878960952e-05, - "loss": 0.0709, - "step": 43685 - }, - { - "epoch": 1.1090239878157127, - "grad_norm": 0.5275284051895142, - "learning_rate": 1.260650674789525e-05, - "loss": 0.0507, - "step": 43690 - }, - { - "epoch": 1.1091509074755679, - "grad_norm": 0.6396077275276184, - "learning_rate": 1.2605660616829547e-05, - "loss": 0.0445, - "step": 43695 - }, - { - "epoch": 1.1092778271354233, - "grad_norm": 0.48945337533950806, - "learning_rate": 1.2604814485763846e-05, - "loss": 0.05, - "step": 43700 - }, - { - "epoch": 1.1094047467952786, - "grad_norm": 0.4549679458141327, - "learning_rate": 1.2603968354698144e-05, - "loss": 0.0588, - "step": 43705 - }, - { - "epoch": 1.1095316664551338, - "grad_norm": 0.48566535115242004, - "learning_rate": 1.2603122223632443e-05, - "loss": 0.0449, - "step": 43710 - }, - { - "epoch": 1.1096585861149892, - "grad_norm": 0.35559195280075073, - "learning_rate": 1.260227609256674e-05, - "loss": 0.0529, - "step": 43715 - }, - { - "epoch": 1.1097855057748445, - "grad_norm": 0.5000796318054199, - "learning_rate": 1.2601429961501038e-05, - "loss": 0.0558, - "step": 43720 - }, - { - "epoch": 1.1099124254347, - "grad_norm": 0.7394737005233765, - "learning_rate": 1.2600583830435336e-05, - "loss": 0.0518, - "step": 43725 - }, - { - "epoch": 1.1100393450945552, - "grad_norm": 0.8451775312423706, - "learning_rate": 1.2599737699369634e-05, - "loss": 0.0502, - "step": 43730 - }, - { - "epoch": 1.1101662647544104, - "grad_norm": 0.41479864716529846, - "learning_rate": 1.259889156830393e-05, - "loss": 0.0565, - "step": 43735 - }, - { - "epoch": 1.1102931844142658, - "grad_norm": 0.46780726313591003, - "learning_rate": 1.259804543723823e-05, - "loss": 0.0717, - "step": 43740 - }, - { - "epoch": 1.110420104074121, - "grad_norm": 0.5175195336341858, - "learning_rate": 1.2597199306172528e-05, - "loss": 0.0695, - "step": 43745 - }, - { - "epoch": 1.1105470237339763, - "grad_norm": 0.3236537575721741, - "learning_rate": 1.2596353175106826e-05, - "loss": 0.0484, - "step": 43750 - }, - { - "epoch": 1.1106739433938317, - "grad_norm": 0.6930049657821655, - "learning_rate": 1.2595507044041121e-05, - "loss": 0.0693, - "step": 43755 - }, - { - "epoch": 1.110800863053687, - "grad_norm": 0.3294721841812134, - "learning_rate": 1.259466091297542e-05, - "loss": 0.0535, - "step": 43760 - }, - { - "epoch": 1.1109277827135424, - "grad_norm": 0.5089268088340759, - "learning_rate": 1.2593814781909718e-05, - "loss": 0.0447, - "step": 43765 - }, - { - "epoch": 1.1110547023733977, - "grad_norm": 0.3697616755962372, - "learning_rate": 1.2592968650844017e-05, - "loss": 0.0477, - "step": 43770 - }, - { - "epoch": 1.1111816220332529, - "grad_norm": 0.6374106407165527, - "learning_rate": 1.2592122519778313e-05, - "loss": 0.0504, - "step": 43775 - }, - { - "epoch": 1.1113085416931083, - "grad_norm": 0.4760130047798157, - "learning_rate": 1.2591276388712612e-05, - "loss": 0.0578, - "step": 43780 - }, - { - "epoch": 1.1114354613529636, - "grad_norm": 0.3770494759082794, - "learning_rate": 1.259043025764691e-05, - "loss": 0.0448, - "step": 43785 - }, - { - "epoch": 1.1115623810128188, - "grad_norm": 0.4655259847640991, - "learning_rate": 1.2589584126581209e-05, - "loss": 0.0526, - "step": 43790 - }, - { - "epoch": 1.1116893006726742, - "grad_norm": 0.8696359395980835, - "learning_rate": 1.2588737995515505e-05, - "loss": 0.0697, - "step": 43795 - }, - { - "epoch": 1.1118162203325295, - "grad_norm": 0.42933547496795654, - "learning_rate": 1.2587891864449804e-05, - "loss": 0.0499, - "step": 43800 - }, - { - "epoch": 1.111943139992385, - "grad_norm": 0.2352372705936432, - "learning_rate": 1.2587045733384102e-05, - "loss": 0.036, - "step": 43805 - }, - { - "epoch": 1.1120700596522402, - "grad_norm": 0.3568854033946991, - "learning_rate": 1.25861996023184e-05, - "loss": 0.0612, - "step": 43810 - }, - { - "epoch": 1.1121969793120954, - "grad_norm": 0.3008568584918976, - "learning_rate": 1.2585353471252697e-05, - "loss": 0.0483, - "step": 43815 - }, - { - "epoch": 1.1123238989719508, - "grad_norm": 0.46842625737190247, - "learning_rate": 1.2584507340186996e-05, - "loss": 0.0502, - "step": 43820 - }, - { - "epoch": 1.112450818631806, - "grad_norm": 0.5424591898918152, - "learning_rate": 1.2583661209121294e-05, - "loss": 0.0575, - "step": 43825 - }, - { - "epoch": 1.1125777382916613, - "grad_norm": 0.47487497329711914, - "learning_rate": 1.2582815078055592e-05, - "loss": 0.0662, - "step": 43830 - }, - { - "epoch": 1.1127046579515167, - "grad_norm": 0.7904238700866699, - "learning_rate": 1.2581968946989889e-05, - "loss": 0.0437, - "step": 43835 - }, - { - "epoch": 1.112831577611372, - "grad_norm": 0.30014121532440186, - "learning_rate": 1.2581122815924187e-05, - "loss": 0.0369, - "step": 43840 - }, - { - "epoch": 1.1129584972712272, - "grad_norm": 0.5552923083305359, - "learning_rate": 1.2580276684858486e-05, - "loss": 0.0513, - "step": 43845 - }, - { - "epoch": 1.1130854169310826, - "grad_norm": 1.420127511024475, - "learning_rate": 1.2579430553792784e-05, - "loss": 0.0532, - "step": 43850 - }, - { - "epoch": 1.1132123365909379, - "grad_norm": 0.616949737071991, - "learning_rate": 1.2578584422727081e-05, - "loss": 0.0515, - "step": 43855 - }, - { - "epoch": 1.1133392562507933, - "grad_norm": 0.4324182868003845, - "learning_rate": 1.257773829166138e-05, - "loss": 0.0525, - "step": 43860 - }, - { - "epoch": 1.1134661759106486, - "grad_norm": 0.42960092425346375, - "learning_rate": 1.2576892160595678e-05, - "loss": 0.0603, - "step": 43865 - }, - { - "epoch": 1.1135930955705038, - "grad_norm": 0.40651044249534607, - "learning_rate": 1.2576046029529976e-05, - "loss": 0.031, - "step": 43870 - }, - { - "epoch": 1.1137200152303592, - "grad_norm": 0.5171717405319214, - "learning_rate": 1.2575199898464275e-05, - "loss": 0.0639, - "step": 43875 - }, - { - "epoch": 1.1138469348902145, - "grad_norm": 0.3765088617801666, - "learning_rate": 1.2574353767398571e-05, - "loss": 0.0517, - "step": 43880 - }, - { - "epoch": 1.11397385455007, - "grad_norm": 0.47007253766059875, - "learning_rate": 1.257350763633287e-05, - "loss": 0.0395, - "step": 43885 - }, - { - "epoch": 1.1141007742099251, - "grad_norm": 0.4947088956832886, - "learning_rate": 1.2572661505267168e-05, - "loss": 0.062, - "step": 43890 - }, - { - "epoch": 1.1142276938697804, - "grad_norm": 0.4928905665874481, - "learning_rate": 1.2571815374201467e-05, - "loss": 0.0556, - "step": 43895 - }, - { - "epoch": 1.1143546135296358, - "grad_norm": 1.071855068206787, - "learning_rate": 1.2570969243135762e-05, - "loss": 0.0629, - "step": 43900 - }, - { - "epoch": 1.114481533189491, - "grad_norm": 0.5248897671699524, - "learning_rate": 1.257012311207006e-05, - "loss": 0.0568, - "step": 43905 - }, - { - "epoch": 1.1146084528493463, - "grad_norm": 0.3847649097442627, - "learning_rate": 1.2569276981004358e-05, - "loss": 0.0503, - "step": 43910 - }, - { - "epoch": 1.1147353725092017, - "grad_norm": 0.6474745869636536, - "learning_rate": 1.2568430849938658e-05, - "loss": 0.0394, - "step": 43915 - }, - { - "epoch": 1.114862292169057, - "grad_norm": 0.3798583745956421, - "learning_rate": 1.2567584718872953e-05, - "loss": 0.0515, - "step": 43920 - }, - { - "epoch": 1.1149892118289122, - "grad_norm": 0.3275640904903412, - "learning_rate": 1.2566738587807252e-05, - "loss": 0.0709, - "step": 43925 - }, - { - "epoch": 1.1151161314887676, - "grad_norm": 0.5060745477676392, - "learning_rate": 1.256589245674155e-05, - "loss": 0.041, - "step": 43930 - }, - { - "epoch": 1.1152430511486229, - "grad_norm": 0.3755316138267517, - "learning_rate": 1.2565046325675849e-05, - "loss": 0.0389, - "step": 43935 - }, - { - "epoch": 1.1153699708084783, - "grad_norm": 0.7065991163253784, - "learning_rate": 1.2564200194610145e-05, - "loss": 0.0427, - "step": 43940 - }, - { - "epoch": 1.1154968904683336, - "grad_norm": 0.3301346004009247, - "learning_rate": 1.2563354063544444e-05, - "loss": 0.0484, - "step": 43945 - }, - { - "epoch": 1.1156238101281888, - "grad_norm": 0.5258830189704895, - "learning_rate": 1.2562507932478742e-05, - "loss": 0.0652, - "step": 43950 - }, - { - "epoch": 1.1157507297880442, - "grad_norm": 0.7843687534332275, - "learning_rate": 1.256166180141304e-05, - "loss": 0.0682, - "step": 43955 - }, - { - "epoch": 1.1158776494478995, - "grad_norm": 0.47177740931510925, - "learning_rate": 1.2560815670347337e-05, - "loss": 0.0531, - "step": 43960 - }, - { - "epoch": 1.1160045691077547, - "grad_norm": 0.4714455306529999, - "learning_rate": 1.2559969539281636e-05, - "loss": 0.0487, - "step": 43965 - }, - { - "epoch": 1.1161314887676101, - "grad_norm": 0.3999737799167633, - "learning_rate": 1.2559123408215934e-05, - "loss": 0.0372, - "step": 43970 - }, - { - "epoch": 1.1162584084274654, - "grad_norm": 0.8014691472053528, - "learning_rate": 1.2558277277150232e-05, - "loss": 0.0621, - "step": 43975 - }, - { - "epoch": 1.1163853280873208, - "grad_norm": 0.40940189361572266, - "learning_rate": 1.255743114608453e-05, - "loss": 0.053, - "step": 43980 - }, - { - "epoch": 1.116512247747176, - "grad_norm": 0.3112299144268036, - "learning_rate": 1.2556585015018828e-05, - "loss": 0.032, - "step": 43985 - }, - { - "epoch": 1.1166391674070313, - "grad_norm": 0.7340404391288757, - "learning_rate": 1.2555738883953126e-05, - "loss": 0.0469, - "step": 43990 - }, - { - "epoch": 1.1167660870668867, - "grad_norm": 0.5560369491577148, - "learning_rate": 1.2554892752887424e-05, - "loss": 0.0526, - "step": 43995 - }, - { - "epoch": 1.116893006726742, - "grad_norm": 0.580773115158081, - "learning_rate": 1.2554046621821721e-05, - "loss": 0.0471, - "step": 44000 - }, - { - "epoch": 1.1170199263865972, - "grad_norm": 0.3875924050807953, - "learning_rate": 1.255320049075602e-05, - "loss": 0.0696, - "step": 44005 - }, - { - "epoch": 1.1171468460464526, - "grad_norm": 0.5625787377357483, - "learning_rate": 1.2552354359690318e-05, - "loss": 0.0551, - "step": 44010 - }, - { - "epoch": 1.1172737657063079, - "grad_norm": 0.4743764102458954, - "learning_rate": 1.2551508228624616e-05, - "loss": 0.0602, - "step": 44015 - }, - { - "epoch": 1.1174006853661633, - "grad_norm": 0.7635930180549622, - "learning_rate": 1.2550662097558913e-05, - "loss": 0.06, - "step": 44020 - }, - { - "epoch": 1.1175276050260186, - "grad_norm": 0.543179988861084, - "learning_rate": 1.2549815966493211e-05, - "loss": 0.0594, - "step": 44025 - }, - { - "epoch": 1.1176545246858738, - "grad_norm": 0.36121687293052673, - "learning_rate": 1.254896983542751e-05, - "loss": 0.046, - "step": 44030 - }, - { - "epoch": 1.1177814443457292, - "grad_norm": 0.32627245783805847, - "learning_rate": 1.2548123704361808e-05, - "loss": 0.0423, - "step": 44035 - }, - { - "epoch": 1.1179083640055845, - "grad_norm": 0.7371002435684204, - "learning_rate": 1.2547277573296103e-05, - "loss": 0.0408, - "step": 44040 - }, - { - "epoch": 1.1180352836654397, - "grad_norm": 0.5517522096633911, - "learning_rate": 1.2546431442230402e-05, - "loss": 0.0604, - "step": 44045 - }, - { - "epoch": 1.1181622033252951, - "grad_norm": 1.0582062005996704, - "learning_rate": 1.25455853111647e-05, - "loss": 0.0663, - "step": 44050 - }, - { - "epoch": 1.1182891229851504, - "grad_norm": 0.5968334078788757, - "learning_rate": 1.2544739180098998e-05, - "loss": 0.0403, - "step": 44055 - }, - { - "epoch": 1.1184160426450056, - "grad_norm": 0.35640019178390503, - "learning_rate": 1.2543893049033295e-05, - "loss": 0.0501, - "step": 44060 - }, - { - "epoch": 1.118542962304861, - "grad_norm": 0.3587621748447418, - "learning_rate": 1.2543046917967594e-05, - "loss": 0.0322, - "step": 44065 - }, - { - "epoch": 1.1186698819647163, - "grad_norm": 0.5785149335861206, - "learning_rate": 1.2542200786901892e-05, - "loss": 0.0513, - "step": 44070 - }, - { - "epoch": 1.1187968016245717, - "grad_norm": 0.549583375453949, - "learning_rate": 1.254135465583619e-05, - "loss": 0.0607, - "step": 44075 - }, - { - "epoch": 1.118923721284427, - "grad_norm": 0.6255220174789429, - "learning_rate": 1.2540508524770487e-05, - "loss": 0.0607, - "step": 44080 - }, - { - "epoch": 1.1190506409442822, - "grad_norm": 0.4887523055076599, - "learning_rate": 1.2539662393704785e-05, - "loss": 0.0529, - "step": 44085 - }, - { - "epoch": 1.1191775606041376, - "grad_norm": 0.34589090943336487, - "learning_rate": 1.2538816262639084e-05, - "loss": 0.0481, - "step": 44090 - }, - { - "epoch": 1.1193044802639929, - "grad_norm": 0.6570618748664856, - "learning_rate": 1.2537970131573382e-05, - "loss": 0.0525, - "step": 44095 - }, - { - "epoch": 1.119431399923848, - "grad_norm": 0.831450879573822, - "learning_rate": 1.2537124000507679e-05, - "loss": 0.0496, - "step": 44100 - }, - { - "epoch": 1.1195583195837036, - "grad_norm": 0.4678487479686737, - "learning_rate": 1.2536277869441977e-05, - "loss": 0.0521, - "step": 44105 - }, - { - "epoch": 1.1196852392435588, - "grad_norm": 0.4116367995738983, - "learning_rate": 1.2535431738376276e-05, - "loss": 0.044, - "step": 44110 - }, - { - "epoch": 1.1198121589034142, - "grad_norm": 0.46668344736099243, - "learning_rate": 1.2534585607310574e-05, - "loss": 0.0539, - "step": 44115 - }, - { - "epoch": 1.1199390785632695, - "grad_norm": 1.377854347229004, - "learning_rate": 1.2533739476244871e-05, - "loss": 0.0452, - "step": 44120 - }, - { - "epoch": 1.1200659982231247, - "grad_norm": 0.30428993701934814, - "learning_rate": 1.253289334517917e-05, - "loss": 0.0679, - "step": 44125 - }, - { - "epoch": 1.1201929178829801, - "grad_norm": 0.4714408218860626, - "learning_rate": 1.2532047214113468e-05, - "loss": 0.0615, - "step": 44130 - }, - { - "epoch": 1.1203198375428354, - "grad_norm": 0.5117413997650146, - "learning_rate": 1.2531201083047766e-05, - "loss": 0.0837, - "step": 44135 - }, - { - "epoch": 1.1204467572026906, - "grad_norm": 0.4515742063522339, - "learning_rate": 1.2530354951982063e-05, - "loss": 0.0604, - "step": 44140 - }, - { - "epoch": 1.120573676862546, - "grad_norm": 0.5166027545928955, - "learning_rate": 1.2529508820916361e-05, - "loss": 0.0512, - "step": 44145 - }, - { - "epoch": 1.1207005965224013, - "grad_norm": 0.6457964181900024, - "learning_rate": 1.252866268985066e-05, - "loss": 0.058, - "step": 44150 - }, - { - "epoch": 1.1208275161822567, - "grad_norm": 0.3932969570159912, - "learning_rate": 1.2527816558784958e-05, - "loss": 0.0495, - "step": 44155 - }, - { - "epoch": 1.120954435842112, - "grad_norm": 0.5344251394271851, - "learning_rate": 1.2526970427719255e-05, - "loss": 0.0465, - "step": 44160 - }, - { - "epoch": 1.1210813555019672, - "grad_norm": 0.45042720437049866, - "learning_rate": 1.2526124296653553e-05, - "loss": 0.0588, - "step": 44165 - }, - { - "epoch": 1.1212082751618226, - "grad_norm": 0.7757139205932617, - "learning_rate": 1.2525278165587852e-05, - "loss": 0.0695, - "step": 44170 - }, - { - "epoch": 1.1213351948216779, - "grad_norm": 0.3993304371833801, - "learning_rate": 1.252443203452215e-05, - "loss": 0.0675, - "step": 44175 - }, - { - "epoch": 1.121462114481533, - "grad_norm": 0.5248726606369019, - "learning_rate": 1.2523585903456445e-05, - "loss": 0.0645, - "step": 44180 - }, - { - "epoch": 1.1215890341413886, - "grad_norm": 0.4171711206436157, - "learning_rate": 1.2522739772390743e-05, - "loss": 0.0566, - "step": 44185 - }, - { - "epoch": 1.1217159538012438, - "grad_norm": 0.34507089853286743, - "learning_rate": 1.2521893641325042e-05, - "loss": 0.0477, - "step": 44190 - }, - { - "epoch": 1.121842873461099, - "grad_norm": 0.584272027015686, - "learning_rate": 1.252104751025934e-05, - "loss": 0.0518, - "step": 44195 - }, - { - "epoch": 1.1219697931209545, - "grad_norm": 0.3318333327770233, - "learning_rate": 1.2520201379193637e-05, - "loss": 0.0604, - "step": 44200 - }, - { - "epoch": 1.1220967127808097, - "grad_norm": 0.5036022067070007, - "learning_rate": 1.2519355248127935e-05, - "loss": 0.0467, - "step": 44205 - }, - { - "epoch": 1.1222236324406651, - "grad_norm": 0.40706202387809753, - "learning_rate": 1.2518509117062234e-05, - "loss": 0.0431, - "step": 44210 - }, - { - "epoch": 1.1223505521005204, - "grad_norm": 0.5295720100402832, - "learning_rate": 1.2517662985996532e-05, - "loss": 0.0397, - "step": 44215 - }, - { - "epoch": 1.1224774717603756, - "grad_norm": 0.4207756817340851, - "learning_rate": 1.2516816854930829e-05, - "loss": 0.0497, - "step": 44220 - }, - { - "epoch": 1.122604391420231, - "grad_norm": 0.48490339517593384, - "learning_rate": 1.2515970723865127e-05, - "loss": 0.0528, - "step": 44225 - }, - { - "epoch": 1.1227313110800863, - "grad_norm": 0.5717187523841858, - "learning_rate": 1.2515124592799426e-05, - "loss": 0.0471, - "step": 44230 - }, - { - "epoch": 1.1228582307399417, - "grad_norm": 0.48141902685165405, - "learning_rate": 1.2514278461733724e-05, - "loss": 0.0929, - "step": 44235 - }, - { - "epoch": 1.122985150399797, - "grad_norm": 0.29294589161872864, - "learning_rate": 1.251343233066802e-05, - "loss": 0.0395, - "step": 44240 - }, - { - "epoch": 1.1231120700596522, - "grad_norm": 0.8421854972839355, - "learning_rate": 1.2512586199602319e-05, - "loss": 0.0658, - "step": 44245 - }, - { - "epoch": 1.1232389897195076, - "grad_norm": 0.34770962595939636, - "learning_rate": 1.2511740068536617e-05, - "loss": 0.0361, - "step": 44250 - }, - { - "epoch": 1.1233659093793629, - "grad_norm": 0.31356602907180786, - "learning_rate": 1.2510893937470916e-05, - "loss": 0.0352, - "step": 44255 - }, - { - "epoch": 1.123492829039218, - "grad_norm": 0.3350070118904114, - "learning_rate": 1.2510047806405213e-05, - "loss": 0.0436, - "step": 44260 - }, - { - "epoch": 1.1236197486990736, - "grad_norm": 0.7140260338783264, - "learning_rate": 1.2509201675339511e-05, - "loss": 0.0412, - "step": 44265 - }, - { - "epoch": 1.1237466683589288, - "grad_norm": 0.28217437863349915, - "learning_rate": 1.250835554427381e-05, - "loss": 0.0444, - "step": 44270 - }, - { - "epoch": 1.123873588018784, - "grad_norm": 0.806566596031189, - "learning_rate": 1.2507509413208108e-05, - "loss": 0.0622, - "step": 44275 - }, - { - "epoch": 1.1240005076786395, - "grad_norm": 0.3467210531234741, - "learning_rate": 1.2506663282142404e-05, - "loss": 0.0474, - "step": 44280 - }, - { - "epoch": 1.1241274273384947, - "grad_norm": 0.40883490443229675, - "learning_rate": 1.2505817151076703e-05, - "loss": 0.0406, - "step": 44285 - }, - { - "epoch": 1.1242543469983501, - "grad_norm": 0.3200833797454834, - "learning_rate": 1.2504971020011001e-05, - "loss": 0.0427, - "step": 44290 - }, - { - "epoch": 1.1243812666582054, - "grad_norm": 0.6907526850700378, - "learning_rate": 1.25041248889453e-05, - "loss": 0.0455, - "step": 44295 - }, - { - "epoch": 1.1245081863180606, - "grad_norm": 0.3448645770549774, - "learning_rate": 1.2503278757879595e-05, - "loss": 0.0527, - "step": 44300 - }, - { - "epoch": 1.124635105977916, - "grad_norm": 0.7096524834632874, - "learning_rate": 1.2502432626813895e-05, - "loss": 0.067, - "step": 44305 - }, - { - "epoch": 1.1247620256377713, - "grad_norm": 0.39133280515670776, - "learning_rate": 1.2501586495748193e-05, - "loss": 0.0533, - "step": 44310 - }, - { - "epoch": 1.1248889452976265, - "grad_norm": 0.38641437888145447, - "learning_rate": 1.2500740364682492e-05, - "loss": 0.0374, - "step": 44315 - }, - { - "epoch": 1.125015864957482, - "grad_norm": 0.4215777516365051, - "learning_rate": 1.2499894233616787e-05, - "loss": 0.039, - "step": 44320 - }, - { - "epoch": 1.1251427846173372, - "grad_norm": 0.8152154684066772, - "learning_rate": 1.2499048102551085e-05, - "loss": 0.0525, - "step": 44325 - }, - { - "epoch": 1.1252697042771924, - "grad_norm": 0.6331118941307068, - "learning_rate": 1.2498201971485383e-05, - "loss": 0.049, - "step": 44330 - }, - { - "epoch": 1.1253966239370479, - "grad_norm": 0.5890223979949951, - "learning_rate": 1.2497355840419682e-05, - "loss": 0.0685, - "step": 44335 - }, - { - "epoch": 1.125523543596903, - "grad_norm": 0.40250420570373535, - "learning_rate": 1.2496509709353979e-05, - "loss": 0.0535, - "step": 44340 - }, - { - "epoch": 1.1256504632567585, - "grad_norm": 0.4972342848777771, - "learning_rate": 1.2495663578288277e-05, - "loss": 0.057, - "step": 44345 - }, - { - "epoch": 1.1257773829166138, - "grad_norm": 1.773880124092102, - "learning_rate": 1.2494817447222575e-05, - "loss": 0.0582, - "step": 44350 - }, - { - "epoch": 1.125904302576469, - "grad_norm": 0.4038180112838745, - "learning_rate": 1.2493971316156874e-05, - "loss": 0.0757, - "step": 44355 - }, - { - "epoch": 1.1260312222363245, - "grad_norm": 0.5077897906303406, - "learning_rate": 1.249312518509117e-05, - "loss": 0.0549, - "step": 44360 - }, - { - "epoch": 1.1261581418961797, - "grad_norm": 0.7027212381362915, - "learning_rate": 1.2492279054025469e-05, - "loss": 0.0405, - "step": 44365 - }, - { - "epoch": 1.1262850615560351, - "grad_norm": 0.6767634749412537, - "learning_rate": 1.2491432922959767e-05, - "loss": 0.0481, - "step": 44370 - }, - { - "epoch": 1.1264119812158904, - "grad_norm": 0.46838143467903137, - "learning_rate": 1.2490586791894066e-05, - "loss": 0.0545, - "step": 44375 - }, - { - "epoch": 1.1265389008757456, - "grad_norm": 0.7383913397789001, - "learning_rate": 1.2489740660828362e-05, - "loss": 0.0486, - "step": 44380 - }, - { - "epoch": 1.126665820535601, - "grad_norm": 0.4356353282928467, - "learning_rate": 1.248889452976266e-05, - "loss": 0.0509, - "step": 44385 - }, - { - "epoch": 1.1267927401954563, - "grad_norm": 0.3427311182022095, - "learning_rate": 1.248804839869696e-05, - "loss": 0.0357, - "step": 44390 - }, - { - "epoch": 1.1269196598553115, - "grad_norm": 0.4186464548110962, - "learning_rate": 1.2487202267631258e-05, - "loss": 0.0564, - "step": 44395 - }, - { - "epoch": 1.127046579515167, - "grad_norm": 0.4071543216705322, - "learning_rate": 1.2486356136565556e-05, - "loss": 0.0404, - "step": 44400 - }, - { - "epoch": 1.1271734991750222, - "grad_norm": 1.4595874547958374, - "learning_rate": 1.2485510005499853e-05, - "loss": 0.0475, - "step": 44405 - }, - { - "epoch": 1.1273004188348774, - "grad_norm": 0.8169602751731873, - "learning_rate": 1.2484663874434151e-05, - "loss": 0.0428, - "step": 44410 - }, - { - "epoch": 1.1274273384947329, - "grad_norm": 0.4044994115829468, - "learning_rate": 1.248381774336845e-05, - "loss": 0.0443, - "step": 44415 - }, - { - "epoch": 1.127554258154588, - "grad_norm": 0.9020789265632629, - "learning_rate": 1.2482971612302748e-05, - "loss": 0.047, - "step": 44420 - }, - { - "epoch": 1.1276811778144435, - "grad_norm": 0.6633614897727966, - "learning_rate": 1.2482125481237045e-05, - "loss": 0.0431, - "step": 44425 - }, - { - "epoch": 1.1278080974742988, - "grad_norm": 0.48653003573417664, - "learning_rate": 1.2481279350171343e-05, - "loss": 0.0681, - "step": 44430 - }, - { - "epoch": 1.127935017134154, - "grad_norm": 0.4067285656929016, - "learning_rate": 1.2480433219105641e-05, - "loss": 0.0509, - "step": 44435 - }, - { - "epoch": 1.1280619367940095, - "grad_norm": 0.3266960084438324, - "learning_rate": 1.247958708803994e-05, - "loss": 0.0546, - "step": 44440 - }, - { - "epoch": 1.1281888564538647, - "grad_norm": 0.3864859342575073, - "learning_rate": 1.2478740956974237e-05, - "loss": 0.0733, - "step": 44445 - }, - { - "epoch": 1.1283157761137201, - "grad_norm": 0.965714156627655, - "learning_rate": 1.2477894825908535e-05, - "loss": 0.0551, - "step": 44450 - }, - { - "epoch": 1.1284426957735754, - "grad_norm": 0.43959760665893555, - "learning_rate": 1.2477048694842833e-05, - "loss": 0.0635, - "step": 44455 - }, - { - "epoch": 1.1285696154334306, - "grad_norm": 0.6000515222549438, - "learning_rate": 1.2476202563777132e-05, - "loss": 0.0612, - "step": 44460 - }, - { - "epoch": 1.128696535093286, - "grad_norm": 0.40297073125839233, - "learning_rate": 1.2475356432711427e-05, - "loss": 0.0649, - "step": 44465 - }, - { - "epoch": 1.1288234547531413, - "grad_norm": 0.5928261876106262, - "learning_rate": 1.2474510301645725e-05, - "loss": 0.0722, - "step": 44470 - }, - { - "epoch": 1.1289503744129965, - "grad_norm": 0.4836398661136627, - "learning_rate": 1.2473664170580024e-05, - "loss": 0.041, - "step": 44475 - }, - { - "epoch": 1.129077294072852, - "grad_norm": 0.34154394268989563, - "learning_rate": 1.2472818039514324e-05, - "loss": 0.0473, - "step": 44480 - }, - { - "epoch": 1.1292042137327072, - "grad_norm": 0.3293076157569885, - "learning_rate": 1.2471971908448619e-05, - "loss": 0.0459, - "step": 44485 - }, - { - "epoch": 1.1293311333925624, - "grad_norm": 0.4858744144439697, - "learning_rate": 1.2471125777382917e-05, - "loss": 0.0419, - "step": 44490 - }, - { - "epoch": 1.1294580530524179, - "grad_norm": 1.0303422212600708, - "learning_rate": 1.2470279646317215e-05, - "loss": 0.0576, - "step": 44495 - }, - { - "epoch": 1.129584972712273, - "grad_norm": 0.577472984790802, - "learning_rate": 1.2469433515251514e-05, - "loss": 0.0498, - "step": 44500 - }, - { - "epoch": 1.1297118923721285, - "grad_norm": 0.5648263692855835, - "learning_rate": 1.246858738418581e-05, - "loss": 0.0581, - "step": 44505 - }, - { - "epoch": 1.1298388120319838, - "grad_norm": 0.6672887802124023, - "learning_rate": 1.2467741253120109e-05, - "loss": 0.0655, - "step": 44510 - }, - { - "epoch": 1.129965731691839, - "grad_norm": 0.7704229354858398, - "learning_rate": 1.2466895122054407e-05, - "loss": 0.053, - "step": 44515 - }, - { - "epoch": 1.1300926513516945, - "grad_norm": 0.5180334448814392, - "learning_rate": 1.2466048990988706e-05, - "loss": 0.0604, - "step": 44520 - }, - { - "epoch": 1.1302195710115497, - "grad_norm": 0.5185772180557251, - "learning_rate": 1.2465202859923002e-05, - "loss": 0.0681, - "step": 44525 - }, - { - "epoch": 1.130346490671405, - "grad_norm": 0.6789932250976562, - "learning_rate": 1.2464356728857301e-05, - "loss": 0.059, - "step": 44530 - }, - { - "epoch": 1.1304734103312604, - "grad_norm": 0.36683177947998047, - "learning_rate": 1.24635105977916e-05, - "loss": 0.0615, - "step": 44535 - }, - { - "epoch": 1.1306003299911156, - "grad_norm": 0.41864484548568726, - "learning_rate": 1.2462664466725898e-05, - "loss": 0.0477, - "step": 44540 - }, - { - "epoch": 1.1307272496509708, - "grad_norm": 0.4695507884025574, - "learning_rate": 1.2461818335660194e-05, - "loss": 0.0628, - "step": 44545 - }, - { - "epoch": 1.1308541693108263, - "grad_norm": 0.23197278380393982, - "learning_rate": 1.2460972204594493e-05, - "loss": 0.033, - "step": 44550 - }, - { - "epoch": 1.1309810889706815, - "grad_norm": 0.44211065769195557, - "learning_rate": 1.2460126073528791e-05, - "loss": 0.0523, - "step": 44555 - }, - { - "epoch": 1.131108008630537, - "grad_norm": 0.5441451668739319, - "learning_rate": 1.245927994246309e-05, - "loss": 0.0436, - "step": 44560 - }, - { - "epoch": 1.1312349282903922, - "grad_norm": 0.3024025857448578, - "learning_rate": 1.2458433811397386e-05, - "loss": 0.0433, - "step": 44565 - }, - { - "epoch": 1.1313618479502474, - "grad_norm": 0.6911824941635132, - "learning_rate": 1.2457587680331685e-05, - "loss": 0.0463, - "step": 44570 - }, - { - "epoch": 1.1314887676101029, - "grad_norm": 0.5398629903793335, - "learning_rate": 1.2456741549265983e-05, - "loss": 0.0716, - "step": 44575 - }, - { - "epoch": 1.131615687269958, - "grad_norm": 0.39086467027664185, - "learning_rate": 1.2455895418200282e-05, - "loss": 0.0614, - "step": 44580 - }, - { - "epoch": 1.1317426069298135, - "grad_norm": 0.2896938920021057, - "learning_rate": 1.2455049287134578e-05, - "loss": 0.0444, - "step": 44585 - }, - { - "epoch": 1.1318695265896688, - "grad_norm": 0.4426625967025757, - "learning_rate": 1.2454203156068877e-05, - "loss": 0.0432, - "step": 44590 - }, - { - "epoch": 1.131996446249524, - "grad_norm": 0.6854705810546875, - "learning_rate": 1.2453357025003175e-05, - "loss": 0.0587, - "step": 44595 - }, - { - "epoch": 1.1321233659093795, - "grad_norm": 0.4132881164550781, - "learning_rate": 1.2452510893937473e-05, - "loss": 0.0491, - "step": 44600 - }, - { - "epoch": 1.1322502855692347, - "grad_norm": 1.3688857555389404, - "learning_rate": 1.2451664762871768e-05, - "loss": 0.0422, - "step": 44605 - }, - { - "epoch": 1.13237720522909, - "grad_norm": 0.3072666823863983, - "learning_rate": 1.2450818631806067e-05, - "loss": 0.039, - "step": 44610 - }, - { - "epoch": 1.1325041248889454, - "grad_norm": 0.5190646648406982, - "learning_rate": 1.2449972500740365e-05, - "loss": 0.0551, - "step": 44615 - }, - { - "epoch": 1.1326310445488006, - "grad_norm": 0.4497527778148651, - "learning_rate": 1.2449126369674664e-05, - "loss": 0.0478, - "step": 44620 - }, - { - "epoch": 1.1327579642086558, - "grad_norm": 0.6427005529403687, - "learning_rate": 1.244828023860896e-05, - "loss": 0.047, - "step": 44625 - }, - { - "epoch": 1.1328848838685113, - "grad_norm": 0.5790553092956543, - "learning_rate": 1.2447434107543259e-05, - "loss": 0.0648, - "step": 44630 - }, - { - "epoch": 1.1330118035283665, - "grad_norm": 0.34975504875183105, - "learning_rate": 1.2446587976477557e-05, - "loss": 0.0458, - "step": 44635 - }, - { - "epoch": 1.133138723188222, - "grad_norm": 0.6916750073432922, - "learning_rate": 1.2445741845411856e-05, - "loss": 0.0818, - "step": 44640 - }, - { - "epoch": 1.1332656428480772, - "grad_norm": 0.41590267419815063, - "learning_rate": 1.2444895714346152e-05, - "loss": 0.0528, - "step": 44645 - }, - { - "epoch": 1.1333925625079324, - "grad_norm": 0.5530275702476501, - "learning_rate": 1.244404958328045e-05, - "loss": 0.0463, - "step": 44650 - }, - { - "epoch": 1.1335194821677879, - "grad_norm": 0.48469144105911255, - "learning_rate": 1.2443203452214749e-05, - "loss": 0.0463, - "step": 44655 - }, - { - "epoch": 1.133646401827643, - "grad_norm": 0.44549447298049927, - "learning_rate": 1.2442357321149047e-05, - "loss": 0.0433, - "step": 44660 - }, - { - "epoch": 1.1337733214874983, - "grad_norm": 0.4469276964664459, - "learning_rate": 1.2441511190083344e-05, - "loss": 0.0623, - "step": 44665 - }, - { - "epoch": 1.1339002411473538, - "grad_norm": 0.573004961013794, - "learning_rate": 1.2440665059017643e-05, - "loss": 0.0533, - "step": 44670 - }, - { - "epoch": 1.134027160807209, - "grad_norm": 0.4197206497192383, - "learning_rate": 1.2439818927951941e-05, - "loss": 0.0484, - "step": 44675 - }, - { - "epoch": 1.1341540804670642, - "grad_norm": 1.3997578620910645, - "learning_rate": 1.243897279688624e-05, - "loss": 0.0681, - "step": 44680 - }, - { - "epoch": 1.1342810001269197, - "grad_norm": 0.8051838278770447, - "learning_rate": 1.2438126665820536e-05, - "loss": 0.0831, - "step": 44685 - }, - { - "epoch": 1.134407919786775, - "grad_norm": 0.42057064175605774, - "learning_rate": 1.2437280534754834e-05, - "loss": 0.0496, - "step": 44690 - }, - { - "epoch": 1.1345348394466304, - "grad_norm": 0.261934757232666, - "learning_rate": 1.2436434403689133e-05, - "loss": 0.053, - "step": 44695 - }, - { - "epoch": 1.1346617591064856, - "grad_norm": 0.4998563230037689, - "learning_rate": 1.2435588272623431e-05, - "loss": 0.0629, - "step": 44700 - }, - { - "epoch": 1.1347886787663408, - "grad_norm": 0.4371684789657593, - "learning_rate": 1.2434742141557728e-05, - "loss": 0.046, - "step": 44705 - }, - { - "epoch": 1.1349155984261963, - "grad_norm": 0.514193594455719, - "learning_rate": 1.2433896010492026e-05, - "loss": 0.067, - "step": 44710 - }, - { - "epoch": 1.1350425180860515, - "grad_norm": 0.6377493143081665, - "learning_rate": 1.2433049879426325e-05, - "loss": 0.0605, - "step": 44715 - }, - { - "epoch": 1.135169437745907, - "grad_norm": 0.5883370637893677, - "learning_rate": 1.2432203748360623e-05, - "loss": 0.0435, - "step": 44720 - }, - { - "epoch": 1.1352963574057622, - "grad_norm": 0.7864179015159607, - "learning_rate": 1.243135761729492e-05, - "loss": 0.0402, - "step": 44725 - }, - { - "epoch": 1.1354232770656174, - "grad_norm": 0.3504839241504669, - "learning_rate": 1.2430511486229218e-05, - "loss": 0.0358, - "step": 44730 - }, - { - "epoch": 1.1355501967254729, - "grad_norm": 0.3276433050632477, - "learning_rate": 1.2429665355163517e-05, - "loss": 0.0503, - "step": 44735 - }, - { - "epoch": 1.135677116385328, - "grad_norm": 0.6173525452613831, - "learning_rate": 1.2428819224097815e-05, - "loss": 0.0454, - "step": 44740 - }, - { - "epoch": 1.1358040360451833, - "grad_norm": 0.5093593001365662, - "learning_rate": 1.242797309303211e-05, - "loss": 0.0487, - "step": 44745 - }, - { - "epoch": 1.1359309557050388, - "grad_norm": 1.4711940288543701, - "learning_rate": 1.2427126961966409e-05, - "loss": 0.0674, - "step": 44750 - }, - { - "epoch": 1.136057875364894, - "grad_norm": 0.5368785858154297, - "learning_rate": 1.2426280830900707e-05, - "loss": 0.0553, - "step": 44755 - }, - { - "epoch": 1.1361847950247492, - "grad_norm": 0.5299468636512756, - "learning_rate": 1.2425434699835005e-05, - "loss": 0.0405, - "step": 44760 - }, - { - "epoch": 1.1363117146846047, - "grad_norm": 0.34081101417541504, - "learning_rate": 1.2424588568769302e-05, - "loss": 0.0563, - "step": 44765 - }, - { - "epoch": 1.13643863434446, - "grad_norm": 0.29639896750450134, - "learning_rate": 1.24237424377036e-05, - "loss": 0.0387, - "step": 44770 - }, - { - "epoch": 1.1365655540043154, - "grad_norm": 0.48763564229011536, - "learning_rate": 1.2422896306637899e-05, - "loss": 0.0468, - "step": 44775 - }, - { - "epoch": 1.1366924736641706, - "grad_norm": 0.45191967487335205, - "learning_rate": 1.2422050175572197e-05, - "loss": 0.045, - "step": 44780 - }, - { - "epoch": 1.1368193933240258, - "grad_norm": 0.5769696235656738, - "learning_rate": 1.2421204044506494e-05, - "loss": 0.053, - "step": 44785 - }, - { - "epoch": 1.1369463129838813, - "grad_norm": 0.5370128750801086, - "learning_rate": 1.2420357913440792e-05, - "loss": 0.0599, - "step": 44790 - }, - { - "epoch": 1.1370732326437365, - "grad_norm": 0.7275730967521667, - "learning_rate": 1.241951178237509e-05, - "loss": 0.0674, - "step": 44795 - }, - { - "epoch": 1.137200152303592, - "grad_norm": 0.4656013250350952, - "learning_rate": 1.241866565130939e-05, - "loss": 0.0388, - "step": 44800 - }, - { - "epoch": 1.1373270719634472, - "grad_norm": 0.5228630900382996, - "learning_rate": 1.2417819520243686e-05, - "loss": 0.0806, - "step": 44805 - }, - { - "epoch": 1.1374539916233024, - "grad_norm": 0.37429681420326233, - "learning_rate": 1.2416973389177984e-05, - "loss": 0.0566, - "step": 44810 - }, - { - "epoch": 1.1375809112831576, - "grad_norm": 0.41060522198677063, - "learning_rate": 1.2416127258112283e-05, - "loss": 0.0527, - "step": 44815 - }, - { - "epoch": 1.137707830943013, - "grad_norm": 0.4963805079460144, - "learning_rate": 1.2415281127046581e-05, - "loss": 0.045, - "step": 44820 - }, - { - "epoch": 1.1378347506028683, - "grad_norm": 0.7669214606285095, - "learning_rate": 1.2414434995980878e-05, - "loss": 0.0568, - "step": 44825 - }, - { - "epoch": 1.1379616702627238, - "grad_norm": 0.32996857166290283, - "learning_rate": 1.2413588864915176e-05, - "loss": 0.0469, - "step": 44830 - }, - { - "epoch": 1.138088589922579, - "grad_norm": 0.3804533779621124, - "learning_rate": 1.2412742733849475e-05, - "loss": 0.0544, - "step": 44835 - }, - { - "epoch": 1.1382155095824342, - "grad_norm": 0.8034431338310242, - "learning_rate": 1.2411896602783773e-05, - "loss": 0.0755, - "step": 44840 - }, - { - "epoch": 1.1383424292422897, - "grad_norm": 0.5794672966003418, - "learning_rate": 1.241105047171807e-05, - "loss": 0.0681, - "step": 44845 - }, - { - "epoch": 1.138469348902145, - "grad_norm": 1.0967291593551636, - "learning_rate": 1.2410204340652368e-05, - "loss": 0.054, - "step": 44850 - }, - { - "epoch": 1.1385962685620004, - "grad_norm": 0.3291395306587219, - "learning_rate": 1.2409358209586667e-05, - "loss": 0.0495, - "step": 44855 - }, - { - "epoch": 1.1387231882218556, - "grad_norm": 0.5030404329299927, - "learning_rate": 1.2408512078520965e-05, - "loss": 0.0564, - "step": 44860 - }, - { - "epoch": 1.1388501078817108, - "grad_norm": 0.3546353280544281, - "learning_rate": 1.240766594745526e-05, - "loss": 0.0647, - "step": 44865 - }, - { - "epoch": 1.1389770275415663, - "grad_norm": 0.5138427019119263, - "learning_rate": 1.240681981638956e-05, - "loss": 0.0647, - "step": 44870 - }, - { - "epoch": 1.1391039472014215, - "grad_norm": 0.4728758633136749, - "learning_rate": 1.2405973685323858e-05, - "loss": 0.0632, - "step": 44875 - }, - { - "epoch": 1.1392308668612767, - "grad_norm": 0.5887004733085632, - "learning_rate": 1.2405127554258157e-05, - "loss": 0.0505, - "step": 44880 - }, - { - "epoch": 1.1393577865211322, - "grad_norm": 0.27240318059921265, - "learning_rate": 1.2404281423192452e-05, - "loss": 0.0502, - "step": 44885 - }, - { - "epoch": 1.1394847061809874, - "grad_norm": 0.31376248598098755, - "learning_rate": 1.240343529212675e-05, - "loss": 0.0528, - "step": 44890 - }, - { - "epoch": 1.1396116258408426, - "grad_norm": 0.5028325319290161, - "learning_rate": 1.2402589161061049e-05, - "loss": 0.0544, - "step": 44895 - }, - { - "epoch": 1.139738545500698, - "grad_norm": 0.5239415764808655, - "learning_rate": 1.2401743029995347e-05, - "loss": 0.0598, - "step": 44900 - }, - { - "epoch": 1.1398654651605533, - "grad_norm": 0.4289778172969818, - "learning_rate": 1.2400896898929647e-05, - "loss": 0.051, - "step": 44905 - }, - { - "epoch": 1.1399923848204088, - "grad_norm": 0.4097871780395508, - "learning_rate": 1.2400050767863942e-05, - "loss": 0.0323, - "step": 44910 - }, - { - "epoch": 1.140119304480264, - "grad_norm": 0.5614014863967896, - "learning_rate": 1.239920463679824e-05, - "loss": 0.0585, - "step": 44915 - }, - { - "epoch": 1.1402462241401192, - "grad_norm": 0.730863630771637, - "learning_rate": 1.2398358505732539e-05, - "loss": 0.0585, - "step": 44920 - }, - { - "epoch": 1.1403731437999747, - "grad_norm": 0.5454990267753601, - "learning_rate": 1.2397512374666837e-05, - "loss": 0.077, - "step": 44925 - }, - { - "epoch": 1.14050006345983, - "grad_norm": 0.44220757484436035, - "learning_rate": 1.2396666243601134e-05, - "loss": 0.0532, - "step": 44930 - }, - { - "epoch": 1.1406269831196854, - "grad_norm": 0.3598521053791046, - "learning_rate": 1.2395820112535432e-05, - "loss": 0.0341, - "step": 44935 - }, - { - "epoch": 1.1407539027795406, - "grad_norm": 0.4376247823238373, - "learning_rate": 1.2394973981469731e-05, - "loss": 0.0424, - "step": 44940 - }, - { - "epoch": 1.1408808224393958, - "grad_norm": 0.4905691146850586, - "learning_rate": 1.239412785040403e-05, - "loss": 0.0776, - "step": 44945 - }, - { - "epoch": 1.1410077420992513, - "grad_norm": 0.3746574819087982, - "learning_rate": 1.2393281719338326e-05, - "loss": 0.0433, - "step": 44950 - }, - { - "epoch": 1.1411346617591065, - "grad_norm": 0.45356205105781555, - "learning_rate": 1.2392435588272624e-05, - "loss": 0.0417, - "step": 44955 - }, - { - "epoch": 1.1412615814189617, - "grad_norm": 0.4757192134857178, - "learning_rate": 1.2391589457206923e-05, - "loss": 0.0599, - "step": 44960 - }, - { - "epoch": 1.1413885010788172, - "grad_norm": 0.4824245274066925, - "learning_rate": 1.2390743326141221e-05, - "loss": 0.0626, - "step": 44965 - }, - { - "epoch": 1.1415154207386724, - "grad_norm": 0.5039149522781372, - "learning_rate": 1.2389897195075518e-05, - "loss": 0.0586, - "step": 44970 - }, - { - "epoch": 1.1416423403985276, - "grad_norm": 0.5294495224952698, - "learning_rate": 1.2389051064009816e-05, - "loss": 0.0538, - "step": 44975 - }, - { - "epoch": 1.141769260058383, - "grad_norm": 0.40989595651626587, - "learning_rate": 1.2388204932944115e-05, - "loss": 0.0608, - "step": 44980 - }, - { - "epoch": 1.1418961797182383, - "grad_norm": 0.4472127854824066, - "learning_rate": 1.2387358801878413e-05, - "loss": 0.0468, - "step": 44985 - }, - { - "epoch": 1.1420230993780938, - "grad_norm": 0.40840834379196167, - "learning_rate": 1.238651267081271e-05, - "loss": 0.0441, - "step": 44990 - }, - { - "epoch": 1.142150019037949, - "grad_norm": 0.45408111810684204, - "learning_rate": 1.2385666539747008e-05, - "loss": 0.053, - "step": 44995 - }, - { - "epoch": 1.1422769386978042, - "grad_norm": 0.40923818945884705, - "learning_rate": 1.2384820408681307e-05, - "loss": 0.059, - "step": 45000 - }, - { - "epoch": 1.1424038583576597, - "grad_norm": 0.5655351281166077, - "learning_rate": 1.2383974277615605e-05, - "loss": 0.0686, - "step": 45005 - }, - { - "epoch": 1.142530778017515, - "grad_norm": 0.5115942358970642, - "learning_rate": 1.2383128146549902e-05, - "loss": 0.0462, - "step": 45010 - }, - { - "epoch": 1.1426576976773701, - "grad_norm": 0.48032721877098083, - "learning_rate": 1.23822820154842e-05, - "loss": 0.047, - "step": 45015 - }, - { - "epoch": 1.1427846173372256, - "grad_norm": 0.5937841534614563, - "learning_rate": 1.2381435884418499e-05, - "loss": 0.0586, - "step": 45020 - }, - { - "epoch": 1.1429115369970808, - "grad_norm": 0.45257946848869324, - "learning_rate": 1.2380589753352797e-05, - "loss": 0.0608, - "step": 45025 - }, - { - "epoch": 1.143038456656936, - "grad_norm": 0.4841551184654236, - "learning_rate": 1.2379743622287092e-05, - "loss": 0.0374, - "step": 45030 - }, - { - "epoch": 1.1431653763167915, - "grad_norm": 0.6999565958976746, - "learning_rate": 1.237889749122139e-05, - "loss": 0.081, - "step": 45035 - }, - { - "epoch": 1.1432922959766467, - "grad_norm": 0.40945684909820557, - "learning_rate": 1.2378051360155689e-05, - "loss": 0.0551, - "step": 45040 - }, - { - "epoch": 1.1434192156365022, - "grad_norm": 1.1249444484710693, - "learning_rate": 1.2377205229089989e-05, - "loss": 0.0429, - "step": 45045 - }, - { - "epoch": 1.1435461352963574, - "grad_norm": 0.9723420143127441, - "learning_rate": 1.2376359098024284e-05, - "loss": 0.0737, - "step": 45050 - }, - { - "epoch": 1.1436730549562126, - "grad_norm": 0.4846292734146118, - "learning_rate": 1.2375512966958582e-05, - "loss": 0.0545, - "step": 45055 - }, - { - "epoch": 1.143799974616068, - "grad_norm": 0.5056084394454956, - "learning_rate": 1.237466683589288e-05, - "loss": 0.051, - "step": 45060 - }, - { - "epoch": 1.1439268942759233, - "grad_norm": 0.4098723828792572, - "learning_rate": 1.2373820704827179e-05, - "loss": 0.0514, - "step": 45065 - }, - { - "epoch": 1.1440538139357788, - "grad_norm": 0.6237350106239319, - "learning_rate": 1.2372974573761476e-05, - "loss": 0.0539, - "step": 45070 - }, - { - "epoch": 1.144180733595634, - "grad_norm": 0.6745663285255432, - "learning_rate": 1.2372128442695774e-05, - "loss": 0.0441, - "step": 45075 - }, - { - "epoch": 1.1443076532554892, - "grad_norm": 0.4981277883052826, - "learning_rate": 1.2371282311630073e-05, - "loss": 0.0619, - "step": 45080 - }, - { - "epoch": 1.1444345729153447, - "grad_norm": 0.7724320292472839, - "learning_rate": 1.2370436180564371e-05, - "loss": 0.0631, - "step": 45085 - }, - { - "epoch": 1.1445614925752, - "grad_norm": 0.5387504696846008, - "learning_rate": 1.2369590049498668e-05, - "loss": 0.0828, - "step": 45090 - }, - { - "epoch": 1.1446884122350551, - "grad_norm": 0.5277214050292969, - "learning_rate": 1.2368743918432966e-05, - "loss": 0.0618, - "step": 45095 - }, - { - "epoch": 1.1448153318949106, - "grad_norm": 0.664419949054718, - "learning_rate": 1.2367897787367264e-05, - "loss": 0.0532, - "step": 45100 - }, - { - "epoch": 1.1449422515547658, - "grad_norm": 2.09210205078125, - "learning_rate": 1.2367051656301563e-05, - "loss": 0.0514, - "step": 45105 - }, - { - "epoch": 1.145069171214621, - "grad_norm": 0.31030967831611633, - "learning_rate": 1.236620552523586e-05, - "loss": 0.076, - "step": 45110 - }, - { - "epoch": 1.1451960908744765, - "grad_norm": 0.33368074893951416, - "learning_rate": 1.2365359394170158e-05, - "loss": 0.0388, - "step": 45115 - }, - { - "epoch": 1.1453230105343317, - "grad_norm": 0.5805456042289734, - "learning_rate": 1.2364513263104456e-05, - "loss": 0.0549, - "step": 45120 - }, - { - "epoch": 1.1454499301941872, - "grad_norm": 0.465120404958725, - "learning_rate": 1.2363667132038755e-05, - "loss": 0.0586, - "step": 45125 - }, - { - "epoch": 1.1455768498540424, - "grad_norm": 0.5093876123428345, - "learning_rate": 1.2362821000973052e-05, - "loss": 0.0681, - "step": 45130 - }, - { - "epoch": 1.1457037695138976, - "grad_norm": 0.3782198429107666, - "learning_rate": 1.236197486990735e-05, - "loss": 0.0316, - "step": 45135 - }, - { - "epoch": 1.145830689173753, - "grad_norm": 0.40352576971054077, - "learning_rate": 1.2361128738841648e-05, - "loss": 0.0543, - "step": 45140 - }, - { - "epoch": 1.1459576088336083, - "grad_norm": 0.3535306453704834, - "learning_rate": 1.2360282607775947e-05, - "loss": 0.0491, - "step": 45145 - }, - { - "epoch": 1.1460845284934638, - "grad_norm": 0.3292386829853058, - "learning_rate": 1.2359436476710243e-05, - "loss": 0.0554, - "step": 45150 - }, - { - "epoch": 1.146211448153319, - "grad_norm": 0.34595564007759094, - "learning_rate": 1.2358590345644542e-05, - "loss": 0.0495, - "step": 45155 - }, - { - "epoch": 1.1463383678131742, - "grad_norm": 0.4772505462169647, - "learning_rate": 1.235774421457884e-05, - "loss": 0.0546, - "step": 45160 - }, - { - "epoch": 1.1464652874730294, - "grad_norm": 0.40754029154777527, - "learning_rate": 1.2356898083513139e-05, - "loss": 0.0531, - "step": 45165 - }, - { - "epoch": 1.146592207132885, - "grad_norm": 0.7055578231811523, - "learning_rate": 1.2356051952447434e-05, - "loss": 0.0597, - "step": 45170 - }, - { - "epoch": 1.1467191267927401, - "grad_norm": 0.5436520576477051, - "learning_rate": 1.2355205821381732e-05, - "loss": 0.0409, - "step": 45175 - }, - { - "epoch": 1.1468460464525956, - "grad_norm": 0.5887549519538879, - "learning_rate": 1.235435969031603e-05, - "loss": 0.0821, - "step": 45180 - }, - { - "epoch": 1.1469729661124508, - "grad_norm": 0.7870082855224609, - "learning_rate": 1.2353513559250329e-05, - "loss": 0.0477, - "step": 45185 - }, - { - "epoch": 1.147099885772306, - "grad_norm": 0.49568676948547363, - "learning_rate": 1.2352667428184626e-05, - "loss": 0.0532, - "step": 45190 - }, - { - "epoch": 1.1472268054321615, - "grad_norm": 0.4995211362838745, - "learning_rate": 1.2351821297118924e-05, - "loss": 0.0459, - "step": 45195 - }, - { - "epoch": 1.1473537250920167, - "grad_norm": 0.4400921165943146, - "learning_rate": 1.2350975166053222e-05, - "loss": 0.0426, - "step": 45200 - }, - { - "epoch": 1.1474806447518722, - "grad_norm": 0.4122545123100281, - "learning_rate": 1.235012903498752e-05, - "loss": 0.0538, - "step": 45205 - }, - { - "epoch": 1.1476075644117274, - "grad_norm": 0.49364563822746277, - "learning_rate": 1.2349282903921817e-05, - "loss": 0.0404, - "step": 45210 - }, - { - "epoch": 1.1477344840715826, - "grad_norm": 0.6261132955551147, - "learning_rate": 1.2348436772856116e-05, - "loss": 0.0599, - "step": 45215 - }, - { - "epoch": 1.147861403731438, - "grad_norm": 0.5583484172821045, - "learning_rate": 1.2347590641790414e-05, - "loss": 0.0519, - "step": 45220 - }, - { - "epoch": 1.1479883233912933, - "grad_norm": 0.4697798192501068, - "learning_rate": 1.2346744510724713e-05, - "loss": 0.0623, - "step": 45225 - }, - { - "epoch": 1.1481152430511485, - "grad_norm": 0.5087701082229614, - "learning_rate": 1.234589837965901e-05, - "loss": 0.0584, - "step": 45230 - }, - { - "epoch": 1.148242162711004, - "grad_norm": 0.38028132915496826, - "learning_rate": 1.2345052248593308e-05, - "loss": 0.0452, - "step": 45235 - }, - { - "epoch": 1.1483690823708592, - "grad_norm": 1.0578316450119019, - "learning_rate": 1.2344206117527606e-05, - "loss": 0.0546, - "step": 45240 - }, - { - "epoch": 1.1484960020307144, - "grad_norm": 0.5359830856323242, - "learning_rate": 1.2343359986461905e-05, - "loss": 0.0551, - "step": 45245 - }, - { - "epoch": 1.14862292169057, - "grad_norm": 0.7231450080871582, - "learning_rate": 1.2342513855396201e-05, - "loss": 0.0676, - "step": 45250 - }, - { - "epoch": 1.1487498413504251, - "grad_norm": 0.552224338054657, - "learning_rate": 1.23416677243305e-05, - "loss": 0.0461, - "step": 45255 - }, - { - "epoch": 1.1488767610102806, - "grad_norm": 0.7272548079490662, - "learning_rate": 1.2340821593264798e-05, - "loss": 0.0546, - "step": 45260 - }, - { - "epoch": 1.1490036806701358, - "grad_norm": 0.7458736896514893, - "learning_rate": 1.2339975462199097e-05, - "loss": 0.0759, - "step": 45265 - }, - { - "epoch": 1.149130600329991, - "grad_norm": 0.35718604922294617, - "learning_rate": 1.2339129331133393e-05, - "loss": 0.0588, - "step": 45270 - }, - { - "epoch": 1.1492575199898465, - "grad_norm": 1.0222764015197754, - "learning_rate": 1.2338283200067692e-05, - "loss": 0.061, - "step": 45275 - }, - { - "epoch": 1.1493844396497017, - "grad_norm": 0.43837764859199524, - "learning_rate": 1.233743706900199e-05, - "loss": 0.0392, - "step": 45280 - }, - { - "epoch": 1.1495113593095572, - "grad_norm": 1.5293949842453003, - "learning_rate": 1.2336590937936288e-05, - "loss": 0.0615, - "step": 45285 - }, - { - "epoch": 1.1496382789694124, - "grad_norm": 0.2999506890773773, - "learning_rate": 1.2335744806870583e-05, - "loss": 0.0471, - "step": 45290 - }, - { - "epoch": 1.1497651986292676, - "grad_norm": 0.6950856447219849, - "learning_rate": 1.2334898675804884e-05, - "loss": 0.0647, - "step": 45295 - }, - { - "epoch": 1.149892118289123, - "grad_norm": 0.5984799265861511, - "learning_rate": 1.2334052544739182e-05, - "loss": 0.0442, - "step": 45300 - }, - { - "epoch": 1.1500190379489783, - "grad_norm": 0.48928147554397583, - "learning_rate": 1.233320641367348e-05, - "loss": 0.0565, - "step": 45305 - }, - { - "epoch": 1.1501459576088335, - "grad_norm": 0.42710864543914795, - "learning_rate": 1.2332360282607775e-05, - "loss": 0.0561, - "step": 45310 - }, - { - "epoch": 1.150272877268689, - "grad_norm": 0.38290858268737793, - "learning_rate": 1.2331514151542074e-05, - "loss": 0.0615, - "step": 45315 - }, - { - "epoch": 1.1503997969285442, - "grad_norm": 0.3793279826641083, - "learning_rate": 1.2330668020476372e-05, - "loss": 0.0536, - "step": 45320 - }, - { - "epoch": 1.1505267165883994, - "grad_norm": 0.20643997192382812, - "learning_rate": 1.232982188941067e-05, - "loss": 0.0464, - "step": 45325 - }, - { - "epoch": 1.150653636248255, - "grad_norm": 0.5953384041786194, - "learning_rate": 1.2328975758344967e-05, - "loss": 0.0777, - "step": 45330 - }, - { - "epoch": 1.1507805559081101, - "grad_norm": 1.837375521659851, - "learning_rate": 1.2328129627279266e-05, - "loss": 0.0461, - "step": 45335 - }, - { - "epoch": 1.1509074755679656, - "grad_norm": 1.254894495010376, - "learning_rate": 1.2327283496213564e-05, - "loss": 0.057, - "step": 45340 - }, - { - "epoch": 1.1510343952278208, - "grad_norm": 0.3867485523223877, - "learning_rate": 1.2326437365147862e-05, - "loss": 0.0429, - "step": 45345 - }, - { - "epoch": 1.151161314887676, - "grad_norm": 0.4041844308376312, - "learning_rate": 1.232559123408216e-05, - "loss": 0.0557, - "step": 45350 - }, - { - "epoch": 1.1512882345475315, - "grad_norm": 0.56447833776474, - "learning_rate": 1.2324745103016458e-05, - "loss": 0.0496, - "step": 45355 - }, - { - "epoch": 1.1514151542073867, - "grad_norm": 0.6188561320304871, - "learning_rate": 1.2323898971950756e-05, - "loss": 0.0754, - "step": 45360 - }, - { - "epoch": 1.151542073867242, - "grad_norm": 0.540888249874115, - "learning_rate": 1.2323052840885054e-05, - "loss": 0.0494, - "step": 45365 - }, - { - "epoch": 1.1516689935270974, - "grad_norm": 0.5042921304702759, - "learning_rate": 1.2322206709819351e-05, - "loss": 0.0575, - "step": 45370 - }, - { - "epoch": 1.1517959131869526, - "grad_norm": 0.4195232689380646, - "learning_rate": 1.232136057875365e-05, - "loss": 0.0502, - "step": 45375 - }, - { - "epoch": 1.1519228328468079, - "grad_norm": 0.8155618906021118, - "learning_rate": 1.2320514447687948e-05, - "loss": 0.0396, - "step": 45380 - }, - { - "epoch": 1.1520497525066633, - "grad_norm": 0.439095139503479, - "learning_rate": 1.2319668316622246e-05, - "loss": 0.0417, - "step": 45385 - }, - { - "epoch": 1.1521766721665185, - "grad_norm": 0.4812045991420746, - "learning_rate": 1.2318822185556543e-05, - "loss": 0.0387, - "step": 45390 - }, - { - "epoch": 1.152303591826374, - "grad_norm": 0.5256929397583008, - "learning_rate": 1.2317976054490841e-05, - "loss": 0.0589, - "step": 45395 - }, - { - "epoch": 1.1524305114862292, - "grad_norm": 0.7390984892845154, - "learning_rate": 1.231712992342514e-05, - "loss": 0.0667, - "step": 45400 - }, - { - "epoch": 1.1525574311460844, - "grad_norm": 0.43867942690849304, - "learning_rate": 1.2316283792359438e-05, - "loss": 0.0416, - "step": 45405 - }, - { - "epoch": 1.15268435080594, - "grad_norm": 0.35359498858451843, - "learning_rate": 1.2315437661293735e-05, - "loss": 0.0684, - "step": 45410 - }, - { - "epoch": 1.1528112704657951, - "grad_norm": 1.8719310760498047, - "learning_rate": 1.2314591530228033e-05, - "loss": 0.0458, - "step": 45415 - }, - { - "epoch": 1.1529381901256506, - "grad_norm": 0.23878343403339386, - "learning_rate": 1.2313745399162332e-05, - "loss": 0.034, - "step": 45420 - }, - { - "epoch": 1.1530651097855058, - "grad_norm": 0.6470513939857483, - "learning_rate": 1.231289926809663e-05, - "loss": 0.0458, - "step": 45425 - }, - { - "epoch": 1.153192029445361, - "grad_norm": 0.2439955770969391, - "learning_rate": 1.2312053137030929e-05, - "loss": 0.0513, - "step": 45430 - }, - { - "epoch": 1.1533189491052165, - "grad_norm": 0.5053472518920898, - "learning_rate": 1.2311207005965225e-05, - "loss": 0.0492, - "step": 45435 - }, - { - "epoch": 1.1534458687650717, - "grad_norm": 0.5889406204223633, - "learning_rate": 1.2310360874899524e-05, - "loss": 0.0654, - "step": 45440 - }, - { - "epoch": 1.153572788424927, - "grad_norm": 0.4186616539955139, - "learning_rate": 1.2309514743833822e-05, - "loss": 0.0414, - "step": 45445 - }, - { - "epoch": 1.1536997080847824, - "grad_norm": 0.22391054034233093, - "learning_rate": 1.230866861276812e-05, - "loss": 0.035, - "step": 45450 - }, - { - "epoch": 1.1538266277446376, - "grad_norm": 0.5253928899765015, - "learning_rate": 1.2307822481702415e-05, - "loss": 0.0621, - "step": 45455 - }, - { - "epoch": 1.1539535474044929, - "grad_norm": 0.5603539347648621, - "learning_rate": 1.2306976350636714e-05, - "loss": 0.0544, - "step": 45460 - }, - { - "epoch": 1.1540804670643483, - "grad_norm": 0.7588367462158203, - "learning_rate": 1.2306130219571012e-05, - "loss": 0.0394, - "step": 45465 - }, - { - "epoch": 1.1542073867242035, - "grad_norm": 0.5547429323196411, - "learning_rate": 1.2305284088505312e-05, - "loss": 0.0507, - "step": 45470 - }, - { - "epoch": 1.154334306384059, - "grad_norm": 1.5254241228103638, - "learning_rate": 1.2304437957439607e-05, - "loss": 0.0424, - "step": 45475 - }, - { - "epoch": 1.1544612260439142, - "grad_norm": 0.5461959838867188, - "learning_rate": 1.2303591826373906e-05, - "loss": 0.0398, - "step": 45480 - }, - { - "epoch": 1.1545881457037694, - "grad_norm": 0.4635024070739746, - "learning_rate": 1.2302745695308204e-05, - "loss": 0.0563, - "step": 45485 - }, - { - "epoch": 1.154715065363625, - "grad_norm": 0.5635072588920593, - "learning_rate": 1.2301899564242503e-05, - "loss": 0.0586, - "step": 45490 - }, - { - "epoch": 1.1548419850234801, - "grad_norm": 0.551912248134613, - "learning_rate": 1.23010534331768e-05, - "loss": 0.0663, - "step": 45495 - }, - { - "epoch": 1.1549689046833356, - "grad_norm": 0.5642452239990234, - "learning_rate": 1.2300207302111098e-05, - "loss": 0.0297, - "step": 45500 - }, - { - "epoch": 1.1550958243431908, - "grad_norm": 0.7362051010131836, - "learning_rate": 1.2299361171045396e-05, - "loss": 0.051, - "step": 45505 - }, - { - "epoch": 1.155222744003046, - "grad_norm": 0.2940097153186798, - "learning_rate": 1.2298515039979694e-05, - "loss": 0.0476, - "step": 45510 - }, - { - "epoch": 1.1553496636629013, - "grad_norm": 0.36543136835098267, - "learning_rate": 1.2297668908913991e-05, - "loss": 0.0387, - "step": 45515 - }, - { - "epoch": 1.1554765833227567, - "grad_norm": 0.8393925428390503, - "learning_rate": 1.229682277784829e-05, - "loss": 0.0619, - "step": 45520 - }, - { - "epoch": 1.155603502982612, - "grad_norm": 0.3325777053833008, - "learning_rate": 1.2295976646782588e-05, - "loss": 0.0551, - "step": 45525 - }, - { - "epoch": 1.1557304226424674, - "grad_norm": 0.5995887517929077, - "learning_rate": 1.2295130515716886e-05, - "loss": 0.0534, - "step": 45530 - }, - { - "epoch": 1.1558573423023226, - "grad_norm": 0.503773033618927, - "learning_rate": 1.2294284384651183e-05, - "loss": 0.0453, - "step": 45535 - }, - { - "epoch": 1.1559842619621779, - "grad_norm": 0.3905814588069916, - "learning_rate": 1.2293438253585482e-05, - "loss": 0.0557, - "step": 45540 - }, - { - "epoch": 1.1561111816220333, - "grad_norm": 0.43706977367401123, - "learning_rate": 1.229259212251978e-05, - "loss": 0.0531, - "step": 45545 - }, - { - "epoch": 1.1562381012818885, - "grad_norm": 0.5451383590698242, - "learning_rate": 1.2291745991454078e-05, - "loss": 0.0595, - "step": 45550 - }, - { - "epoch": 1.156365020941744, - "grad_norm": 0.4398757517337799, - "learning_rate": 1.2290899860388375e-05, - "loss": 0.0597, - "step": 45555 - }, - { - "epoch": 1.1564919406015992, - "grad_norm": 0.5554032325744629, - "learning_rate": 1.2290053729322673e-05, - "loss": 0.0595, - "step": 45560 - }, - { - "epoch": 1.1566188602614544, - "grad_norm": 0.26522862911224365, - "learning_rate": 1.2289207598256972e-05, - "loss": 0.0457, - "step": 45565 - }, - { - "epoch": 1.15674577992131, - "grad_norm": 0.45199519395828247, - "learning_rate": 1.228836146719127e-05, - "loss": 0.055, - "step": 45570 - }, - { - "epoch": 1.1568726995811651, - "grad_norm": 0.4042201638221741, - "learning_rate": 1.2287515336125567e-05, - "loss": 0.0602, - "step": 45575 - }, - { - "epoch": 1.1569996192410203, - "grad_norm": 0.31569039821624756, - "learning_rate": 1.2286669205059865e-05, - "loss": 0.0547, - "step": 45580 - }, - { - "epoch": 1.1571265389008758, - "grad_norm": 0.4781976342201233, - "learning_rate": 1.2285823073994164e-05, - "loss": 0.0405, - "step": 45585 - }, - { - "epoch": 1.157253458560731, - "grad_norm": 0.39769408106803894, - "learning_rate": 1.2284976942928462e-05, - "loss": 0.0545, - "step": 45590 - }, - { - "epoch": 1.1573803782205863, - "grad_norm": 0.9112207293510437, - "learning_rate": 1.2284130811862757e-05, - "loss": 0.0558, - "step": 45595 - }, - { - "epoch": 1.1575072978804417, - "grad_norm": 0.45035475492477417, - "learning_rate": 1.2283284680797056e-05, - "loss": 0.0365, - "step": 45600 - }, - { - "epoch": 1.157634217540297, - "grad_norm": 0.7163953185081482, - "learning_rate": 1.2282438549731354e-05, - "loss": 0.0622, - "step": 45605 - }, - { - "epoch": 1.1577611372001524, - "grad_norm": 0.43341735005378723, - "learning_rate": 1.2281592418665652e-05, - "loss": 0.0546, - "step": 45610 - }, - { - "epoch": 1.1578880568600076, - "grad_norm": 0.4398091435432434, - "learning_rate": 1.2280746287599949e-05, - "loss": 0.0527, - "step": 45615 - }, - { - "epoch": 1.1580149765198628, - "grad_norm": 0.5319775342941284, - "learning_rate": 1.2279900156534247e-05, - "loss": 0.0601, - "step": 45620 - }, - { - "epoch": 1.1581418961797183, - "grad_norm": 0.2536514103412628, - "learning_rate": 1.2279054025468546e-05, - "loss": 0.0541, - "step": 45625 - }, - { - "epoch": 1.1582688158395735, - "grad_norm": 0.46632999181747437, - "learning_rate": 1.2278207894402844e-05, - "loss": 0.0543, - "step": 45630 - }, - { - "epoch": 1.158395735499429, - "grad_norm": 0.9431824088096619, - "learning_rate": 1.2277361763337141e-05, - "loss": 0.0618, - "step": 45635 - }, - { - "epoch": 1.1585226551592842, - "grad_norm": 0.7281750440597534, - "learning_rate": 1.227651563227144e-05, - "loss": 0.0512, - "step": 45640 - }, - { - "epoch": 1.1586495748191394, - "grad_norm": 0.4421047568321228, - "learning_rate": 1.2275669501205738e-05, - "loss": 0.052, - "step": 45645 - }, - { - "epoch": 1.158776494478995, - "grad_norm": 0.6964503526687622, - "learning_rate": 1.2274823370140036e-05, - "loss": 0.0413, - "step": 45650 - }, - { - "epoch": 1.1589034141388501, - "grad_norm": 1.2472997903823853, - "learning_rate": 1.2273977239074333e-05, - "loss": 0.0696, - "step": 45655 - }, - { - "epoch": 1.1590303337987053, - "grad_norm": 0.4042445421218872, - "learning_rate": 1.2273131108008631e-05, - "loss": 0.0532, - "step": 45660 - }, - { - "epoch": 1.1591572534585608, - "grad_norm": 0.5411381721496582, - "learning_rate": 1.227228497694293e-05, - "loss": 0.0461, - "step": 45665 - }, - { - "epoch": 1.159284173118416, - "grad_norm": 0.7906307578086853, - "learning_rate": 1.2271438845877228e-05, - "loss": 0.0446, - "step": 45670 - }, - { - "epoch": 1.1594110927782713, - "grad_norm": 0.39631062746047974, - "learning_rate": 1.2270592714811525e-05, - "loss": 0.0466, - "step": 45675 - }, - { - "epoch": 1.1595380124381267, - "grad_norm": 0.4852460026741028, - "learning_rate": 1.2269746583745823e-05, - "loss": 0.074, - "step": 45680 - }, - { - "epoch": 1.159664932097982, - "grad_norm": 0.5696393847465515, - "learning_rate": 1.2268900452680122e-05, - "loss": 0.0464, - "step": 45685 - }, - { - "epoch": 1.1597918517578374, - "grad_norm": 0.46405017375946045, - "learning_rate": 1.226805432161442e-05, - "loss": 0.0555, - "step": 45690 - }, - { - "epoch": 1.1599187714176926, - "grad_norm": 0.49802425503730774, - "learning_rate": 1.2267208190548717e-05, - "loss": 0.053, - "step": 45695 - }, - { - "epoch": 1.1600456910775478, - "grad_norm": 0.34535345435142517, - "learning_rate": 1.2266362059483015e-05, - "loss": 0.044, - "step": 45700 - }, - { - "epoch": 1.1601726107374033, - "grad_norm": 0.5320373773574829, - "learning_rate": 1.2265515928417314e-05, - "loss": 0.0612, - "step": 45705 - }, - { - "epoch": 1.1602995303972585, - "grad_norm": 0.36342108249664307, - "learning_rate": 1.2264669797351612e-05, - "loss": 0.0456, - "step": 45710 - }, - { - "epoch": 1.1604264500571138, - "grad_norm": 0.4477798044681549, - "learning_rate": 1.2263823666285909e-05, - "loss": 0.0693, - "step": 45715 - }, - { - "epoch": 1.1605533697169692, - "grad_norm": 0.48310139775276184, - "learning_rate": 1.2262977535220207e-05, - "loss": 0.0435, - "step": 45720 - }, - { - "epoch": 1.1606802893768244, - "grad_norm": 0.5466583967208862, - "learning_rate": 1.2262131404154505e-05, - "loss": 0.0475, - "step": 45725 - }, - { - "epoch": 1.1608072090366797, - "grad_norm": 0.2789030969142914, - "learning_rate": 1.2261285273088804e-05, - "loss": 0.0494, - "step": 45730 - }, - { - "epoch": 1.1609341286965351, - "grad_norm": 0.6827407479286194, - "learning_rate": 1.2260439142023099e-05, - "loss": 0.06, - "step": 45735 - }, - { - "epoch": 1.1610610483563903, - "grad_norm": 0.47511520981788635, - "learning_rate": 1.2259593010957397e-05, - "loss": 0.0492, - "step": 45740 - }, - { - "epoch": 1.1611879680162458, - "grad_norm": 0.3849601447582245, - "learning_rate": 1.2258746879891696e-05, - "loss": 0.0506, - "step": 45745 - }, - { - "epoch": 1.161314887676101, - "grad_norm": 0.3995727598667145, - "learning_rate": 1.2257900748825994e-05, - "loss": 0.0686, - "step": 45750 - }, - { - "epoch": 1.1614418073359563, - "grad_norm": 0.5203893184661865, - "learning_rate": 1.225705461776029e-05, - "loss": 0.0528, - "step": 45755 - }, - { - "epoch": 1.1615687269958117, - "grad_norm": 0.3326849937438965, - "learning_rate": 1.225620848669459e-05, - "loss": 0.038, - "step": 45760 - }, - { - "epoch": 1.161695646655667, - "grad_norm": 0.38185879588127136, - "learning_rate": 1.2255362355628888e-05, - "loss": 0.0452, - "step": 45765 - }, - { - "epoch": 1.1618225663155224, - "grad_norm": 0.506968080997467, - "learning_rate": 1.2254516224563186e-05, - "loss": 0.0581, - "step": 45770 - }, - { - "epoch": 1.1619494859753776, - "grad_norm": 0.7693882584571838, - "learning_rate": 1.2253670093497483e-05, - "loss": 0.0601, - "step": 45775 - }, - { - "epoch": 1.1620764056352328, - "grad_norm": 0.36541664600372314, - "learning_rate": 1.2252823962431781e-05, - "loss": 0.0614, - "step": 45780 - }, - { - "epoch": 1.1622033252950883, - "grad_norm": 0.4726105332374573, - "learning_rate": 1.225197783136608e-05, - "loss": 0.0661, - "step": 45785 - }, - { - "epoch": 1.1623302449549435, - "grad_norm": 0.5188310146331787, - "learning_rate": 1.2251131700300378e-05, - "loss": 0.0416, - "step": 45790 - }, - { - "epoch": 1.1624571646147988, - "grad_norm": 0.478426992893219, - "learning_rate": 1.2250285569234675e-05, - "loss": 0.0411, - "step": 45795 - }, - { - "epoch": 1.1625840842746542, - "grad_norm": 0.4684242308139801, - "learning_rate": 1.2249439438168973e-05, - "loss": 0.0385, - "step": 45800 - }, - { - "epoch": 1.1627110039345094, - "grad_norm": 0.4757101237773895, - "learning_rate": 1.2248593307103271e-05, - "loss": 0.0474, - "step": 45805 - }, - { - "epoch": 1.1628379235943647, - "grad_norm": 0.5181873440742493, - "learning_rate": 1.224774717603757e-05, - "loss": 0.0668, - "step": 45810 - }, - { - "epoch": 1.1629648432542201, - "grad_norm": 0.4030776917934418, - "learning_rate": 1.2246901044971867e-05, - "loss": 0.0427, - "step": 45815 - }, - { - "epoch": 1.1630917629140753, - "grad_norm": 0.559587299823761, - "learning_rate": 1.2246054913906165e-05, - "loss": 0.0536, - "step": 45820 - }, - { - "epoch": 1.1632186825739308, - "grad_norm": 0.5465173721313477, - "learning_rate": 1.2245208782840463e-05, - "loss": 0.0636, - "step": 45825 - }, - { - "epoch": 1.163345602233786, - "grad_norm": 0.2905937433242798, - "learning_rate": 1.2244362651774762e-05, - "loss": 0.0394, - "step": 45830 - }, - { - "epoch": 1.1634725218936413, - "grad_norm": 0.5870751142501831, - "learning_rate": 1.2243516520709058e-05, - "loss": 0.0512, - "step": 45835 - }, - { - "epoch": 1.1635994415534967, - "grad_norm": 0.45287850499153137, - "learning_rate": 1.2242670389643357e-05, - "loss": 0.0492, - "step": 45840 - }, - { - "epoch": 1.163726361213352, - "grad_norm": 0.7403638958930969, - "learning_rate": 1.2241824258577655e-05, - "loss": 0.045, - "step": 45845 - }, - { - "epoch": 1.1638532808732072, - "grad_norm": 0.8778479099273682, - "learning_rate": 1.2240978127511954e-05, - "loss": 0.0542, - "step": 45850 - }, - { - "epoch": 1.1639802005330626, - "grad_norm": 1.3774387836456299, - "learning_rate": 1.2240131996446249e-05, - "loss": 0.042, - "step": 45855 - }, - { - "epoch": 1.1641071201929178, - "grad_norm": 0.5244563221931458, - "learning_rate": 1.2239285865380549e-05, - "loss": 0.0538, - "step": 45860 - }, - { - "epoch": 1.164234039852773, - "grad_norm": 0.5981637835502625, - "learning_rate": 1.2238439734314847e-05, - "loss": 0.0506, - "step": 45865 - }, - { - "epoch": 1.1643609595126285, - "grad_norm": 0.5227558612823486, - "learning_rate": 1.2237593603249146e-05, - "loss": 0.0543, - "step": 45870 - }, - { - "epoch": 1.1644878791724838, - "grad_norm": 0.5167489051818848, - "learning_rate": 1.223674747218344e-05, - "loss": 0.0376, - "step": 45875 - }, - { - "epoch": 1.1646147988323392, - "grad_norm": 0.716677725315094, - "learning_rate": 1.2235901341117739e-05, - "loss": 0.0534, - "step": 45880 - }, - { - "epoch": 1.1647417184921944, - "grad_norm": 0.3491823971271515, - "learning_rate": 1.2235055210052037e-05, - "loss": 0.0582, - "step": 45885 - }, - { - "epoch": 1.1648686381520497, - "grad_norm": 0.4765452444553375, - "learning_rate": 1.2234209078986336e-05, - "loss": 0.0402, - "step": 45890 - }, - { - "epoch": 1.1649955578119051, - "grad_norm": 0.6151904463768005, - "learning_rate": 1.2233362947920632e-05, - "loss": 0.0408, - "step": 45895 - }, - { - "epoch": 1.1651224774717603, - "grad_norm": 0.646713376045227, - "learning_rate": 1.2232516816854931e-05, - "loss": 0.0548, - "step": 45900 - }, - { - "epoch": 1.1652493971316158, - "grad_norm": 0.41358599066734314, - "learning_rate": 1.223167068578923e-05, - "loss": 0.0295, - "step": 45905 - }, - { - "epoch": 1.165376316791471, - "grad_norm": 0.581523597240448, - "learning_rate": 1.2230824554723528e-05, - "loss": 0.0525, - "step": 45910 - }, - { - "epoch": 1.1655032364513263, - "grad_norm": 0.44306713342666626, - "learning_rate": 1.2229978423657824e-05, - "loss": 0.0539, - "step": 45915 - }, - { - "epoch": 1.1656301561111817, - "grad_norm": 0.4595223367214203, - "learning_rate": 1.2229132292592123e-05, - "loss": 0.0569, - "step": 45920 - }, - { - "epoch": 1.165757075771037, - "grad_norm": 0.21557019650936127, - "learning_rate": 1.2228286161526421e-05, - "loss": 0.0429, - "step": 45925 - }, - { - "epoch": 1.1658839954308922, - "grad_norm": 0.3902203440666199, - "learning_rate": 1.222744003046072e-05, - "loss": 0.0606, - "step": 45930 - }, - { - "epoch": 1.1660109150907476, - "grad_norm": 0.3142940402030945, - "learning_rate": 1.2226593899395018e-05, - "loss": 0.0477, - "step": 45935 - }, - { - "epoch": 1.1661378347506028, - "grad_norm": 0.7400667667388916, - "learning_rate": 1.2225747768329315e-05, - "loss": 0.0689, - "step": 45940 - }, - { - "epoch": 1.166264754410458, - "grad_norm": 0.41282713413238525, - "learning_rate": 1.2224901637263613e-05, - "loss": 0.0494, - "step": 45945 - }, - { - "epoch": 1.1663916740703135, - "grad_norm": 0.6214973330497742, - "learning_rate": 1.2224055506197912e-05, - "loss": 0.0608, - "step": 45950 - }, - { - "epoch": 1.1665185937301688, - "grad_norm": 0.16816961765289307, - "learning_rate": 1.222320937513221e-05, - "loss": 0.0316, - "step": 45955 - }, - { - "epoch": 1.1666455133900242, - "grad_norm": 0.4886109232902527, - "learning_rate": 1.2222363244066507e-05, - "loss": 0.0379, - "step": 45960 - }, - { - "epoch": 1.1667724330498794, - "grad_norm": 0.36689722537994385, - "learning_rate": 1.2221517113000805e-05, - "loss": 0.0505, - "step": 45965 - }, - { - "epoch": 1.1668993527097347, - "grad_norm": 0.4546849727630615, - "learning_rate": 1.2220670981935103e-05, - "loss": 0.0471, - "step": 45970 - }, - { - "epoch": 1.1670262723695901, - "grad_norm": 0.5275943875312805, - "learning_rate": 1.2219824850869402e-05, - "loss": 0.0499, - "step": 45975 - }, - { - "epoch": 1.1671531920294453, - "grad_norm": 0.40568801760673523, - "learning_rate": 1.2218978719803699e-05, - "loss": 0.0543, - "step": 45980 - }, - { - "epoch": 1.1672801116893008, - "grad_norm": 0.5737390518188477, - "learning_rate": 1.2218132588737997e-05, - "loss": 0.0512, - "step": 45985 - }, - { - "epoch": 1.167407031349156, - "grad_norm": 0.887035608291626, - "learning_rate": 1.2217286457672295e-05, - "loss": 0.0603, - "step": 45990 - }, - { - "epoch": 1.1675339510090113, - "grad_norm": 0.3877870738506317, - "learning_rate": 1.2216440326606594e-05, - "loss": 0.0684, - "step": 45995 - }, - { - "epoch": 1.1676608706688667, - "grad_norm": 0.6083777546882629, - "learning_rate": 1.221559419554089e-05, - "loss": 0.051, - "step": 46000 - }, - { - "epoch": 1.167787790328722, - "grad_norm": 0.5950170755386353, - "learning_rate": 1.2214748064475189e-05, - "loss": 0.043, - "step": 46005 - }, - { - "epoch": 1.1679147099885772, - "grad_norm": 0.788995623588562, - "learning_rate": 1.2213901933409487e-05, - "loss": 0.0526, - "step": 46010 - }, - { - "epoch": 1.1680416296484326, - "grad_norm": 0.47846561670303345, - "learning_rate": 1.2213055802343786e-05, - "loss": 0.0512, - "step": 46015 - }, - { - "epoch": 1.1681685493082878, - "grad_norm": 0.3852282166481018, - "learning_rate": 1.221220967127808e-05, - "loss": 0.0363, - "step": 46020 - }, - { - "epoch": 1.168295468968143, - "grad_norm": 0.6774734258651733, - "learning_rate": 1.2211363540212379e-05, - "loss": 0.0598, - "step": 46025 - }, - { - "epoch": 1.1684223886279985, - "grad_norm": 0.97516268491745, - "learning_rate": 1.2210517409146677e-05, - "loss": 0.0579, - "step": 46030 - }, - { - "epoch": 1.1685493082878537, - "grad_norm": 0.5187297463417053, - "learning_rate": 1.2209671278080978e-05, - "loss": 0.0393, - "step": 46035 - }, - { - "epoch": 1.1686762279477092, - "grad_norm": 0.4514966309070587, - "learning_rate": 1.2208825147015273e-05, - "loss": 0.0632, - "step": 46040 - }, - { - "epoch": 1.1688031476075644, - "grad_norm": 0.3208334743976593, - "learning_rate": 1.2207979015949571e-05, - "loss": 0.0376, - "step": 46045 - }, - { - "epoch": 1.1689300672674197, - "grad_norm": 0.35380610823631287, - "learning_rate": 1.220713288488387e-05, - "loss": 0.0418, - "step": 46050 - }, - { - "epoch": 1.169056986927275, - "grad_norm": 0.5342519283294678, - "learning_rate": 1.2206286753818168e-05, - "loss": 0.0548, - "step": 46055 - }, - { - "epoch": 1.1691839065871303, - "grad_norm": 0.5419753789901733, - "learning_rate": 1.2205440622752465e-05, - "loss": 0.0534, - "step": 46060 - }, - { - "epoch": 1.1693108262469856, - "grad_norm": 0.44387730956077576, - "learning_rate": 1.2204594491686763e-05, - "loss": 0.0433, - "step": 46065 - }, - { - "epoch": 1.169437745906841, - "grad_norm": 0.432880163192749, - "learning_rate": 1.2203748360621061e-05, - "loss": 0.0449, - "step": 46070 - }, - { - "epoch": 1.1695646655666962, - "grad_norm": 0.4935786724090576, - "learning_rate": 1.220290222955536e-05, - "loss": 0.0542, - "step": 46075 - }, - { - "epoch": 1.1696915852265515, - "grad_norm": 0.6171745657920837, - "learning_rate": 1.2202056098489656e-05, - "loss": 0.0505, - "step": 46080 - }, - { - "epoch": 1.169818504886407, - "grad_norm": 0.3941151797771454, - "learning_rate": 1.2201209967423955e-05, - "loss": 0.0465, - "step": 46085 - }, - { - "epoch": 1.1699454245462622, - "grad_norm": 0.31978318095207214, - "learning_rate": 1.2200363836358253e-05, - "loss": 0.0743, - "step": 46090 - }, - { - "epoch": 1.1700723442061176, - "grad_norm": 0.7607755661010742, - "learning_rate": 1.2199517705292552e-05, - "loss": 0.0421, - "step": 46095 - }, - { - "epoch": 1.1701992638659728, - "grad_norm": 0.3519534170627594, - "learning_rate": 1.2198671574226848e-05, - "loss": 0.0464, - "step": 46100 - }, - { - "epoch": 1.170326183525828, - "grad_norm": 0.43073588609695435, - "learning_rate": 1.2197825443161147e-05, - "loss": 0.0693, - "step": 46105 - }, - { - "epoch": 1.1704531031856835, - "grad_norm": 0.28490230441093445, - "learning_rate": 1.2196979312095445e-05, - "loss": 0.0381, - "step": 46110 - }, - { - "epoch": 1.1705800228455387, - "grad_norm": 0.6038274765014648, - "learning_rate": 1.2196133181029744e-05, - "loss": 0.0428, - "step": 46115 - }, - { - "epoch": 1.1707069425053942, - "grad_norm": 0.4173058867454529, - "learning_rate": 1.219528704996404e-05, - "loss": 0.0544, - "step": 46120 - }, - { - "epoch": 1.1708338621652494, - "grad_norm": 0.29307374358177185, - "learning_rate": 1.2194440918898339e-05, - "loss": 0.035, - "step": 46125 - }, - { - "epoch": 1.1709607818251047, - "grad_norm": 0.39065057039260864, - "learning_rate": 1.2193594787832637e-05, - "loss": 0.0546, - "step": 46130 - }, - { - "epoch": 1.17108770148496, - "grad_norm": 0.3663085699081421, - "learning_rate": 1.2192748656766935e-05, - "loss": 0.0534, - "step": 46135 - }, - { - "epoch": 1.1712146211448153, - "grad_norm": 0.4721587300300598, - "learning_rate": 1.2191902525701232e-05, - "loss": 0.0345, - "step": 46140 - }, - { - "epoch": 1.1713415408046706, - "grad_norm": 0.6377245783805847, - "learning_rate": 1.219105639463553e-05, - "loss": 0.0431, - "step": 46145 - }, - { - "epoch": 1.171468460464526, - "grad_norm": 0.5151833295822144, - "learning_rate": 1.2190210263569829e-05, - "loss": 0.0407, - "step": 46150 - }, - { - "epoch": 1.1715953801243812, - "grad_norm": 0.46106424927711487, - "learning_rate": 1.2189364132504127e-05, - "loss": 0.0609, - "step": 46155 - }, - { - "epoch": 1.1717222997842365, - "grad_norm": 0.4784712493419647, - "learning_rate": 1.2188518001438422e-05, - "loss": 0.0545, - "step": 46160 - }, - { - "epoch": 1.171849219444092, - "grad_norm": 0.5185056328773499, - "learning_rate": 1.218767187037272e-05, - "loss": 0.0518, - "step": 46165 - }, - { - "epoch": 1.1719761391039472, - "grad_norm": 1.3128308057785034, - "learning_rate": 1.218682573930702e-05, - "loss": 0.0579, - "step": 46170 - }, - { - "epoch": 1.1721030587638026, - "grad_norm": 0.3966991901397705, - "learning_rate": 1.2185979608241318e-05, - "loss": 0.0512, - "step": 46175 - }, - { - "epoch": 1.1722299784236578, - "grad_norm": 0.31310975551605225, - "learning_rate": 1.2185133477175614e-05, - "loss": 0.0453, - "step": 46180 - }, - { - "epoch": 1.172356898083513, - "grad_norm": 0.593385636806488, - "learning_rate": 1.2184287346109913e-05, - "loss": 0.0463, - "step": 46185 - }, - { - "epoch": 1.1724838177433685, - "grad_norm": 0.7350988388061523, - "learning_rate": 1.2183441215044211e-05, - "loss": 0.0633, - "step": 46190 - }, - { - "epoch": 1.1726107374032237, - "grad_norm": 0.6505928635597229, - "learning_rate": 1.218259508397851e-05, - "loss": 0.051, - "step": 46195 - }, - { - "epoch": 1.172737657063079, - "grad_norm": 0.4095378518104553, - "learning_rate": 1.2181748952912806e-05, - "loss": 0.0484, - "step": 46200 - }, - { - "epoch": 1.1728645767229344, - "grad_norm": 0.37266236543655396, - "learning_rate": 1.2180902821847105e-05, - "loss": 0.0297, - "step": 46205 - }, - { - "epoch": 1.1729914963827897, - "grad_norm": 0.3460351824760437, - "learning_rate": 1.2180056690781403e-05, - "loss": 0.0471, - "step": 46210 - }, - { - "epoch": 1.1731184160426449, - "grad_norm": 0.4772239327430725, - "learning_rate": 1.2179210559715701e-05, - "loss": 0.0518, - "step": 46215 - }, - { - "epoch": 1.1732453357025003, - "grad_norm": 0.6116874814033508, - "learning_rate": 1.2178364428649998e-05, - "loss": 0.0691, - "step": 46220 - }, - { - "epoch": 1.1733722553623556, - "grad_norm": 0.489736407995224, - "learning_rate": 1.2177518297584297e-05, - "loss": 0.0377, - "step": 46225 - }, - { - "epoch": 1.173499175022211, - "grad_norm": 0.46163126826286316, - "learning_rate": 1.2176672166518595e-05, - "loss": 0.051, - "step": 46230 - }, - { - "epoch": 1.1736260946820662, - "grad_norm": 0.49629130959510803, - "learning_rate": 1.2175826035452893e-05, - "loss": 0.0461, - "step": 46235 - }, - { - "epoch": 1.1737530143419215, - "grad_norm": 0.4801912307739258, - "learning_rate": 1.217497990438719e-05, - "loss": 0.0563, - "step": 46240 - }, - { - "epoch": 1.173879934001777, - "grad_norm": 0.7383812665939331, - "learning_rate": 1.2174133773321488e-05, - "loss": 0.0721, - "step": 46245 - }, - { - "epoch": 1.1740068536616322, - "grad_norm": 0.8737614750862122, - "learning_rate": 1.2173287642255787e-05, - "loss": 0.048, - "step": 46250 - }, - { - "epoch": 1.1741337733214876, - "grad_norm": 0.9262115955352783, - "learning_rate": 1.2172441511190085e-05, - "loss": 0.0573, - "step": 46255 - }, - { - "epoch": 1.1742606929813428, - "grad_norm": 0.5345076322555542, - "learning_rate": 1.2171595380124382e-05, - "loss": 0.0554, - "step": 46260 - }, - { - "epoch": 1.174387612641198, - "grad_norm": 0.41575899720191956, - "learning_rate": 1.217074924905868e-05, - "loss": 0.0534, - "step": 46265 - }, - { - "epoch": 1.1745145323010535, - "grad_norm": 0.6028421521186829, - "learning_rate": 1.2169903117992979e-05, - "loss": 0.0423, - "step": 46270 - }, - { - "epoch": 1.1746414519609087, - "grad_norm": 0.7841980457305908, - "learning_rate": 1.2169056986927277e-05, - "loss": 0.0467, - "step": 46275 - }, - { - "epoch": 1.174768371620764, - "grad_norm": 0.46675142645835876, - "learning_rate": 1.2168210855861574e-05, - "loss": 0.0597, - "step": 46280 - }, - { - "epoch": 1.1748952912806194, - "grad_norm": 0.587067186832428, - "learning_rate": 1.2167364724795872e-05, - "loss": 0.0464, - "step": 46285 - }, - { - "epoch": 1.1750222109404747, - "grad_norm": 0.37594085931777954, - "learning_rate": 1.216651859373017e-05, - "loss": 0.0457, - "step": 46290 - }, - { - "epoch": 1.1751491306003299, - "grad_norm": 0.4698958992958069, - "learning_rate": 1.2165672462664469e-05, - "loss": 0.0629, - "step": 46295 - }, - { - "epoch": 1.1752760502601853, - "grad_norm": 0.5682809352874756, - "learning_rate": 1.2164826331598764e-05, - "loss": 0.0606, - "step": 46300 - }, - { - "epoch": 1.1754029699200406, - "grad_norm": 0.6621158123016357, - "learning_rate": 1.2163980200533062e-05, - "loss": 0.0616, - "step": 46305 - }, - { - "epoch": 1.175529889579896, - "grad_norm": 0.4046812951564789, - "learning_rate": 1.2163134069467361e-05, - "loss": 0.0525, - "step": 46310 - }, - { - "epoch": 1.1756568092397512, - "grad_norm": 0.795898973941803, - "learning_rate": 1.216228793840166e-05, - "loss": 0.0427, - "step": 46315 - }, - { - "epoch": 1.1757837288996065, - "grad_norm": 0.5783666372299194, - "learning_rate": 1.2161441807335956e-05, - "loss": 0.0542, - "step": 46320 - }, - { - "epoch": 1.175910648559462, - "grad_norm": 0.4653014838695526, - "learning_rate": 1.2160595676270254e-05, - "loss": 0.0452, - "step": 46325 - }, - { - "epoch": 1.1760375682193172, - "grad_norm": 0.5434033870697021, - "learning_rate": 1.2159749545204553e-05, - "loss": 0.0476, - "step": 46330 - }, - { - "epoch": 1.1761644878791726, - "grad_norm": 0.6095600128173828, - "learning_rate": 1.2158903414138851e-05, - "loss": 0.0683, - "step": 46335 - }, - { - "epoch": 1.1762914075390278, - "grad_norm": 0.34382811188697815, - "learning_rate": 1.2158057283073148e-05, - "loss": 0.071, - "step": 46340 - }, - { - "epoch": 1.176418327198883, - "grad_norm": 0.48436591029167175, - "learning_rate": 1.2157211152007446e-05, - "loss": 0.051, - "step": 46345 - }, - { - "epoch": 1.1765452468587385, - "grad_norm": 0.6786342859268188, - "learning_rate": 1.2156365020941745e-05, - "loss": 0.039, - "step": 46350 - }, - { - "epoch": 1.1766721665185937, - "grad_norm": 0.8414708971977234, - "learning_rate": 1.2155518889876043e-05, - "loss": 0.048, - "step": 46355 - }, - { - "epoch": 1.176799086178449, - "grad_norm": 0.43822816014289856, - "learning_rate": 1.215467275881034e-05, - "loss": 0.0374, - "step": 46360 - }, - { - "epoch": 1.1769260058383044, - "grad_norm": 0.7844807505607605, - "learning_rate": 1.2153826627744638e-05, - "loss": 0.0598, - "step": 46365 - }, - { - "epoch": 1.1770529254981597, - "grad_norm": 0.40151169896125793, - "learning_rate": 1.2152980496678937e-05, - "loss": 0.0681, - "step": 46370 - }, - { - "epoch": 1.1771798451580149, - "grad_norm": 0.2818983793258667, - "learning_rate": 1.2152134365613235e-05, - "loss": 0.0446, - "step": 46375 - }, - { - "epoch": 1.1773067648178703, - "grad_norm": 0.3505231440067291, - "learning_rate": 1.2151288234547532e-05, - "loss": 0.0449, - "step": 46380 - }, - { - "epoch": 1.1774336844777256, - "grad_norm": 0.5552307963371277, - "learning_rate": 1.215044210348183e-05, - "loss": 0.0561, - "step": 46385 - }, - { - "epoch": 1.177560604137581, - "grad_norm": 0.4108719825744629, - "learning_rate": 1.2149595972416129e-05, - "loss": 0.0612, - "step": 46390 - }, - { - "epoch": 1.1776875237974362, - "grad_norm": 0.49706441164016724, - "learning_rate": 1.2148749841350427e-05, - "loss": 0.0511, - "step": 46395 - }, - { - "epoch": 1.1778144434572915, - "grad_norm": 0.41712096333503723, - "learning_rate": 1.2147903710284724e-05, - "loss": 0.0465, - "step": 46400 - }, - { - "epoch": 1.177941363117147, - "grad_norm": 0.3999241888523102, - "learning_rate": 1.2147057579219022e-05, - "loss": 0.0322, - "step": 46405 - }, - { - "epoch": 1.1780682827770022, - "grad_norm": 0.9710645079612732, - "learning_rate": 1.214621144815332e-05, - "loss": 0.0411, - "step": 46410 - }, - { - "epoch": 1.1781952024368574, - "grad_norm": 0.25197818875312805, - "learning_rate": 1.2145365317087619e-05, - "loss": 0.0519, - "step": 46415 - }, - { - "epoch": 1.1783221220967128, - "grad_norm": 0.3643161952495575, - "learning_rate": 1.2144519186021914e-05, - "loss": 0.0444, - "step": 46420 - }, - { - "epoch": 1.178449041756568, - "grad_norm": 0.4549102187156677, - "learning_rate": 1.2143673054956214e-05, - "loss": 0.0421, - "step": 46425 - }, - { - "epoch": 1.1785759614164233, - "grad_norm": 0.51079261302948, - "learning_rate": 1.2142826923890512e-05, - "loss": 0.0727, - "step": 46430 - }, - { - "epoch": 1.1787028810762787, - "grad_norm": 0.4649351239204407, - "learning_rate": 1.214198079282481e-05, - "loss": 0.056, - "step": 46435 - }, - { - "epoch": 1.178829800736134, - "grad_norm": 0.521880030632019, - "learning_rate": 1.2141134661759106e-05, - "loss": 0.0399, - "step": 46440 - }, - { - "epoch": 1.1789567203959894, - "grad_norm": 0.6243376731872559, - "learning_rate": 1.2140288530693404e-05, - "loss": 0.0466, - "step": 46445 - }, - { - "epoch": 1.1790836400558447, - "grad_norm": 0.3790232241153717, - "learning_rate": 1.2139442399627703e-05, - "loss": 0.0472, - "step": 46450 - }, - { - "epoch": 1.1792105597156999, - "grad_norm": 0.5610066056251526, - "learning_rate": 1.2138596268562001e-05, - "loss": 0.0524, - "step": 46455 - }, - { - "epoch": 1.1793374793755553, - "grad_norm": 0.5182737112045288, - "learning_rate": 1.2137750137496301e-05, - "loss": 0.0746, - "step": 46460 - }, - { - "epoch": 1.1794643990354106, - "grad_norm": 0.8382980823516846, - "learning_rate": 1.2136904006430596e-05, - "loss": 0.0556, - "step": 46465 - }, - { - "epoch": 1.179591318695266, - "grad_norm": 0.6687497496604919, - "learning_rate": 1.2136057875364895e-05, - "loss": 0.0535, - "step": 46470 - }, - { - "epoch": 1.1797182383551212, - "grad_norm": 0.3562037944793701, - "learning_rate": 1.2135211744299193e-05, - "loss": 0.0488, - "step": 46475 - }, - { - "epoch": 1.1798451580149765, - "grad_norm": 0.4231501519680023, - "learning_rate": 1.2134365613233491e-05, - "loss": 0.0422, - "step": 46480 - }, - { - "epoch": 1.179972077674832, - "grad_norm": 0.4198450446128845, - "learning_rate": 1.2133519482167788e-05, - "loss": 0.0723, - "step": 46485 - }, - { - "epoch": 1.1800989973346871, - "grad_norm": 0.36181649565696716, - "learning_rate": 1.2132673351102086e-05, - "loss": 0.0584, - "step": 46490 - }, - { - "epoch": 1.1802259169945424, - "grad_norm": 0.6010124087333679, - "learning_rate": 1.2131827220036385e-05, - "loss": 0.0685, - "step": 46495 - }, - { - "epoch": 1.1803528366543978, - "grad_norm": 0.49659571051597595, - "learning_rate": 1.2130981088970683e-05, - "loss": 0.049, - "step": 46500 - }, - { - "epoch": 1.180479756314253, - "grad_norm": 0.5864565372467041, - "learning_rate": 1.213013495790498e-05, - "loss": 0.0433, - "step": 46505 - }, - { - "epoch": 1.1806066759741083, - "grad_norm": 0.8984616994857788, - "learning_rate": 1.2129288826839278e-05, - "loss": 0.0653, - "step": 46510 - }, - { - "epoch": 1.1807335956339637, - "grad_norm": 0.5703023076057434, - "learning_rate": 1.2128442695773577e-05, - "loss": 0.0725, - "step": 46515 - }, - { - "epoch": 1.180860515293819, - "grad_norm": 0.6205032467842102, - "learning_rate": 1.2127596564707875e-05, - "loss": 0.0673, - "step": 46520 - }, - { - "epoch": 1.1809874349536744, - "grad_norm": 0.27382972836494446, - "learning_rate": 1.2126750433642172e-05, - "loss": 0.0523, - "step": 46525 - }, - { - "epoch": 1.1811143546135296, - "grad_norm": 0.6421423554420471, - "learning_rate": 1.212590430257647e-05, - "loss": 0.0506, - "step": 46530 - }, - { - "epoch": 1.1812412742733849, - "grad_norm": 0.4997011721134186, - "learning_rate": 1.2125058171510769e-05, - "loss": 0.0836, - "step": 46535 - }, - { - "epoch": 1.1813681939332403, - "grad_norm": 0.431621789932251, - "learning_rate": 1.2124212040445067e-05, - "loss": 0.0602, - "step": 46540 - }, - { - "epoch": 1.1814951135930956, - "grad_norm": 0.44210290908813477, - "learning_rate": 1.2123365909379364e-05, - "loss": 0.0312, - "step": 46545 - }, - { - "epoch": 1.1816220332529508, - "grad_norm": 0.4666039049625397, - "learning_rate": 1.2122519778313662e-05, - "loss": 0.0661, - "step": 46550 - }, - { - "epoch": 1.1817489529128062, - "grad_norm": 0.643256664276123, - "learning_rate": 1.212167364724796e-05, - "loss": 0.0709, - "step": 46555 - }, - { - "epoch": 1.1818758725726615, - "grad_norm": 0.4738340973854065, - "learning_rate": 1.2120827516182259e-05, - "loss": 0.0416, - "step": 46560 - }, - { - "epoch": 1.1820027922325167, - "grad_norm": 0.4335121512413025, - "learning_rate": 1.2119981385116556e-05, - "loss": 0.0569, - "step": 46565 - }, - { - "epoch": 1.1821297118923721, - "grad_norm": 0.375980019569397, - "learning_rate": 1.2119135254050854e-05, - "loss": 0.0437, - "step": 46570 - }, - { - "epoch": 1.1822566315522274, - "grad_norm": 0.38304492831230164, - "learning_rate": 1.2118289122985152e-05, - "loss": 0.0527, - "step": 46575 - }, - { - "epoch": 1.1823835512120828, - "grad_norm": 0.5649701356887817, - "learning_rate": 1.2117442991919451e-05, - "loss": 0.0441, - "step": 46580 - }, - { - "epoch": 1.182510470871938, - "grad_norm": 0.5028870105743408, - "learning_rate": 1.2116596860853746e-05, - "loss": 0.0544, - "step": 46585 - }, - { - "epoch": 1.1826373905317933, - "grad_norm": 0.4340355694293976, - "learning_rate": 1.2115750729788044e-05, - "loss": 0.0548, - "step": 46590 - }, - { - "epoch": 1.1827643101916487, - "grad_norm": 0.5675074458122253, - "learning_rate": 1.2114904598722343e-05, - "loss": 0.0349, - "step": 46595 - }, - { - "epoch": 1.182891229851504, - "grad_norm": 0.34279125928878784, - "learning_rate": 1.2114058467656643e-05, - "loss": 0.0555, - "step": 46600 - }, - { - "epoch": 1.1830181495113594, - "grad_norm": 0.3440141975879669, - "learning_rate": 1.2113212336590938e-05, - "loss": 0.053, - "step": 46605 - }, - { - "epoch": 1.1831450691712146, - "grad_norm": 0.39180102944374084, - "learning_rate": 1.2112366205525236e-05, - "loss": 0.047, - "step": 46610 - }, - { - "epoch": 1.1832719888310699, - "grad_norm": 0.5207722187042236, - "learning_rate": 1.2111520074459535e-05, - "loss": 0.0415, - "step": 46615 - }, - { - "epoch": 1.1833989084909253, - "grad_norm": 0.4585883915424347, - "learning_rate": 1.2110673943393833e-05, - "loss": 0.0436, - "step": 46620 - }, - { - "epoch": 1.1835258281507806, - "grad_norm": 0.43848130106925964, - "learning_rate": 1.210982781232813e-05, - "loss": 0.04, - "step": 46625 - }, - { - "epoch": 1.1836527478106358, - "grad_norm": 1.8804773092269897, - "learning_rate": 1.2108981681262428e-05, - "loss": 0.0701, - "step": 46630 - }, - { - "epoch": 1.1837796674704912, - "grad_norm": 0.8152101039886475, - "learning_rate": 1.2108135550196727e-05, - "loss": 0.0472, - "step": 46635 - }, - { - "epoch": 1.1839065871303465, - "grad_norm": 0.3321734070777893, - "learning_rate": 1.2107289419131025e-05, - "loss": 0.0431, - "step": 46640 - }, - { - "epoch": 1.1840335067902017, - "grad_norm": 0.3874434530735016, - "learning_rate": 1.2106443288065322e-05, - "loss": 0.0593, - "step": 46645 - }, - { - "epoch": 1.1841604264500571, - "grad_norm": 0.6017939448356628, - "learning_rate": 1.210559715699962e-05, - "loss": 0.0512, - "step": 46650 - }, - { - "epoch": 1.1842873461099124, - "grad_norm": 0.7233595252037048, - "learning_rate": 1.2104751025933918e-05, - "loss": 0.0607, - "step": 46655 - }, - { - "epoch": 1.1844142657697678, - "grad_norm": 0.36382532119750977, - "learning_rate": 1.2103904894868217e-05, - "loss": 0.0486, - "step": 46660 - }, - { - "epoch": 1.184541185429623, - "grad_norm": 0.5505540370941162, - "learning_rate": 1.2103058763802514e-05, - "loss": 0.0452, - "step": 46665 - }, - { - "epoch": 1.1846681050894783, - "grad_norm": 0.3942480981349945, - "learning_rate": 1.2102212632736812e-05, - "loss": 0.0492, - "step": 46670 - }, - { - "epoch": 1.1847950247493337, - "grad_norm": 0.6089707016944885, - "learning_rate": 1.210136650167111e-05, - "loss": 0.0418, - "step": 46675 - }, - { - "epoch": 1.184921944409189, - "grad_norm": 0.4465332329273224, - "learning_rate": 1.2100520370605409e-05, - "loss": 0.04, - "step": 46680 - }, - { - "epoch": 1.1850488640690444, - "grad_norm": 0.21609529852867126, - "learning_rate": 1.2099674239539705e-05, - "loss": 0.0615, - "step": 46685 - }, - { - "epoch": 1.1851757837288996, - "grad_norm": 0.41398024559020996, - "learning_rate": 1.2098828108474004e-05, - "loss": 0.0534, - "step": 46690 - }, - { - "epoch": 1.1853027033887549, - "grad_norm": 0.3652055859565735, - "learning_rate": 1.2097981977408302e-05, - "loss": 0.0525, - "step": 46695 - }, - { - "epoch": 1.1854296230486103, - "grad_norm": 0.45180046558380127, - "learning_rate": 1.20971358463426e-05, - "loss": 0.0505, - "step": 46700 - }, - { - "epoch": 1.1855565427084656, - "grad_norm": 0.4861522614955902, - "learning_rate": 1.2096289715276897e-05, - "loss": 0.0794, - "step": 46705 - }, - { - "epoch": 1.1856834623683208, - "grad_norm": 0.6862412691116333, - "learning_rate": 1.2095443584211196e-05, - "loss": 0.0494, - "step": 46710 - }, - { - "epoch": 1.1858103820281762, - "grad_norm": 0.6957131624221802, - "learning_rate": 1.2094597453145494e-05, - "loss": 0.0511, - "step": 46715 - }, - { - "epoch": 1.1859373016880315, - "grad_norm": 0.5479734539985657, - "learning_rate": 1.2093751322079793e-05, - "loss": 0.0544, - "step": 46720 - }, - { - "epoch": 1.1860642213478867, - "grad_norm": 0.4933122992515564, - "learning_rate": 1.2092905191014088e-05, - "loss": 0.0382, - "step": 46725 - }, - { - "epoch": 1.1861911410077421, - "grad_norm": 0.4430360198020935, - "learning_rate": 1.2092059059948386e-05, - "loss": 0.0375, - "step": 46730 - }, - { - "epoch": 1.1863180606675974, - "grad_norm": 0.5673303604125977, - "learning_rate": 1.2091212928882684e-05, - "loss": 0.0478, - "step": 46735 - }, - { - "epoch": 1.1864449803274528, - "grad_norm": 0.4458087682723999, - "learning_rate": 1.2090366797816983e-05, - "loss": 0.076, - "step": 46740 - }, - { - "epoch": 1.186571899987308, - "grad_norm": 0.3709859848022461, - "learning_rate": 1.208952066675128e-05, - "loss": 0.0558, - "step": 46745 - }, - { - "epoch": 1.1866988196471633, - "grad_norm": 0.9831788539886475, - "learning_rate": 1.2088674535685578e-05, - "loss": 0.0668, - "step": 46750 - }, - { - "epoch": 1.1868257393070187, - "grad_norm": 0.5601195693016052, - "learning_rate": 1.2087828404619876e-05, - "loss": 0.0642, - "step": 46755 - }, - { - "epoch": 1.186952658966874, - "grad_norm": 0.6115407943725586, - "learning_rate": 1.2086982273554175e-05, - "loss": 0.0496, - "step": 46760 - }, - { - "epoch": 1.1870795786267292, - "grad_norm": 0.40447455644607544, - "learning_rate": 1.2086136142488471e-05, - "loss": 0.0523, - "step": 46765 - }, - { - "epoch": 1.1872064982865846, - "grad_norm": 0.5632847547531128, - "learning_rate": 1.208529001142277e-05, - "loss": 0.0567, - "step": 46770 - }, - { - "epoch": 1.1873334179464399, - "grad_norm": 0.3946261405944824, - "learning_rate": 1.2084443880357068e-05, - "loss": 0.0611, - "step": 46775 - }, - { - "epoch": 1.187460337606295, - "grad_norm": 0.5259038209915161, - "learning_rate": 1.2083597749291367e-05, - "loss": 0.0618, - "step": 46780 - }, - { - "epoch": 1.1875872572661506, - "grad_norm": 0.380156546831131, - "learning_rate": 1.2082751618225663e-05, - "loss": 0.0453, - "step": 46785 - }, - { - "epoch": 1.1877141769260058, - "grad_norm": 0.39473357796669006, - "learning_rate": 1.2081905487159962e-05, - "loss": 0.0351, - "step": 46790 - }, - { - "epoch": 1.1878410965858612, - "grad_norm": 0.41205406188964844, - "learning_rate": 1.208105935609426e-05, - "loss": 0.0418, - "step": 46795 - }, - { - "epoch": 1.1879680162457165, - "grad_norm": 0.5065401196479797, - "learning_rate": 1.2080213225028559e-05, - "loss": 0.0655, - "step": 46800 - }, - { - "epoch": 1.1880949359055717, - "grad_norm": 0.5859723687171936, - "learning_rate": 1.2079367093962855e-05, - "loss": 0.0503, - "step": 46805 - }, - { - "epoch": 1.1882218555654271, - "grad_norm": 0.4413202702999115, - "learning_rate": 1.2078520962897154e-05, - "loss": 0.0586, - "step": 46810 - }, - { - "epoch": 1.1883487752252824, - "grad_norm": 0.4751104414463043, - "learning_rate": 1.2077674831831452e-05, - "loss": 0.0487, - "step": 46815 - }, - { - "epoch": 1.1884756948851378, - "grad_norm": 0.3376339375972748, - "learning_rate": 1.207682870076575e-05, - "loss": 0.0475, - "step": 46820 - }, - { - "epoch": 1.188602614544993, - "grad_norm": 0.37800467014312744, - "learning_rate": 1.2075982569700047e-05, - "loss": 0.0338, - "step": 46825 - }, - { - "epoch": 1.1887295342048483, - "grad_norm": 0.7740526795387268, - "learning_rate": 1.2075136438634346e-05, - "loss": 0.0592, - "step": 46830 - }, - { - "epoch": 1.1888564538647037, - "grad_norm": 0.36284223198890686, - "learning_rate": 1.2074290307568644e-05, - "loss": 0.0449, - "step": 46835 - }, - { - "epoch": 1.188983373524559, - "grad_norm": 0.5259076952934265, - "learning_rate": 1.2073444176502942e-05, - "loss": 0.0576, - "step": 46840 - }, - { - "epoch": 1.1891102931844142, - "grad_norm": 0.5547145009040833, - "learning_rate": 1.2072598045437237e-05, - "loss": 0.0635, - "step": 46845 - }, - { - "epoch": 1.1892372128442696, - "grad_norm": 0.3724014461040497, - "learning_rate": 1.2071751914371537e-05, - "loss": 0.0413, - "step": 46850 - }, - { - "epoch": 1.1893641325041249, - "grad_norm": 0.3426811993122101, - "learning_rate": 1.2070905783305836e-05, - "loss": 0.0308, - "step": 46855 - }, - { - "epoch": 1.18949105216398, - "grad_norm": 0.3868379592895508, - "learning_rate": 1.2070059652240134e-05, - "loss": 0.0448, - "step": 46860 - }, - { - "epoch": 1.1896179718238356, - "grad_norm": 0.4594486951828003, - "learning_rate": 1.206921352117443e-05, - "loss": 0.0431, - "step": 46865 - }, - { - "epoch": 1.1897448914836908, - "grad_norm": 0.4658834636211395, - "learning_rate": 1.2068367390108728e-05, - "loss": 0.0395, - "step": 46870 - }, - { - "epoch": 1.1898718111435462, - "grad_norm": 0.4336414933204651, - "learning_rate": 1.2067521259043026e-05, - "loss": 0.0535, - "step": 46875 - }, - { - "epoch": 1.1899987308034015, - "grad_norm": 0.4616621136665344, - "learning_rate": 1.2066675127977325e-05, - "loss": 0.061, - "step": 46880 - }, - { - "epoch": 1.1901256504632567, - "grad_norm": 0.7979484796524048, - "learning_rate": 1.2065828996911621e-05, - "loss": 0.0499, - "step": 46885 - }, - { - "epoch": 1.1902525701231121, - "grad_norm": 0.8495008945465088, - "learning_rate": 1.206498286584592e-05, - "loss": 0.0457, - "step": 46890 - }, - { - "epoch": 1.1903794897829674, - "grad_norm": 0.45764198899269104, - "learning_rate": 1.2064136734780218e-05, - "loss": 0.0624, - "step": 46895 - }, - { - "epoch": 1.1905064094428226, - "grad_norm": 0.3169819712638855, - "learning_rate": 1.2063290603714516e-05, - "loss": 0.048, - "step": 46900 - }, - { - "epoch": 1.190633329102678, - "grad_norm": 0.3732587695121765, - "learning_rate": 1.2062444472648813e-05, - "loss": 0.045, - "step": 46905 - }, - { - "epoch": 1.1907602487625333, - "grad_norm": 0.3820248246192932, - "learning_rate": 1.2061598341583112e-05, - "loss": 0.0524, - "step": 46910 - }, - { - "epoch": 1.1908871684223885, - "grad_norm": 0.59283846616745, - "learning_rate": 1.206075221051741e-05, - "loss": 0.0528, - "step": 46915 - }, - { - "epoch": 1.191014088082244, - "grad_norm": 0.5270490646362305, - "learning_rate": 1.2059906079451708e-05, - "loss": 0.0464, - "step": 46920 - }, - { - "epoch": 1.1911410077420992, - "grad_norm": 1.0709285736083984, - "learning_rate": 1.2059059948386005e-05, - "loss": 0.0447, - "step": 46925 - }, - { - "epoch": 1.1912679274019546, - "grad_norm": 0.3343476951122284, - "learning_rate": 1.2058213817320303e-05, - "loss": 0.0546, - "step": 46930 - }, - { - "epoch": 1.1913948470618099, - "grad_norm": 0.4691145718097687, - "learning_rate": 1.2057367686254602e-05, - "loss": 0.033, - "step": 46935 - }, - { - "epoch": 1.191521766721665, - "grad_norm": 0.6241704225540161, - "learning_rate": 1.20565215551889e-05, - "loss": 0.0417, - "step": 46940 - }, - { - "epoch": 1.1916486863815205, - "grad_norm": 0.9123190641403198, - "learning_rate": 1.2055675424123197e-05, - "loss": 0.0608, - "step": 46945 - }, - { - "epoch": 1.1917756060413758, - "grad_norm": 0.440929114818573, - "learning_rate": 1.2054829293057495e-05, - "loss": 0.0507, - "step": 46950 - }, - { - "epoch": 1.1919025257012312, - "grad_norm": 0.3586864471435547, - "learning_rate": 1.2053983161991794e-05, - "loss": 0.054, - "step": 46955 - }, - { - "epoch": 1.1920294453610865, - "grad_norm": 0.4635944068431854, - "learning_rate": 1.2053137030926092e-05, - "loss": 0.0399, - "step": 46960 - }, - { - "epoch": 1.1921563650209417, - "grad_norm": 0.3387432098388672, - "learning_rate": 1.205229089986039e-05, - "loss": 0.0373, - "step": 46965 - }, - { - "epoch": 1.1922832846807971, - "grad_norm": 0.7943820953369141, - "learning_rate": 1.2051444768794687e-05, - "loss": 0.0608, - "step": 46970 - }, - { - "epoch": 1.1924102043406524, - "grad_norm": 0.4512821435928345, - "learning_rate": 1.2050598637728986e-05, - "loss": 0.054, - "step": 46975 - }, - { - "epoch": 1.1925371240005076, - "grad_norm": 0.4961985945701599, - "learning_rate": 1.2049752506663284e-05, - "loss": 0.0335, - "step": 46980 - }, - { - "epoch": 1.192664043660363, - "grad_norm": 0.4190046489238739, - "learning_rate": 1.2048906375597582e-05, - "loss": 0.0405, - "step": 46985 - }, - { - "epoch": 1.1927909633202183, - "grad_norm": 0.5347869396209717, - "learning_rate": 1.204806024453188e-05, - "loss": 0.0613, - "step": 46990 - }, - { - "epoch": 1.1929178829800735, - "grad_norm": 0.541271984577179, - "learning_rate": 1.2047214113466178e-05, - "loss": 0.0405, - "step": 46995 - }, - { - "epoch": 1.193044802639929, - "grad_norm": 0.7287310361862183, - "learning_rate": 1.2046367982400476e-05, - "loss": 0.0485, - "step": 47000 - }, - { - "epoch": 1.1931717222997842, - "grad_norm": 0.5430908799171448, - "learning_rate": 1.2045521851334774e-05, - "loss": 0.0429, - "step": 47005 - }, - { - "epoch": 1.1932986419596396, - "grad_norm": 0.5396707653999329, - "learning_rate": 1.204467572026907e-05, - "loss": 0.0383, - "step": 47010 - }, - { - "epoch": 1.1934255616194949, - "grad_norm": 0.5801067352294922, - "learning_rate": 1.2043829589203368e-05, - "loss": 0.0422, - "step": 47015 - }, - { - "epoch": 1.19355248127935, - "grad_norm": 0.4656229019165039, - "learning_rate": 1.2042983458137666e-05, - "loss": 0.0515, - "step": 47020 - }, - { - "epoch": 1.1936794009392055, - "grad_norm": 0.334567666053772, - "learning_rate": 1.2042137327071966e-05, - "loss": 0.0534, - "step": 47025 - }, - { - "epoch": 1.1938063205990608, - "grad_norm": 0.5164675116539001, - "learning_rate": 1.2041291196006261e-05, - "loss": 0.0651, - "step": 47030 - }, - { - "epoch": 1.1939332402589162, - "grad_norm": 0.7277669310569763, - "learning_rate": 1.204044506494056e-05, - "loss": 0.0635, - "step": 47035 - }, - { - "epoch": 1.1940601599187715, - "grad_norm": 0.8274209499359131, - "learning_rate": 1.2039598933874858e-05, - "loss": 0.057, - "step": 47040 - }, - { - "epoch": 1.1941870795786267, - "grad_norm": 0.8020625114440918, - "learning_rate": 1.2038752802809157e-05, - "loss": 0.045, - "step": 47045 - }, - { - "epoch": 1.1943139992384821, - "grad_norm": 0.634892463684082, - "learning_rate": 1.2037906671743453e-05, - "loss": 0.0633, - "step": 47050 - }, - { - "epoch": 1.1944409188983374, - "grad_norm": 0.5035471320152283, - "learning_rate": 1.2037060540677752e-05, - "loss": 0.0441, - "step": 47055 - }, - { - "epoch": 1.1945678385581926, - "grad_norm": 0.579294741153717, - "learning_rate": 1.203621440961205e-05, - "loss": 0.0475, - "step": 47060 - }, - { - "epoch": 1.194694758218048, - "grad_norm": 0.29558151960372925, - "learning_rate": 1.2035368278546348e-05, - "loss": 0.0425, - "step": 47065 - }, - { - "epoch": 1.1948216778779033, - "grad_norm": 0.26265448331832886, - "learning_rate": 1.2034522147480645e-05, - "loss": 0.0546, - "step": 47070 - }, - { - "epoch": 1.1949485975377585, - "grad_norm": 0.5021111965179443, - "learning_rate": 1.2033676016414944e-05, - "loss": 0.057, - "step": 47075 - }, - { - "epoch": 1.195075517197614, - "grad_norm": 0.3791356086730957, - "learning_rate": 1.2032829885349242e-05, - "loss": 0.047, - "step": 47080 - }, - { - "epoch": 1.1952024368574692, - "grad_norm": 0.5976860523223877, - "learning_rate": 1.203198375428354e-05, - "loss": 0.0521, - "step": 47085 - }, - { - "epoch": 1.1953293565173246, - "grad_norm": 0.448356568813324, - "learning_rate": 1.2031137623217837e-05, - "loss": 0.0419, - "step": 47090 - }, - { - "epoch": 1.1954562761771799, - "grad_norm": 0.569506049156189, - "learning_rate": 1.2030291492152135e-05, - "loss": 0.0695, - "step": 47095 - }, - { - "epoch": 1.195583195837035, - "grad_norm": 0.47798824310302734, - "learning_rate": 1.2029445361086434e-05, - "loss": 0.0418, - "step": 47100 - }, - { - "epoch": 1.1957101154968905, - "grad_norm": 0.6610152125358582, - "learning_rate": 1.2028599230020732e-05, - "loss": 0.0453, - "step": 47105 - }, - { - "epoch": 1.1958370351567458, - "grad_norm": 0.3201946020126343, - "learning_rate": 1.2027753098955029e-05, - "loss": 0.0587, - "step": 47110 - }, - { - "epoch": 1.195963954816601, - "grad_norm": 0.575490415096283, - "learning_rate": 1.2026906967889327e-05, - "loss": 0.0546, - "step": 47115 - }, - { - "epoch": 1.1960908744764565, - "grad_norm": 0.4303225874900818, - "learning_rate": 1.2026060836823626e-05, - "loss": 0.0823, - "step": 47120 - }, - { - "epoch": 1.1962177941363117, - "grad_norm": 0.35441967844963074, - "learning_rate": 1.2025214705757924e-05, - "loss": 0.0584, - "step": 47125 - }, - { - "epoch": 1.196344713796167, - "grad_norm": 0.3849914073944092, - "learning_rate": 1.2024368574692221e-05, - "loss": 0.0438, - "step": 47130 - }, - { - "epoch": 1.1964716334560224, - "grad_norm": 1.0432770252227783, - "learning_rate": 1.202352244362652e-05, - "loss": 0.0726, - "step": 47135 - }, - { - "epoch": 1.1965985531158776, - "grad_norm": 0.5871967673301697, - "learning_rate": 1.2022676312560818e-05, - "loss": 0.0518, - "step": 47140 - }, - { - "epoch": 1.196725472775733, - "grad_norm": 0.5031200647354126, - "learning_rate": 1.2021830181495116e-05, - "loss": 0.0371, - "step": 47145 - }, - { - "epoch": 1.1968523924355883, - "grad_norm": 0.653529942035675, - "learning_rate": 1.2020984050429411e-05, - "loss": 0.0381, - "step": 47150 - }, - { - "epoch": 1.1969793120954435, - "grad_norm": 0.3674299418926239, - "learning_rate": 1.202013791936371e-05, - "loss": 0.0509, - "step": 47155 - }, - { - "epoch": 1.197106231755299, - "grad_norm": 0.5317191481590271, - "learning_rate": 1.2019291788298008e-05, - "loss": 0.0648, - "step": 47160 - }, - { - "epoch": 1.1972331514151542, - "grad_norm": 0.23457340896129608, - "learning_rate": 1.2018445657232306e-05, - "loss": 0.0464, - "step": 47165 - }, - { - "epoch": 1.1973600710750096, - "grad_norm": 0.3646846115589142, - "learning_rate": 1.2017599526166603e-05, - "loss": 0.0259, - "step": 47170 - }, - { - "epoch": 1.1974869907348649, - "grad_norm": 0.647325336933136, - "learning_rate": 1.2016753395100901e-05, - "loss": 0.0382, - "step": 47175 - }, - { - "epoch": 1.19761391039472, - "grad_norm": 0.4742783308029175, - "learning_rate": 1.20159072640352e-05, - "loss": 0.0317, - "step": 47180 - }, - { - "epoch": 1.1977408300545755, - "grad_norm": 1.5094106197357178, - "learning_rate": 1.2015061132969498e-05, - "loss": 0.0566, - "step": 47185 - }, - { - "epoch": 1.1978677497144308, - "grad_norm": 0.703137993812561, - "learning_rate": 1.2014215001903795e-05, - "loss": 0.0457, - "step": 47190 - }, - { - "epoch": 1.197994669374286, - "grad_norm": 0.4494548439979553, - "learning_rate": 1.2013368870838093e-05, - "loss": 0.0586, - "step": 47195 - }, - { - "epoch": 1.1981215890341415, - "grad_norm": 0.4626508951187134, - "learning_rate": 1.2012522739772392e-05, - "loss": 0.0698, - "step": 47200 - }, - { - "epoch": 1.1982485086939967, - "grad_norm": 0.5038554668426514, - "learning_rate": 1.201167660870669e-05, - "loss": 0.0676, - "step": 47205 - }, - { - "epoch": 1.198375428353852, - "grad_norm": 0.8120282292366028, - "learning_rate": 1.2010830477640987e-05, - "loss": 0.0647, - "step": 47210 - }, - { - "epoch": 1.1985023480137074, - "grad_norm": 0.4816630482673645, - "learning_rate": 1.2009984346575285e-05, - "loss": 0.06, - "step": 47215 - }, - { - "epoch": 1.1986292676735626, - "grad_norm": 0.47069233655929565, - "learning_rate": 1.2009138215509584e-05, - "loss": 0.0726, - "step": 47220 - }, - { - "epoch": 1.198756187333418, - "grad_norm": 1.3131024837493896, - "learning_rate": 1.2008292084443882e-05, - "loss": 0.0495, - "step": 47225 - }, - { - "epoch": 1.1988831069932733, - "grad_norm": 0.6520063877105713, - "learning_rate": 1.2007445953378179e-05, - "loss": 0.0428, - "step": 47230 - }, - { - "epoch": 1.1990100266531285, - "grad_norm": 0.1905469447374344, - "learning_rate": 1.2006599822312477e-05, - "loss": 0.0446, - "step": 47235 - }, - { - "epoch": 1.199136946312984, - "grad_norm": 0.4855395555496216, - "learning_rate": 1.2005753691246776e-05, - "loss": 0.0516, - "step": 47240 - }, - { - "epoch": 1.1992638659728392, - "grad_norm": 0.5744659900665283, - "learning_rate": 1.2004907560181074e-05, - "loss": 0.0427, - "step": 47245 - }, - { - "epoch": 1.1993907856326944, - "grad_norm": 0.5054256916046143, - "learning_rate": 1.200406142911537e-05, - "loss": 0.0507, - "step": 47250 - }, - { - "epoch": 1.1995177052925499, - "grad_norm": 0.39790093898773193, - "learning_rate": 1.2003215298049669e-05, - "loss": 0.046, - "step": 47255 - }, - { - "epoch": 1.199644624952405, - "grad_norm": 0.8061208724975586, - "learning_rate": 1.2002369166983967e-05, - "loss": 0.0415, - "step": 47260 - }, - { - "epoch": 1.1997715446122603, - "grad_norm": 0.5762823224067688, - "learning_rate": 1.2001523035918266e-05, - "loss": 0.0499, - "step": 47265 - }, - { - "epoch": 1.1998984642721158, - "grad_norm": 0.4823804497718811, - "learning_rate": 1.2000676904852563e-05, - "loss": 0.0453, - "step": 47270 - }, - { - "epoch": 1.200025383931971, - "grad_norm": 0.685077428817749, - "learning_rate": 1.1999830773786861e-05, - "loss": 0.0553, - "step": 47275 - }, - { - "epoch": 1.2001523035918265, - "grad_norm": 0.6340650320053101, - "learning_rate": 1.199898464272116e-05, - "loss": 0.072, - "step": 47280 - }, - { - "epoch": 1.2002792232516817, - "grad_norm": 0.6787698268890381, - "learning_rate": 1.1998138511655458e-05, - "loss": 0.0516, - "step": 47285 - }, - { - "epoch": 1.200406142911537, - "grad_norm": 0.41612759232521057, - "learning_rate": 1.1997292380589753e-05, - "loss": 0.058, - "step": 47290 - }, - { - "epoch": 1.2005330625713924, - "grad_norm": 0.5898323655128479, - "learning_rate": 1.1996446249524051e-05, - "loss": 0.0392, - "step": 47295 - }, - { - "epoch": 1.2006599822312476, - "grad_norm": 0.8691596984863281, - "learning_rate": 1.199560011845835e-05, - "loss": 0.0459, - "step": 47300 - }, - { - "epoch": 1.200786901891103, - "grad_norm": 0.4324185252189636, - "learning_rate": 1.1994753987392648e-05, - "loss": 0.0622, - "step": 47305 - }, - { - "epoch": 1.2009138215509583, - "grad_norm": 0.6998849511146545, - "learning_rate": 1.1993907856326945e-05, - "loss": 0.0499, - "step": 47310 - }, - { - "epoch": 1.2010407412108135, - "grad_norm": 0.6355030536651611, - "learning_rate": 1.1993061725261243e-05, - "loss": 0.0536, - "step": 47315 - }, - { - "epoch": 1.201167660870669, - "grad_norm": 0.4660939574241638, - "learning_rate": 1.1992215594195542e-05, - "loss": 0.0682, - "step": 47320 - }, - { - "epoch": 1.2012945805305242, - "grad_norm": 0.5722331404685974, - "learning_rate": 1.199136946312984e-05, - "loss": 0.036, - "step": 47325 - }, - { - "epoch": 1.2014215001903794, - "grad_norm": 0.4505646824836731, - "learning_rate": 1.1990523332064137e-05, - "loss": 0.0566, - "step": 47330 - }, - { - "epoch": 1.2015484198502349, - "grad_norm": 0.5457966923713684, - "learning_rate": 1.1989677200998435e-05, - "loss": 0.0584, - "step": 47335 - }, - { - "epoch": 1.20167533951009, - "grad_norm": 0.5918678641319275, - "learning_rate": 1.1988831069932733e-05, - "loss": 0.0467, - "step": 47340 - }, - { - "epoch": 1.2018022591699453, - "grad_norm": 0.3172938823699951, - "learning_rate": 1.1987984938867032e-05, - "loss": 0.0531, - "step": 47345 - }, - { - "epoch": 1.2019291788298008, - "grad_norm": 2.488504648208618, - "learning_rate": 1.1987138807801329e-05, - "loss": 0.0566, - "step": 47350 - }, - { - "epoch": 1.202056098489656, - "grad_norm": 0.44837483763694763, - "learning_rate": 1.1986292676735627e-05, - "loss": 0.0412, - "step": 47355 - }, - { - "epoch": 1.2021830181495115, - "grad_norm": 0.35599803924560547, - "learning_rate": 1.1985446545669925e-05, - "loss": 0.0394, - "step": 47360 - }, - { - "epoch": 1.2023099378093667, - "grad_norm": 0.4094404876232147, - "learning_rate": 1.1984600414604224e-05, - "loss": 0.0428, - "step": 47365 - }, - { - "epoch": 1.202436857469222, - "grad_norm": 0.5808391571044922, - "learning_rate": 1.198375428353852e-05, - "loss": 0.0575, - "step": 47370 - }, - { - "epoch": 1.2025637771290774, - "grad_norm": 0.4635372757911682, - "learning_rate": 1.1982908152472819e-05, - "loss": 0.0434, - "step": 47375 - }, - { - "epoch": 1.2026906967889326, - "grad_norm": 0.40716004371643066, - "learning_rate": 1.1982062021407117e-05, - "loss": 0.0475, - "step": 47380 - }, - { - "epoch": 1.202817616448788, - "grad_norm": 0.478582501411438, - "learning_rate": 1.1981215890341416e-05, - "loss": 0.045, - "step": 47385 - }, - { - "epoch": 1.2029445361086433, - "grad_norm": 1.1871063709259033, - "learning_rate": 1.1980369759275712e-05, - "loss": 0.0478, - "step": 47390 - }, - { - "epoch": 1.2030714557684985, - "grad_norm": 0.6162945032119751, - "learning_rate": 1.197952362821001e-05, - "loss": 0.0483, - "step": 47395 - }, - { - "epoch": 1.2031983754283537, - "grad_norm": 0.29496437311172485, - "learning_rate": 1.197867749714431e-05, - "loss": 0.0436, - "step": 47400 - }, - { - "epoch": 1.2033252950882092, - "grad_norm": 0.6284193992614746, - "learning_rate": 1.1977831366078608e-05, - "loss": 0.0748, - "step": 47405 - }, - { - "epoch": 1.2034522147480644, - "grad_norm": 0.3531234562397003, - "learning_rate": 1.1976985235012903e-05, - "loss": 0.0318, - "step": 47410 - }, - { - "epoch": 1.2035791344079199, - "grad_norm": 0.41714370250701904, - "learning_rate": 1.1976139103947203e-05, - "loss": 0.042, - "step": 47415 - }, - { - "epoch": 1.203706054067775, - "grad_norm": 0.47053539752960205, - "learning_rate": 1.1975292972881501e-05, - "loss": 0.052, - "step": 47420 - }, - { - "epoch": 1.2038329737276303, - "grad_norm": 1.2841711044311523, - "learning_rate": 1.19744468418158e-05, - "loss": 0.0663, - "step": 47425 - }, - { - "epoch": 1.2039598933874858, - "grad_norm": 0.3862781524658203, - "learning_rate": 1.1973600710750095e-05, - "loss": 0.057, - "step": 47430 - }, - { - "epoch": 1.204086813047341, - "grad_norm": 0.7344119548797607, - "learning_rate": 1.1972754579684393e-05, - "loss": 0.0535, - "step": 47435 - }, - { - "epoch": 1.2042137327071964, - "grad_norm": 0.4764394462108612, - "learning_rate": 1.1971908448618691e-05, - "loss": 0.0421, - "step": 47440 - }, - { - "epoch": 1.2043406523670517, - "grad_norm": 0.425045371055603, - "learning_rate": 1.197106231755299e-05, - "loss": 0.0415, - "step": 47445 - }, - { - "epoch": 1.204467572026907, - "grad_norm": 0.3669269382953644, - "learning_rate": 1.1970216186487286e-05, - "loss": 0.062, - "step": 47450 - }, - { - "epoch": 1.2045944916867624, - "grad_norm": 0.573101282119751, - "learning_rate": 1.1969370055421585e-05, - "loss": 0.0549, - "step": 47455 - }, - { - "epoch": 1.2047214113466176, - "grad_norm": 0.7194697260856628, - "learning_rate": 1.1968523924355883e-05, - "loss": 0.043, - "step": 47460 - }, - { - "epoch": 1.2048483310064728, - "grad_norm": 0.5318601727485657, - "learning_rate": 1.1967677793290182e-05, - "loss": 0.0501, - "step": 47465 - }, - { - "epoch": 1.2049752506663283, - "grad_norm": 0.28540101647377014, - "learning_rate": 1.1966831662224478e-05, - "loss": 0.0445, - "step": 47470 - }, - { - "epoch": 1.2051021703261835, - "grad_norm": 0.567482054233551, - "learning_rate": 1.1965985531158777e-05, - "loss": 0.0556, - "step": 47475 - }, - { - "epoch": 1.2052290899860387, - "grad_norm": 0.8097561001777649, - "learning_rate": 1.1965139400093075e-05, - "loss": 0.0398, - "step": 47480 - }, - { - "epoch": 1.2053560096458942, - "grad_norm": 0.5292410850524902, - "learning_rate": 1.1964293269027374e-05, - "loss": 0.0388, - "step": 47485 - }, - { - "epoch": 1.2054829293057494, - "grad_norm": 0.3682163655757904, - "learning_rate": 1.1963447137961672e-05, - "loss": 0.0606, - "step": 47490 - }, - { - "epoch": 1.2056098489656049, - "grad_norm": 0.3315012753009796, - "learning_rate": 1.1962601006895969e-05, - "loss": 0.0412, - "step": 47495 - }, - { - "epoch": 1.20573676862546, - "grad_norm": 0.4615030288696289, - "learning_rate": 1.1961754875830267e-05, - "loss": 0.0472, - "step": 47500 - }, - { - "epoch": 1.2058636882853153, - "grad_norm": 0.3342849016189575, - "learning_rate": 1.1960908744764565e-05, - "loss": 0.0386, - "step": 47505 - }, - { - "epoch": 1.2059906079451708, - "grad_norm": 0.40003257989883423, - "learning_rate": 1.1960062613698864e-05, - "loss": 0.0504, - "step": 47510 - }, - { - "epoch": 1.206117527605026, - "grad_norm": 0.36771196126937866, - "learning_rate": 1.195921648263316e-05, - "loss": 0.0504, - "step": 47515 - }, - { - "epoch": 1.2062444472648814, - "grad_norm": 0.34656810760498047, - "learning_rate": 1.1958370351567459e-05, - "loss": 0.0679, - "step": 47520 - }, - { - "epoch": 1.2063713669247367, - "grad_norm": 0.4208267629146576, - "learning_rate": 1.1957524220501757e-05, - "loss": 0.0365, - "step": 47525 - }, - { - "epoch": 1.206498286584592, - "grad_norm": 0.42899075150489807, - "learning_rate": 1.1956678089436056e-05, - "loss": 0.0515, - "step": 47530 - }, - { - "epoch": 1.2066252062444474, - "grad_norm": 1.027299165725708, - "learning_rate": 1.1955831958370352e-05, - "loss": 0.0544, - "step": 47535 - }, - { - "epoch": 1.2067521259043026, - "grad_norm": 1.133080244064331, - "learning_rate": 1.1954985827304651e-05, - "loss": 0.0587, - "step": 47540 - }, - { - "epoch": 1.2068790455641578, - "grad_norm": 0.46309590339660645, - "learning_rate": 1.195413969623895e-05, - "loss": 0.0644, - "step": 47545 - }, - { - "epoch": 1.2070059652240133, - "grad_norm": 0.5490356683731079, - "learning_rate": 1.1953293565173248e-05, - "loss": 0.0596, - "step": 47550 - }, - { - "epoch": 1.2071328848838685, - "grad_norm": 0.375426322221756, - "learning_rate": 1.1952447434107544e-05, - "loss": 0.0506, - "step": 47555 - }, - { - "epoch": 1.2072598045437237, - "grad_norm": 0.8154984712600708, - "learning_rate": 1.1951601303041843e-05, - "loss": 0.0406, - "step": 47560 - }, - { - "epoch": 1.2073867242035792, - "grad_norm": 0.40651753544807434, - "learning_rate": 1.1950755171976141e-05, - "loss": 0.0447, - "step": 47565 - }, - { - "epoch": 1.2075136438634344, - "grad_norm": 0.8356378078460693, - "learning_rate": 1.194990904091044e-05, - "loss": 0.0608, - "step": 47570 - }, - { - "epoch": 1.2076405635232899, - "grad_norm": 0.3222847878932953, - "learning_rate": 1.1949062909844735e-05, - "loss": 0.0485, - "step": 47575 - }, - { - "epoch": 1.207767483183145, - "grad_norm": 1.0584497451782227, - "learning_rate": 1.1948216778779033e-05, - "loss": 0.0618, - "step": 47580 - }, - { - "epoch": 1.2078944028430003, - "grad_norm": 0.35221683979034424, - "learning_rate": 1.1947370647713331e-05, - "loss": 0.0495, - "step": 47585 - }, - { - "epoch": 1.2080213225028558, - "grad_norm": 0.4601605236530304, - "learning_rate": 1.1946524516647632e-05, - "loss": 0.0362, - "step": 47590 - }, - { - "epoch": 1.208148242162711, - "grad_norm": 0.4430946111679077, - "learning_rate": 1.1945678385581927e-05, - "loss": 0.0539, - "step": 47595 - }, - { - "epoch": 1.2082751618225662, - "grad_norm": 0.6887397170066833, - "learning_rate": 1.1944832254516225e-05, - "loss": 0.0665, - "step": 47600 - }, - { - "epoch": 1.2084020814824217, - "grad_norm": 0.5945390462875366, - "learning_rate": 1.1943986123450523e-05, - "loss": 0.0497, - "step": 47605 - }, - { - "epoch": 1.208529001142277, - "grad_norm": 0.33281856775283813, - "learning_rate": 1.1943139992384822e-05, - "loss": 0.0498, - "step": 47610 - }, - { - "epoch": 1.2086559208021321, - "grad_norm": 0.6176923513412476, - "learning_rate": 1.1942293861319118e-05, - "loss": 0.0575, - "step": 47615 - }, - { - "epoch": 1.2087828404619876, - "grad_norm": 0.4175015091896057, - "learning_rate": 1.1941447730253417e-05, - "loss": 0.0594, - "step": 47620 - }, - { - "epoch": 1.2089097601218428, - "grad_norm": 1.287824034690857, - "learning_rate": 1.1940601599187715e-05, - "loss": 0.0622, - "step": 47625 - }, - { - "epoch": 1.2090366797816983, - "grad_norm": 0.6162910461425781, - "learning_rate": 1.1939755468122014e-05, - "loss": 0.0618, - "step": 47630 - }, - { - "epoch": 1.2091635994415535, - "grad_norm": 0.5616070628166199, - "learning_rate": 1.193890933705631e-05, - "loss": 0.0507, - "step": 47635 - }, - { - "epoch": 1.2092905191014087, - "grad_norm": 0.5687158107757568, - "learning_rate": 1.1938063205990609e-05, - "loss": 0.0614, - "step": 47640 - }, - { - "epoch": 1.2094174387612642, - "grad_norm": 0.45072081685066223, - "learning_rate": 1.1937217074924907e-05, - "loss": 0.0435, - "step": 47645 - }, - { - "epoch": 1.2095443584211194, - "grad_norm": 0.5256980657577515, - "learning_rate": 1.1936370943859206e-05, - "loss": 0.0438, - "step": 47650 - }, - { - "epoch": 1.2096712780809749, - "grad_norm": 0.2803470492362976, - "learning_rate": 1.1935524812793502e-05, - "loss": 0.0466, - "step": 47655 - }, - { - "epoch": 1.20979819774083, - "grad_norm": 2.513427972793579, - "learning_rate": 1.19346786817278e-05, - "loss": 0.043, - "step": 47660 - }, - { - "epoch": 1.2099251174006853, - "grad_norm": 0.5604929327964783, - "learning_rate": 1.1933832550662099e-05, - "loss": 0.0496, - "step": 47665 - }, - { - "epoch": 1.2100520370605408, - "grad_norm": 0.44559329748153687, - "learning_rate": 1.1932986419596397e-05, - "loss": 0.0501, - "step": 47670 - }, - { - "epoch": 1.210178956720396, - "grad_norm": 1.9487465620040894, - "learning_rate": 1.1932140288530694e-05, - "loss": 0.074, - "step": 47675 - }, - { - "epoch": 1.2103058763802512, - "grad_norm": 0.4831107556819916, - "learning_rate": 1.1931294157464993e-05, - "loss": 0.0457, - "step": 47680 - }, - { - "epoch": 1.2104327960401067, - "grad_norm": 0.6473428010940552, - "learning_rate": 1.1930448026399291e-05, - "loss": 0.0518, - "step": 47685 - }, - { - "epoch": 1.210559715699962, - "grad_norm": 0.3753412365913391, - "learning_rate": 1.192960189533359e-05, - "loss": 0.0712, - "step": 47690 - }, - { - "epoch": 1.2106866353598171, - "grad_norm": 0.3820205330848694, - "learning_rate": 1.1928755764267886e-05, - "loss": 0.0409, - "step": 47695 - }, - { - "epoch": 1.2108135550196726, - "grad_norm": 0.3531240224838257, - "learning_rate": 1.1927909633202185e-05, - "loss": 0.0312, - "step": 47700 - }, - { - "epoch": 1.2109404746795278, - "grad_norm": 0.6117851138114929, - "learning_rate": 1.1927063502136483e-05, - "loss": 0.0515, - "step": 47705 - }, - { - "epoch": 1.2110673943393833, - "grad_norm": 0.4322781562805176, - "learning_rate": 1.1926217371070781e-05, - "loss": 0.0498, - "step": 47710 - }, - { - "epoch": 1.2111943139992385, - "grad_norm": 0.7937289476394653, - "learning_rate": 1.1925371240005076e-05, - "loss": 0.0666, - "step": 47715 - }, - { - "epoch": 1.2113212336590937, - "grad_norm": 0.49008092284202576, - "learning_rate": 1.1924525108939375e-05, - "loss": 0.0476, - "step": 47720 - }, - { - "epoch": 1.2114481533189492, - "grad_norm": 0.5920543670654297, - "learning_rate": 1.1923678977873673e-05, - "loss": 0.0552, - "step": 47725 - }, - { - "epoch": 1.2115750729788044, - "grad_norm": 0.3105852007865906, - "learning_rate": 1.1922832846807972e-05, - "loss": 0.0431, - "step": 47730 - }, - { - "epoch": 1.2117019926386599, - "grad_norm": 0.6602287292480469, - "learning_rate": 1.1921986715742268e-05, - "loss": 0.036, - "step": 47735 - }, - { - "epoch": 1.211828912298515, - "grad_norm": 0.401636004447937, - "learning_rate": 1.1921140584676567e-05, - "loss": 0.0377, - "step": 47740 - }, - { - "epoch": 1.2119558319583703, - "grad_norm": 0.4863969385623932, - "learning_rate": 1.1920294453610865e-05, - "loss": 0.0432, - "step": 47745 - }, - { - "epoch": 1.2120827516182255, - "grad_norm": 0.6260619759559631, - "learning_rate": 1.1919448322545163e-05, - "loss": 0.0501, - "step": 47750 - }, - { - "epoch": 1.212209671278081, - "grad_norm": 0.46534374356269836, - "learning_rate": 1.191860219147946e-05, - "loss": 0.0448, - "step": 47755 - }, - { - "epoch": 1.2123365909379362, - "grad_norm": 0.7587950825691223, - "learning_rate": 1.1917756060413759e-05, - "loss": 0.049, - "step": 47760 - }, - { - "epoch": 1.2124635105977917, - "grad_norm": 0.6223604679107666, - "learning_rate": 1.1916909929348057e-05, - "loss": 0.0362, - "step": 47765 - }, - { - "epoch": 1.212590430257647, - "grad_norm": 0.699390172958374, - "learning_rate": 1.1916063798282355e-05, - "loss": 0.082, - "step": 47770 - }, - { - "epoch": 1.2127173499175021, - "grad_norm": 0.41752728819847107, - "learning_rate": 1.1915217667216652e-05, - "loss": 0.0335, - "step": 47775 - }, - { - "epoch": 1.2128442695773576, - "grad_norm": 0.6131992340087891, - "learning_rate": 1.191437153615095e-05, - "loss": 0.0578, - "step": 47780 - }, - { - "epoch": 1.2129711892372128, - "grad_norm": 0.8584136366844177, - "learning_rate": 1.1913525405085249e-05, - "loss": 0.0577, - "step": 47785 - }, - { - "epoch": 1.2130981088970683, - "grad_norm": 0.3749255836009979, - "learning_rate": 1.1912679274019547e-05, - "loss": 0.0345, - "step": 47790 - }, - { - "epoch": 1.2132250285569235, - "grad_norm": 0.6018322706222534, - "learning_rate": 1.1911833142953844e-05, - "loss": 0.0449, - "step": 47795 - }, - { - "epoch": 1.2133519482167787, - "grad_norm": 0.7348560094833374, - "learning_rate": 1.1910987011888142e-05, - "loss": 0.0554, - "step": 47800 - }, - { - "epoch": 1.2134788678766342, - "grad_norm": 0.3346385359764099, - "learning_rate": 1.191014088082244e-05, - "loss": 0.061, - "step": 47805 - }, - { - "epoch": 1.2136057875364894, - "grad_norm": 0.4131503701210022, - "learning_rate": 1.190929474975674e-05, - "loss": 0.0403, - "step": 47810 - }, - { - "epoch": 1.2137327071963446, - "grad_norm": 0.2861380875110626, - "learning_rate": 1.1908448618691036e-05, - "loss": 0.0569, - "step": 47815 - }, - { - "epoch": 1.2138596268562, - "grad_norm": 0.3742963671684265, - "learning_rate": 1.1907602487625334e-05, - "loss": 0.0432, - "step": 47820 - }, - { - "epoch": 1.2139865465160553, - "grad_norm": 0.9287290573120117, - "learning_rate": 1.1906756356559633e-05, - "loss": 0.051, - "step": 47825 - }, - { - "epoch": 1.2141134661759105, - "grad_norm": 1.1919538974761963, - "learning_rate": 1.1905910225493931e-05, - "loss": 0.0472, - "step": 47830 - }, - { - "epoch": 1.214240385835766, - "grad_norm": 0.555343508720398, - "learning_rate": 1.1905064094428228e-05, - "loss": 0.0425, - "step": 47835 - }, - { - "epoch": 1.2143673054956212, - "grad_norm": 0.3687288463115692, - "learning_rate": 1.1904217963362526e-05, - "loss": 0.0274, - "step": 47840 - }, - { - "epoch": 1.2144942251554767, - "grad_norm": 1.837007999420166, - "learning_rate": 1.1903371832296825e-05, - "loss": 0.0569, - "step": 47845 - }, - { - "epoch": 1.214621144815332, - "grad_norm": 0.46443799138069153, - "learning_rate": 1.1902525701231123e-05, - "loss": 0.0536, - "step": 47850 - }, - { - "epoch": 1.2147480644751871, - "grad_norm": 0.8378704786300659, - "learning_rate": 1.1901679570165418e-05, - "loss": 0.0391, - "step": 47855 - }, - { - "epoch": 1.2148749841350426, - "grad_norm": 0.5092437863349915, - "learning_rate": 1.1900833439099716e-05, - "loss": 0.0506, - "step": 47860 - }, - { - "epoch": 1.2150019037948978, - "grad_norm": 0.3941427767276764, - "learning_rate": 1.1899987308034015e-05, - "loss": 0.0532, - "step": 47865 - }, - { - "epoch": 1.2151288234547533, - "grad_norm": 0.655997097492218, - "learning_rate": 1.1899141176968313e-05, - "loss": 0.0555, - "step": 47870 - }, - { - "epoch": 1.2152557431146085, - "grad_norm": 0.5069984197616577, - "learning_rate": 1.189829504590261e-05, - "loss": 0.0492, - "step": 47875 - }, - { - "epoch": 1.2153826627744637, - "grad_norm": 0.3666694462299347, - "learning_rate": 1.1897448914836908e-05, - "loss": 0.0453, - "step": 47880 - }, - { - "epoch": 1.2155095824343192, - "grad_norm": 0.28162577748298645, - "learning_rate": 1.1896602783771207e-05, - "loss": 0.0411, - "step": 47885 - }, - { - "epoch": 1.2156365020941744, - "grad_norm": 0.6651009917259216, - "learning_rate": 1.1895756652705505e-05, - "loss": 0.0439, - "step": 47890 - }, - { - "epoch": 1.2157634217540296, - "grad_norm": 0.36290109157562256, - "learning_rate": 1.1894910521639802e-05, - "loss": 0.0476, - "step": 47895 - }, - { - "epoch": 1.215890341413885, - "grad_norm": 0.40675750374794006, - "learning_rate": 1.18940643905741e-05, - "loss": 0.0448, - "step": 47900 - }, - { - "epoch": 1.2160172610737403, - "grad_norm": 0.691447377204895, - "learning_rate": 1.1893218259508399e-05, - "loss": 0.0574, - "step": 47905 - }, - { - "epoch": 1.2161441807335955, - "grad_norm": 0.6962319016456604, - "learning_rate": 1.1892372128442697e-05, - "loss": 0.0482, - "step": 47910 - }, - { - "epoch": 1.216271100393451, - "grad_norm": 0.45008185505867004, - "learning_rate": 1.1891525997376994e-05, - "loss": 0.0506, - "step": 47915 - }, - { - "epoch": 1.2163980200533062, - "grad_norm": 0.5712681412696838, - "learning_rate": 1.1890679866311292e-05, - "loss": 0.0426, - "step": 47920 - }, - { - "epoch": 1.2165249397131617, - "grad_norm": 0.3680890202522278, - "learning_rate": 1.188983373524559e-05, - "loss": 0.0606, - "step": 47925 - }, - { - "epoch": 1.216651859373017, - "grad_norm": 0.3379134237766266, - "learning_rate": 1.1888987604179889e-05, - "loss": 0.0402, - "step": 47930 - }, - { - "epoch": 1.2167787790328721, - "grad_norm": 0.5836665034294128, - "learning_rate": 1.1888141473114186e-05, - "loss": 0.043, - "step": 47935 - }, - { - "epoch": 1.2169056986927276, - "grad_norm": 0.5189544558525085, - "learning_rate": 1.1887295342048484e-05, - "loss": 0.0536, - "step": 47940 - }, - { - "epoch": 1.2170326183525828, - "grad_norm": 0.2991081476211548, - "learning_rate": 1.1886449210982782e-05, - "loss": 0.0541, - "step": 47945 - }, - { - "epoch": 1.217159538012438, - "grad_norm": 0.4432829022407532, - "learning_rate": 1.1885603079917081e-05, - "loss": 0.0584, - "step": 47950 - }, - { - "epoch": 1.2172864576722935, - "grad_norm": 0.4524320065975189, - "learning_rate": 1.1884756948851378e-05, - "loss": 0.0533, - "step": 47955 - }, - { - "epoch": 1.2174133773321487, - "grad_norm": 0.49395841360092163, - "learning_rate": 1.1883910817785676e-05, - "loss": 0.0588, - "step": 47960 - }, - { - "epoch": 1.217540296992004, - "grad_norm": 0.4318881034851074, - "learning_rate": 1.1883064686719974e-05, - "loss": 0.0496, - "step": 47965 - }, - { - "epoch": 1.2176672166518594, - "grad_norm": 0.7145828604698181, - "learning_rate": 1.1882218555654273e-05, - "loss": 0.0622, - "step": 47970 - }, - { - "epoch": 1.2177941363117146, - "grad_norm": 0.4024706184864044, - "learning_rate": 1.1881372424588568e-05, - "loss": 0.045, - "step": 47975 - }, - { - "epoch": 1.21792105597157, - "grad_norm": 0.37237033247947693, - "learning_rate": 1.1880526293522868e-05, - "loss": 0.0445, - "step": 47980 - }, - { - "epoch": 1.2180479756314253, - "grad_norm": 0.3930632174015045, - "learning_rate": 1.1879680162457166e-05, - "loss": 0.0484, - "step": 47985 - }, - { - "epoch": 1.2181748952912805, - "grad_norm": 0.5728297233581543, - "learning_rate": 1.1878834031391465e-05, - "loss": 0.0582, - "step": 47990 - }, - { - "epoch": 1.218301814951136, - "grad_norm": 0.6818700432777405, - "learning_rate": 1.187798790032576e-05, - "loss": 0.0502, - "step": 47995 - }, - { - "epoch": 1.2184287346109912, - "grad_norm": 0.43697500228881836, - "learning_rate": 1.1877141769260058e-05, - "loss": 0.0362, - "step": 48000 - }, - { - "epoch": 1.2185556542708467, - "grad_norm": 0.6180932521820068, - "learning_rate": 1.1876295638194357e-05, - "loss": 0.068, - "step": 48005 - }, - { - "epoch": 1.218682573930702, - "grad_norm": 0.43118447065353394, - "learning_rate": 1.1875449507128655e-05, - "loss": 0.0359, - "step": 48010 - }, - { - "epoch": 1.2188094935905571, - "grad_norm": 0.5021272897720337, - "learning_rate": 1.1874603376062955e-05, - "loss": 0.032, - "step": 48015 - }, - { - "epoch": 1.2189364132504126, - "grad_norm": 0.341580867767334, - "learning_rate": 1.187375724499725e-05, - "loss": 0.0573, - "step": 48020 - }, - { - "epoch": 1.2190633329102678, - "grad_norm": 0.3890722095966339, - "learning_rate": 1.1872911113931548e-05, - "loss": 0.0522, - "step": 48025 - }, - { - "epoch": 1.219190252570123, - "grad_norm": 0.48187339305877686, - "learning_rate": 1.1872064982865847e-05, - "loss": 0.0567, - "step": 48030 - }, - { - "epoch": 1.2193171722299785, - "grad_norm": 0.5387169122695923, - "learning_rate": 1.1871218851800145e-05, - "loss": 0.0686, - "step": 48035 - }, - { - "epoch": 1.2194440918898337, - "grad_norm": 1.374630093574524, - "learning_rate": 1.1870372720734442e-05, - "loss": 0.0482, - "step": 48040 - }, - { - "epoch": 1.219571011549689, - "grad_norm": 0.40712013840675354, - "learning_rate": 1.186952658966874e-05, - "loss": 0.0558, - "step": 48045 - }, - { - "epoch": 1.2196979312095444, - "grad_norm": 0.31175002455711365, - "learning_rate": 1.1868680458603039e-05, - "loss": 0.0392, - "step": 48050 - }, - { - "epoch": 1.2198248508693996, - "grad_norm": 0.3998873829841614, - "learning_rate": 1.1867834327537337e-05, - "loss": 0.0574, - "step": 48055 - }, - { - "epoch": 1.219951770529255, - "grad_norm": 0.4143379032611847, - "learning_rate": 1.1866988196471634e-05, - "loss": 0.0677, - "step": 48060 - }, - { - "epoch": 1.2200786901891103, - "grad_norm": 0.482231080532074, - "learning_rate": 1.1866142065405932e-05, - "loss": 0.0407, - "step": 48065 - }, - { - "epoch": 1.2202056098489655, - "grad_norm": 0.4256278872489929, - "learning_rate": 1.186529593434023e-05, - "loss": 0.0381, - "step": 48070 - }, - { - "epoch": 1.220332529508821, - "grad_norm": 0.8753464818000793, - "learning_rate": 1.1864449803274529e-05, - "loss": 0.0616, - "step": 48075 - }, - { - "epoch": 1.2204594491686762, - "grad_norm": 0.6562080383300781, - "learning_rate": 1.1863603672208826e-05, - "loss": 0.0474, - "step": 48080 - }, - { - "epoch": 1.2205863688285314, - "grad_norm": 0.6746869683265686, - "learning_rate": 1.1862757541143124e-05, - "loss": 0.062, - "step": 48085 - }, - { - "epoch": 1.220713288488387, - "grad_norm": 0.32743507623672485, - "learning_rate": 1.1861911410077423e-05, - "loss": 0.0386, - "step": 48090 - }, - { - "epoch": 1.2208402081482421, - "grad_norm": 0.3912975490093231, - "learning_rate": 1.1861065279011721e-05, - "loss": 0.0424, - "step": 48095 - }, - { - "epoch": 1.2209671278080974, - "grad_norm": 0.5421993136405945, - "learning_rate": 1.1860219147946018e-05, - "loss": 0.05, - "step": 48100 - }, - { - "epoch": 1.2210940474679528, - "grad_norm": 0.4203738570213318, - "learning_rate": 1.1859373016880316e-05, - "loss": 0.0525, - "step": 48105 - }, - { - "epoch": 1.221220967127808, - "grad_norm": 0.4044235646724701, - "learning_rate": 1.1858526885814615e-05, - "loss": 0.055, - "step": 48110 - }, - { - "epoch": 1.2213478867876635, - "grad_norm": 0.3478151261806488, - "learning_rate": 1.1857680754748913e-05, - "loss": 0.0528, - "step": 48115 - }, - { - "epoch": 1.2214748064475187, - "grad_norm": 0.5759983658790588, - "learning_rate": 1.185683462368321e-05, - "loss": 0.0503, - "step": 48120 - }, - { - "epoch": 1.221601726107374, - "grad_norm": 0.7162481546401978, - "learning_rate": 1.1855988492617508e-05, - "loss": 0.056, - "step": 48125 - }, - { - "epoch": 1.2217286457672294, - "grad_norm": 0.8707003593444824, - "learning_rate": 1.1855142361551806e-05, - "loss": 0.0443, - "step": 48130 - }, - { - "epoch": 1.2218555654270846, - "grad_norm": 0.32720276713371277, - "learning_rate": 1.1854296230486105e-05, - "loss": 0.036, - "step": 48135 - }, - { - "epoch": 1.22198248508694, - "grad_norm": 0.5202739238739014, - "learning_rate": 1.18534500994204e-05, - "loss": 0.0543, - "step": 48140 - }, - { - "epoch": 1.2221094047467953, - "grad_norm": 0.3443850874900818, - "learning_rate": 1.1852603968354698e-05, - "loss": 0.0409, - "step": 48145 - }, - { - "epoch": 1.2222363244066505, - "grad_norm": 0.532314658164978, - "learning_rate": 1.1851757837288997e-05, - "loss": 0.0505, - "step": 48150 - }, - { - "epoch": 1.222363244066506, - "grad_norm": 0.4341469407081604, - "learning_rate": 1.1850911706223295e-05, - "loss": 0.0597, - "step": 48155 - }, - { - "epoch": 1.2224901637263612, - "grad_norm": 0.4148404598236084, - "learning_rate": 1.1850065575157592e-05, - "loss": 0.0483, - "step": 48160 - }, - { - "epoch": 1.2226170833862164, - "grad_norm": 0.3770654499530792, - "learning_rate": 1.184921944409189e-05, - "loss": 0.0529, - "step": 48165 - }, - { - "epoch": 1.222744003046072, - "grad_norm": 0.40711233019828796, - "learning_rate": 1.1848373313026189e-05, - "loss": 0.0544, - "step": 48170 - }, - { - "epoch": 1.2228709227059271, - "grad_norm": 0.34192317724227905, - "learning_rate": 1.1847527181960487e-05, - "loss": 0.0503, - "step": 48175 - }, - { - "epoch": 1.2229978423657824, - "grad_norm": 0.4521634578704834, - "learning_rate": 1.1846681050894784e-05, - "loss": 0.031, - "step": 48180 - }, - { - "epoch": 1.2231247620256378, - "grad_norm": 0.6709709167480469, - "learning_rate": 1.1845834919829082e-05, - "loss": 0.0385, - "step": 48185 - }, - { - "epoch": 1.223251681685493, - "grad_norm": 0.4154309034347534, - "learning_rate": 1.184498878876338e-05, - "loss": 0.0689, - "step": 48190 - }, - { - "epoch": 1.2233786013453485, - "grad_norm": 0.3707118332386017, - "learning_rate": 1.1844142657697679e-05, - "loss": 0.0493, - "step": 48195 - }, - { - "epoch": 1.2235055210052037, - "grad_norm": 0.39239048957824707, - "learning_rate": 1.1843296526631976e-05, - "loss": 0.0332, - "step": 48200 - }, - { - "epoch": 1.223632440665059, - "grad_norm": 0.48555395007133484, - "learning_rate": 1.1842450395566274e-05, - "loss": 0.037, - "step": 48205 - }, - { - "epoch": 1.2237593603249144, - "grad_norm": 0.4282156229019165, - "learning_rate": 1.1841604264500572e-05, - "loss": 0.0563, - "step": 48210 - }, - { - "epoch": 1.2238862799847696, - "grad_norm": 0.6545154452323914, - "learning_rate": 1.184075813343487e-05, - "loss": 0.0585, - "step": 48215 - }, - { - "epoch": 1.224013199644625, - "grad_norm": 0.4745437800884247, - "learning_rate": 1.1839912002369168e-05, - "loss": 0.0569, - "step": 48220 - }, - { - "epoch": 1.2241401193044803, - "grad_norm": 0.3152703642845154, - "learning_rate": 1.1839065871303466e-05, - "loss": 0.0594, - "step": 48225 - }, - { - "epoch": 1.2242670389643355, - "grad_norm": 0.43473920226097107, - "learning_rate": 1.1838219740237764e-05, - "loss": 0.0425, - "step": 48230 - }, - { - "epoch": 1.224393958624191, - "grad_norm": 0.5615704655647278, - "learning_rate": 1.1837373609172063e-05, - "loss": 0.0439, - "step": 48235 - }, - { - "epoch": 1.2245208782840462, - "grad_norm": 0.49858376383781433, - "learning_rate": 1.183652747810636e-05, - "loss": 0.0524, - "step": 48240 - }, - { - "epoch": 1.2246477979439014, - "grad_norm": 0.6467445492744446, - "learning_rate": 1.1835681347040658e-05, - "loss": 0.0541, - "step": 48245 - }, - { - "epoch": 1.224774717603757, - "grad_norm": 0.44543665647506714, - "learning_rate": 1.1834835215974956e-05, - "loss": 0.0544, - "step": 48250 - }, - { - "epoch": 1.2249016372636121, - "grad_norm": 0.7828807830810547, - "learning_rate": 1.1833989084909255e-05, - "loss": 0.0558, - "step": 48255 - }, - { - "epoch": 1.2250285569234673, - "grad_norm": 0.8498260378837585, - "learning_rate": 1.1833142953843551e-05, - "loss": 0.0653, - "step": 48260 - }, - { - "epoch": 1.2251554765833228, - "grad_norm": 0.409463495016098, - "learning_rate": 1.183229682277785e-05, - "loss": 0.0687, - "step": 48265 - }, - { - "epoch": 1.225282396243178, - "grad_norm": 1.5601547956466675, - "learning_rate": 1.1831450691712148e-05, - "loss": 0.0591, - "step": 48270 - }, - { - "epoch": 1.2254093159030335, - "grad_norm": 0.46836692094802856, - "learning_rate": 1.1830604560646447e-05, - "loss": 0.0517, - "step": 48275 - }, - { - "epoch": 1.2255362355628887, - "grad_norm": 0.5003067851066589, - "learning_rate": 1.1829758429580742e-05, - "loss": 0.0426, - "step": 48280 - }, - { - "epoch": 1.225663155222744, - "grad_norm": 0.5646367073059082, - "learning_rate": 1.182891229851504e-05, - "loss": 0.0582, - "step": 48285 - }, - { - "epoch": 1.2257900748825994, - "grad_norm": 0.5930145978927612, - "learning_rate": 1.1828066167449338e-05, - "loss": 0.0463, - "step": 48290 - }, - { - "epoch": 1.2259169945424546, - "grad_norm": 0.39671626687049866, - "learning_rate": 1.1827220036383637e-05, - "loss": 0.044, - "step": 48295 - }, - { - "epoch": 1.2260439142023098, - "grad_norm": 0.45123833417892456, - "learning_rate": 1.1826373905317933e-05, - "loss": 0.044, - "step": 48300 - }, - { - "epoch": 1.2261708338621653, - "grad_norm": 0.4251686632633209, - "learning_rate": 1.1825527774252232e-05, - "loss": 0.0666, - "step": 48305 - }, - { - "epoch": 1.2262977535220205, - "grad_norm": 0.6554283499717712, - "learning_rate": 1.182468164318653e-05, - "loss": 0.0334, - "step": 48310 - }, - { - "epoch": 1.2264246731818758, - "grad_norm": 1.5035641193389893, - "learning_rate": 1.1823835512120829e-05, - "loss": 0.0485, - "step": 48315 - }, - { - "epoch": 1.2265515928417312, - "grad_norm": 0.5280198454856873, - "learning_rate": 1.1822989381055125e-05, - "loss": 0.0552, - "step": 48320 - }, - { - "epoch": 1.2266785125015864, - "grad_norm": 0.48902636766433716, - "learning_rate": 1.1822143249989424e-05, - "loss": 0.0577, - "step": 48325 - }, - { - "epoch": 1.2268054321614419, - "grad_norm": 0.6264007687568665, - "learning_rate": 1.1821297118923722e-05, - "loss": 0.0538, - "step": 48330 - }, - { - "epoch": 1.2269323518212971, - "grad_norm": 0.454319030046463, - "learning_rate": 1.182045098785802e-05, - "loss": 0.0541, - "step": 48335 - }, - { - "epoch": 1.2270592714811523, - "grad_norm": 0.5194593667984009, - "learning_rate": 1.1819604856792317e-05, - "loss": 0.0638, - "step": 48340 - }, - { - "epoch": 1.2271861911410078, - "grad_norm": 0.8644242286682129, - "learning_rate": 1.1818758725726616e-05, - "loss": 0.0455, - "step": 48345 - }, - { - "epoch": 1.227313110800863, - "grad_norm": 0.5458242297172546, - "learning_rate": 1.1817912594660914e-05, - "loss": 0.0537, - "step": 48350 - }, - { - "epoch": 1.2274400304607185, - "grad_norm": 0.48423853516578674, - "learning_rate": 1.1817066463595212e-05, - "loss": 0.0445, - "step": 48355 - }, - { - "epoch": 1.2275669501205737, - "grad_norm": 0.43780434131622314, - "learning_rate": 1.181622033252951e-05, - "loss": 0.0618, - "step": 48360 - }, - { - "epoch": 1.227693869780429, - "grad_norm": 0.37760844826698303, - "learning_rate": 1.1815374201463808e-05, - "loss": 0.0378, - "step": 48365 - }, - { - "epoch": 1.2278207894402844, - "grad_norm": 0.41244250535964966, - "learning_rate": 1.1814528070398106e-05, - "loss": 0.0419, - "step": 48370 - }, - { - "epoch": 1.2279477091001396, - "grad_norm": 0.703586220741272, - "learning_rate": 1.1813681939332404e-05, - "loss": 0.0547, - "step": 48375 - }, - { - "epoch": 1.2280746287599948, - "grad_norm": 0.45771220326423645, - "learning_rate": 1.1812835808266701e-05, - "loss": 0.047, - "step": 48380 - }, - { - "epoch": 1.2282015484198503, - "grad_norm": 0.3991802930831909, - "learning_rate": 1.1811989677201e-05, - "loss": 0.0781, - "step": 48385 - }, - { - "epoch": 1.2283284680797055, - "grad_norm": 0.3536953032016754, - "learning_rate": 1.1811143546135298e-05, - "loss": 0.0455, - "step": 48390 - }, - { - "epoch": 1.2284553877395608, - "grad_norm": 0.5885934233665466, - "learning_rate": 1.1810297415069596e-05, - "loss": 0.0563, - "step": 48395 - }, - { - "epoch": 1.2285823073994162, - "grad_norm": 0.5985354781150818, - "learning_rate": 1.1809451284003891e-05, - "loss": 0.0512, - "step": 48400 - }, - { - "epoch": 1.2287092270592714, - "grad_norm": 0.44048401713371277, - "learning_rate": 1.1808605152938191e-05, - "loss": 0.0387, - "step": 48405 - }, - { - "epoch": 1.2288361467191269, - "grad_norm": 0.6037269234657288, - "learning_rate": 1.180775902187249e-05, - "loss": 0.0703, - "step": 48410 - }, - { - "epoch": 1.2289630663789821, - "grad_norm": 0.255479633808136, - "learning_rate": 1.1806912890806788e-05, - "loss": 0.0483, - "step": 48415 - }, - { - "epoch": 1.2290899860388373, - "grad_norm": 0.4780646562576294, - "learning_rate": 1.1806066759741083e-05, - "loss": 0.0445, - "step": 48420 - }, - { - "epoch": 1.2292169056986928, - "grad_norm": 0.3280124366283417, - "learning_rate": 1.1805220628675382e-05, - "loss": 0.0541, - "step": 48425 - }, - { - "epoch": 1.229343825358548, - "grad_norm": 0.585932195186615, - "learning_rate": 1.180437449760968e-05, - "loss": 0.0667, - "step": 48430 - }, - { - "epoch": 1.2294707450184033, - "grad_norm": 0.4566034972667694, - "learning_rate": 1.1803528366543978e-05, - "loss": 0.0592, - "step": 48435 - }, - { - "epoch": 1.2295976646782587, - "grad_norm": 0.5770353674888611, - "learning_rate": 1.1802682235478275e-05, - "loss": 0.0545, - "step": 48440 - }, - { - "epoch": 1.229724584338114, - "grad_norm": 0.3303113579750061, - "learning_rate": 1.1801836104412574e-05, - "loss": 0.0528, - "step": 48445 - }, - { - "epoch": 1.2298515039979692, - "grad_norm": 0.45778951048851013, - "learning_rate": 1.1800989973346872e-05, - "loss": 0.0838, - "step": 48450 - }, - { - "epoch": 1.2299784236578246, - "grad_norm": 0.38629308342933655, - "learning_rate": 1.180014384228117e-05, - "loss": 0.0494, - "step": 48455 - }, - { - "epoch": 1.2301053433176798, - "grad_norm": 0.5975757837295532, - "learning_rate": 1.1799297711215467e-05, - "loss": 0.0451, - "step": 48460 - }, - { - "epoch": 1.2302322629775353, - "grad_norm": 0.39750218391418457, - "learning_rate": 1.1798451580149765e-05, - "loss": 0.0433, - "step": 48465 - }, - { - "epoch": 1.2303591826373905, - "grad_norm": 0.6133629679679871, - "learning_rate": 1.1797605449084064e-05, - "loss": 0.0477, - "step": 48470 - }, - { - "epoch": 1.2304861022972458, - "grad_norm": 0.4775794446468353, - "learning_rate": 1.1796759318018362e-05, - "loss": 0.0453, - "step": 48475 - }, - { - "epoch": 1.2306130219571012, - "grad_norm": 0.3138022720813751, - "learning_rate": 1.1795913186952659e-05, - "loss": 0.0401, - "step": 48480 - }, - { - "epoch": 1.2307399416169564, - "grad_norm": 0.4992977976799011, - "learning_rate": 1.1795067055886957e-05, - "loss": 0.0423, - "step": 48485 - }, - { - "epoch": 1.2308668612768119, - "grad_norm": 0.504375696182251, - "learning_rate": 1.1794220924821256e-05, - "loss": 0.0546, - "step": 48490 - }, - { - "epoch": 1.2309937809366671, - "grad_norm": 0.7147983312606812, - "learning_rate": 1.1793374793755554e-05, - "loss": 0.0434, - "step": 48495 - }, - { - "epoch": 1.2311207005965223, - "grad_norm": 0.48199477791786194, - "learning_rate": 1.1792528662689851e-05, - "loss": 0.0402, - "step": 48500 - }, - { - "epoch": 1.2312476202563778, - "grad_norm": 0.3553607761859894, - "learning_rate": 1.179168253162415e-05, - "loss": 0.0405, - "step": 48505 - }, - { - "epoch": 1.231374539916233, - "grad_norm": 0.4430590867996216, - "learning_rate": 1.1790836400558448e-05, - "loss": 0.0413, - "step": 48510 - }, - { - "epoch": 1.2315014595760883, - "grad_norm": 0.6555122137069702, - "learning_rate": 1.1789990269492746e-05, - "loss": 0.0856, - "step": 48515 - }, - { - "epoch": 1.2316283792359437, - "grad_norm": 0.6027165651321411, - "learning_rate": 1.1789144138427045e-05, - "loss": 0.0468, - "step": 48520 - }, - { - "epoch": 1.231755298895799, - "grad_norm": 1.7853502035140991, - "learning_rate": 1.1788298007361341e-05, - "loss": 0.047, - "step": 48525 - }, - { - "epoch": 1.2318822185556542, - "grad_norm": 0.7901225090026855, - "learning_rate": 1.178745187629564e-05, - "loss": 0.0625, - "step": 48530 - }, - { - "epoch": 1.2320091382155096, - "grad_norm": 0.34949514269828796, - "learning_rate": 1.1786605745229938e-05, - "loss": 0.0546, - "step": 48535 - }, - { - "epoch": 1.2321360578753648, - "grad_norm": 0.4670979678630829, - "learning_rate": 1.1785759614164236e-05, - "loss": 0.0456, - "step": 48540 - }, - { - "epoch": 1.2322629775352203, - "grad_norm": 1.2711520195007324, - "learning_rate": 1.1784913483098533e-05, - "loss": 0.0456, - "step": 48545 - }, - { - "epoch": 1.2323898971950755, - "grad_norm": 0.6172618865966797, - "learning_rate": 1.1784067352032832e-05, - "loss": 0.0692, - "step": 48550 - }, - { - "epoch": 1.2325168168549308, - "grad_norm": 0.42608824372291565, - "learning_rate": 1.178322122096713e-05, - "loss": 0.0704, - "step": 48555 - }, - { - "epoch": 1.2326437365147862, - "grad_norm": 0.5658891201019287, - "learning_rate": 1.1782375089901428e-05, - "loss": 0.0573, - "step": 48560 - }, - { - "epoch": 1.2327706561746414, - "grad_norm": 1.0735822916030884, - "learning_rate": 1.1781528958835723e-05, - "loss": 0.0488, - "step": 48565 - }, - { - "epoch": 1.2328975758344969, - "grad_norm": 0.464346706867218, - "learning_rate": 1.1780682827770022e-05, - "loss": 0.0459, - "step": 48570 - }, - { - "epoch": 1.2330244954943521, - "grad_norm": 0.488875150680542, - "learning_rate": 1.177983669670432e-05, - "loss": 0.0601, - "step": 48575 - }, - { - "epoch": 1.2331514151542073, - "grad_norm": 0.445644736289978, - "learning_rate": 1.177899056563862e-05, - "loss": 0.0423, - "step": 48580 - }, - { - "epoch": 1.2332783348140628, - "grad_norm": 0.43384864926338196, - "learning_rate": 1.1778144434572915e-05, - "loss": 0.0325, - "step": 48585 - }, - { - "epoch": 1.233405254473918, - "grad_norm": 0.5357537865638733, - "learning_rate": 1.1777298303507214e-05, - "loss": 0.051, - "step": 48590 - }, - { - "epoch": 1.2335321741337733, - "grad_norm": 0.3894411623477936, - "learning_rate": 1.1776452172441512e-05, - "loss": 0.0419, - "step": 48595 - }, - { - "epoch": 1.2336590937936287, - "grad_norm": 1.1612788438796997, - "learning_rate": 1.177560604137581e-05, - "loss": 0.0628, - "step": 48600 - }, - { - "epoch": 1.233786013453484, - "grad_norm": 0.3953167498111725, - "learning_rate": 1.1774759910310107e-05, - "loss": 0.0578, - "step": 48605 - }, - { - "epoch": 1.2339129331133392, - "grad_norm": 0.4063102602958679, - "learning_rate": 1.1773913779244406e-05, - "loss": 0.0511, - "step": 48610 - }, - { - "epoch": 1.2340398527731946, - "grad_norm": 0.5163678526878357, - "learning_rate": 1.1773067648178704e-05, - "loss": 0.0546, - "step": 48615 - }, - { - "epoch": 1.2341667724330498, - "grad_norm": 0.4458777606487274, - "learning_rate": 1.1772221517113002e-05, - "loss": 0.041, - "step": 48620 - }, - { - "epoch": 1.2342936920929053, - "grad_norm": 0.7325732111930847, - "learning_rate": 1.1771375386047299e-05, - "loss": 0.0445, - "step": 48625 - }, - { - "epoch": 1.2344206117527605, - "grad_norm": 0.5761170387268066, - "learning_rate": 1.1770529254981598e-05, - "loss": 0.0462, - "step": 48630 - }, - { - "epoch": 1.2345475314126158, - "grad_norm": 0.5805464386940002, - "learning_rate": 1.1769683123915896e-05, - "loss": 0.0435, - "step": 48635 - }, - { - "epoch": 1.2346744510724712, - "grad_norm": 0.46998855471611023, - "learning_rate": 1.1768836992850194e-05, - "loss": 0.0435, - "step": 48640 - }, - { - "epoch": 1.2348013707323264, - "grad_norm": 0.45207479596138, - "learning_rate": 1.1767990861784491e-05, - "loss": 0.0478, - "step": 48645 - }, - { - "epoch": 1.2349282903921817, - "grad_norm": 0.3603888154029846, - "learning_rate": 1.176714473071879e-05, - "loss": 0.034, - "step": 48650 - }, - { - "epoch": 1.2350552100520371, - "grad_norm": 0.5992320775985718, - "learning_rate": 1.1766298599653088e-05, - "loss": 0.0512, - "step": 48655 - }, - { - "epoch": 1.2351821297118923, - "grad_norm": 0.5187745690345764, - "learning_rate": 1.1765452468587386e-05, - "loss": 0.0608, - "step": 48660 - }, - { - "epoch": 1.2353090493717476, - "grad_norm": 0.7792990803718567, - "learning_rate": 1.1764606337521683e-05, - "loss": 0.0296, - "step": 48665 - }, - { - "epoch": 1.235435969031603, - "grad_norm": 0.4659191071987152, - "learning_rate": 1.1763760206455981e-05, - "loss": 0.0435, - "step": 48670 - }, - { - "epoch": 1.2355628886914582, - "grad_norm": 0.7345532774925232, - "learning_rate": 1.176291407539028e-05, - "loss": 0.0518, - "step": 48675 - }, - { - "epoch": 1.2356898083513137, - "grad_norm": 0.6375783085823059, - "learning_rate": 1.1762067944324578e-05, - "loss": 0.0462, - "step": 48680 - }, - { - "epoch": 1.235816728011169, - "grad_norm": 1.3004748821258545, - "learning_rate": 1.1761221813258875e-05, - "loss": 0.039, - "step": 48685 - }, - { - "epoch": 1.2359436476710242, - "grad_norm": 0.7052150368690491, - "learning_rate": 1.1760375682193173e-05, - "loss": 0.0485, - "step": 48690 - }, - { - "epoch": 1.2360705673308796, - "grad_norm": 0.7878131866455078, - "learning_rate": 1.1759529551127472e-05, - "loss": 0.0503, - "step": 48695 - }, - { - "epoch": 1.2361974869907348, - "grad_norm": 0.7362515330314636, - "learning_rate": 1.175868342006177e-05, - "loss": 0.0583, - "step": 48700 - }, - { - "epoch": 1.2363244066505903, - "grad_norm": 0.5726087689399719, - "learning_rate": 1.1757837288996065e-05, - "loss": 0.0584, - "step": 48705 - }, - { - "epoch": 1.2364513263104455, - "grad_norm": 0.5364107489585876, - "learning_rate": 1.1756991157930363e-05, - "loss": 0.0497, - "step": 48710 - }, - { - "epoch": 1.2365782459703007, - "grad_norm": 0.5426493287086487, - "learning_rate": 1.1756145026864662e-05, - "loss": 0.0496, - "step": 48715 - }, - { - "epoch": 1.2367051656301562, - "grad_norm": 0.6297512054443359, - "learning_rate": 1.175529889579896e-05, - "loss": 0.0565, - "step": 48720 - }, - { - "epoch": 1.2368320852900114, - "grad_norm": 0.48507025837898254, - "learning_rate": 1.1754452764733257e-05, - "loss": 0.0537, - "step": 48725 - }, - { - "epoch": 1.2369590049498667, - "grad_norm": 0.5204402208328247, - "learning_rate": 1.1753606633667555e-05, - "loss": 0.0587, - "step": 48730 - }, - { - "epoch": 1.237085924609722, - "grad_norm": 0.5657681226730347, - "learning_rate": 1.1752760502601854e-05, - "loss": 0.0454, - "step": 48735 - }, - { - "epoch": 1.2372128442695773, - "grad_norm": 0.7229506969451904, - "learning_rate": 1.1751914371536152e-05, - "loss": 0.0512, - "step": 48740 - }, - { - "epoch": 1.2373397639294326, - "grad_norm": 0.5599811673164368, - "learning_rate": 1.1751068240470449e-05, - "loss": 0.057, - "step": 48745 - }, - { - "epoch": 1.237466683589288, - "grad_norm": 0.3286034166812897, - "learning_rate": 1.1750222109404747e-05, - "loss": 0.0517, - "step": 48750 - }, - { - "epoch": 1.2375936032491432, - "grad_norm": 0.7236649394035339, - "learning_rate": 1.1749375978339046e-05, - "loss": 0.0684, - "step": 48755 - }, - { - "epoch": 1.2377205229089987, - "grad_norm": 0.5150246024131775, - "learning_rate": 1.1748529847273344e-05, - "loss": 0.0594, - "step": 48760 - }, - { - "epoch": 1.237847442568854, - "grad_norm": 0.63215172290802, - "learning_rate": 1.174768371620764e-05, - "loss": 0.0477, - "step": 48765 - }, - { - "epoch": 1.2379743622287092, - "grad_norm": 0.5500482320785522, - "learning_rate": 1.174683758514194e-05, - "loss": 0.0453, - "step": 48770 - }, - { - "epoch": 1.2381012818885646, - "grad_norm": 0.6645134091377258, - "learning_rate": 1.1745991454076238e-05, - "loss": 0.0501, - "step": 48775 - }, - { - "epoch": 1.2382282015484198, - "grad_norm": 0.40620630979537964, - "learning_rate": 1.1745145323010536e-05, - "loss": 0.0403, - "step": 48780 - }, - { - "epoch": 1.238355121208275, - "grad_norm": 0.34900638461112976, - "learning_rate": 1.1744299191944833e-05, - "loss": 0.0397, - "step": 48785 - }, - { - "epoch": 1.2384820408681305, - "grad_norm": 0.4716511070728302, - "learning_rate": 1.1743453060879131e-05, - "loss": 0.0687, - "step": 48790 - }, - { - "epoch": 1.2386089605279857, - "grad_norm": 0.3968251943588257, - "learning_rate": 1.174260692981343e-05, - "loss": 0.0343, - "step": 48795 - }, - { - "epoch": 1.238735880187841, - "grad_norm": 0.2908875346183777, - "learning_rate": 1.1741760798747728e-05, - "loss": 0.0441, - "step": 48800 - }, - { - "epoch": 1.2388627998476964, - "grad_norm": 0.3802693486213684, - "learning_rate": 1.1740914667682025e-05, - "loss": 0.0508, - "step": 48805 - }, - { - "epoch": 1.2389897195075517, - "grad_norm": 0.668181836605072, - "learning_rate": 1.1740068536616323e-05, - "loss": 0.0501, - "step": 48810 - }, - { - "epoch": 1.239116639167407, - "grad_norm": 0.676470935344696, - "learning_rate": 1.1739222405550621e-05, - "loss": 0.0319, - "step": 48815 - }, - { - "epoch": 1.2392435588272623, - "grad_norm": 0.8016480803489685, - "learning_rate": 1.173837627448492e-05, - "loss": 0.046, - "step": 48820 - }, - { - "epoch": 1.2393704784871176, - "grad_norm": 0.2875024676322937, - "learning_rate": 1.1737530143419217e-05, - "loss": 0.045, - "step": 48825 - }, - { - "epoch": 1.239497398146973, - "grad_norm": 0.6343921422958374, - "learning_rate": 1.1736684012353515e-05, - "loss": 0.051, - "step": 48830 - }, - { - "epoch": 1.2396243178068282, - "grad_norm": 0.4090186059474945, - "learning_rate": 1.1735837881287813e-05, - "loss": 0.0579, - "step": 48835 - }, - { - "epoch": 1.2397512374666837, - "grad_norm": 0.3573387563228607, - "learning_rate": 1.1734991750222112e-05, - "loss": 0.0483, - "step": 48840 - }, - { - "epoch": 1.239878157126539, - "grad_norm": 0.6261357665061951, - "learning_rate": 1.1734145619156407e-05, - "loss": 0.0666, - "step": 48845 - }, - { - "epoch": 1.2400050767863942, - "grad_norm": 0.3487824499607086, - "learning_rate": 1.1733299488090705e-05, - "loss": 0.0417, - "step": 48850 - }, - { - "epoch": 1.2401319964462496, - "grad_norm": 0.5083988308906555, - "learning_rate": 1.1732453357025004e-05, - "loss": 0.0458, - "step": 48855 - }, - { - "epoch": 1.2402589161061048, - "grad_norm": 0.5172750949859619, - "learning_rate": 1.1731607225959302e-05, - "loss": 0.0721, - "step": 48860 - }, - { - "epoch": 1.24038583576596, - "grad_norm": 0.4964565932750702, - "learning_rate": 1.1730761094893599e-05, - "loss": 0.049, - "step": 48865 - }, - { - "epoch": 1.2405127554258155, - "grad_norm": 0.47690799832344055, - "learning_rate": 1.1729914963827897e-05, - "loss": 0.0367, - "step": 48870 - }, - { - "epoch": 1.2406396750856707, - "grad_norm": 0.5080900192260742, - "learning_rate": 1.1729068832762195e-05, - "loss": 0.072, - "step": 48875 - }, - { - "epoch": 1.240766594745526, - "grad_norm": 0.9104970693588257, - "learning_rate": 1.1728222701696494e-05, - "loss": 0.0328, - "step": 48880 - }, - { - "epoch": 1.2408935144053814, - "grad_norm": 0.3930104076862335, - "learning_rate": 1.172737657063079e-05, - "loss": 0.0357, - "step": 48885 - }, - { - "epoch": 1.2410204340652367, - "grad_norm": 0.465192049741745, - "learning_rate": 1.1726530439565089e-05, - "loss": 0.0443, - "step": 48890 - }, - { - "epoch": 1.241147353725092, - "grad_norm": 0.6286647915840149, - "learning_rate": 1.1725684308499387e-05, - "loss": 0.0371, - "step": 48895 - }, - { - "epoch": 1.2412742733849473, - "grad_norm": 1.4850399494171143, - "learning_rate": 1.1724838177433686e-05, - "loss": 0.0359, - "step": 48900 - }, - { - "epoch": 1.2414011930448026, - "grad_norm": 0.3768984377384186, - "learning_rate": 1.1723992046367983e-05, - "loss": 0.0361, - "step": 48905 - }, - { - "epoch": 1.241528112704658, - "grad_norm": 0.8926663994789124, - "learning_rate": 1.1723145915302281e-05, - "loss": 0.0595, - "step": 48910 - }, - { - "epoch": 1.2416550323645132, - "grad_norm": 0.33912330865859985, - "learning_rate": 1.172229978423658e-05, - "loss": 0.0528, - "step": 48915 - }, - { - "epoch": 1.2417819520243687, - "grad_norm": 0.6454929113388062, - "learning_rate": 1.1721453653170878e-05, - "loss": 0.0656, - "step": 48920 - }, - { - "epoch": 1.241908871684224, - "grad_norm": 0.6379013061523438, - "learning_rate": 1.1720607522105174e-05, - "loss": 0.0631, - "step": 48925 - }, - { - "epoch": 1.2420357913440792, - "grad_norm": 0.23221291601657867, - "learning_rate": 1.1719761391039473e-05, - "loss": 0.0262, - "step": 48930 - }, - { - "epoch": 1.2421627110039346, - "grad_norm": 0.48519957065582275, - "learning_rate": 1.1718915259973771e-05, - "loss": 0.0562, - "step": 48935 - }, - { - "epoch": 1.2422896306637898, - "grad_norm": 0.5518013834953308, - "learning_rate": 1.171806912890807e-05, - "loss": 0.049, - "step": 48940 - }, - { - "epoch": 1.242416550323645, - "grad_norm": 0.3747818171977997, - "learning_rate": 1.1717222997842366e-05, - "loss": 0.0421, - "step": 48945 - }, - { - "epoch": 1.2425434699835005, - "grad_norm": 0.5751658082008362, - "learning_rate": 1.1716376866776665e-05, - "loss": 0.0546, - "step": 48950 - }, - { - "epoch": 1.2426703896433557, - "grad_norm": 0.31894636154174805, - "learning_rate": 1.1715530735710963e-05, - "loss": 0.0376, - "step": 48955 - }, - { - "epoch": 1.242797309303211, - "grad_norm": 0.5194088220596313, - "learning_rate": 1.1714684604645262e-05, - "loss": 0.0383, - "step": 48960 - }, - { - "epoch": 1.2429242289630664, - "grad_norm": 0.5446136593818665, - "learning_rate": 1.1713838473579557e-05, - "loss": 0.0445, - "step": 48965 - }, - { - "epoch": 1.2430511486229217, - "grad_norm": 0.43265053629875183, - "learning_rate": 1.1712992342513857e-05, - "loss": 0.0395, - "step": 48970 - }, - { - "epoch": 1.243178068282777, - "grad_norm": 0.837386429309845, - "learning_rate": 1.1712146211448155e-05, - "loss": 0.0444, - "step": 48975 - }, - { - "epoch": 1.2433049879426323, - "grad_norm": 0.5028703808784485, - "learning_rate": 1.1711300080382453e-05, - "loss": 0.058, - "step": 48980 - }, - { - "epoch": 1.2434319076024876, - "grad_norm": 0.49422457814216614, - "learning_rate": 1.1710453949316748e-05, - "loss": 0.0512, - "step": 48985 - }, - { - "epoch": 1.243558827262343, - "grad_norm": 0.3969539403915405, - "learning_rate": 1.1709607818251047e-05, - "loss": 0.0632, - "step": 48990 - }, - { - "epoch": 1.2436857469221982, - "grad_norm": 0.47344541549682617, - "learning_rate": 1.1708761687185345e-05, - "loss": 0.0435, - "step": 48995 - }, - { - "epoch": 1.2438126665820535, - "grad_norm": 0.5679357647895813, - "learning_rate": 1.1707915556119644e-05, - "loss": 0.0324, - "step": 49000 - }, - { - "epoch": 1.243939586241909, - "grad_norm": 0.3739964962005615, - "learning_rate": 1.170706942505394e-05, - "loss": 0.0347, - "step": 49005 - }, - { - "epoch": 1.2440665059017642, - "grad_norm": 0.32338669896125793, - "learning_rate": 1.1706223293988239e-05, - "loss": 0.0458, - "step": 49010 - }, - { - "epoch": 1.2441934255616194, - "grad_norm": 0.4455201029777527, - "learning_rate": 1.1705377162922537e-05, - "loss": 0.0409, - "step": 49015 - }, - { - "epoch": 1.2443203452214748, - "grad_norm": 0.33780887722969055, - "learning_rate": 1.1704531031856836e-05, - "loss": 0.0683, - "step": 49020 - }, - { - "epoch": 1.24444726488133, - "grad_norm": 0.4013279378414154, - "learning_rate": 1.1703684900791132e-05, - "loss": 0.0632, - "step": 49025 - }, - { - "epoch": 1.2445741845411855, - "grad_norm": 1.351067304611206, - "learning_rate": 1.170283876972543e-05, - "loss": 0.0591, - "step": 49030 - }, - { - "epoch": 1.2447011042010407, - "grad_norm": 0.4595890939235687, - "learning_rate": 1.1701992638659729e-05, - "loss": 0.0494, - "step": 49035 - }, - { - "epoch": 1.244828023860896, - "grad_norm": 1.2798521518707275, - "learning_rate": 1.1701146507594028e-05, - "loss": 0.068, - "step": 49040 - }, - { - "epoch": 1.2449549435207514, - "grad_norm": 0.6841833591461182, - "learning_rate": 1.1700300376528326e-05, - "loss": 0.0609, - "step": 49045 - }, - { - "epoch": 1.2450818631806067, - "grad_norm": 0.3546845614910126, - "learning_rate": 1.1699454245462623e-05, - "loss": 0.0554, - "step": 49050 - }, - { - "epoch": 1.245208782840462, - "grad_norm": 0.402447909116745, - "learning_rate": 1.1698608114396921e-05, - "loss": 0.0594, - "step": 49055 - }, - { - "epoch": 1.2453357025003173, - "grad_norm": 0.7004242539405823, - "learning_rate": 1.169776198333122e-05, - "loss": 0.0595, - "step": 49060 - }, - { - "epoch": 1.2454626221601726, - "grad_norm": 0.4299854040145874, - "learning_rate": 1.1696915852265518e-05, - "loss": 0.0426, - "step": 49065 - }, - { - "epoch": 1.245589541820028, - "grad_norm": 0.34800174832344055, - "learning_rate": 1.1696069721199815e-05, - "loss": 0.0378, - "step": 49070 - }, - { - "epoch": 1.2457164614798832, - "grad_norm": 0.4346165657043457, - "learning_rate": 1.1695223590134113e-05, - "loss": 0.0364, - "step": 49075 - }, - { - "epoch": 1.2458433811397385, - "grad_norm": 0.5641934275627136, - "learning_rate": 1.1694377459068411e-05, - "loss": 0.0478, - "step": 49080 - }, - { - "epoch": 1.245970300799594, - "grad_norm": 0.8280352354049683, - "learning_rate": 1.169353132800271e-05, - "loss": 0.0495, - "step": 49085 - }, - { - "epoch": 1.2460972204594492, - "grad_norm": 0.5798157453536987, - "learning_rate": 1.1692685196937006e-05, - "loss": 0.0591, - "step": 49090 - }, - { - "epoch": 1.2462241401193044, - "grad_norm": 0.41891756653785706, - "learning_rate": 1.1691839065871305e-05, - "loss": 0.0465, - "step": 49095 - }, - { - "epoch": 1.2463510597791598, - "grad_norm": 1.5284864902496338, - "learning_rate": 1.1690992934805603e-05, - "loss": 0.0557, - "step": 49100 - }, - { - "epoch": 1.246477979439015, - "grad_norm": 0.5609028339385986, - "learning_rate": 1.1690146803739902e-05, - "loss": 0.0572, - "step": 49105 - }, - { - "epoch": 1.2466048990988705, - "grad_norm": 0.6673361659049988, - "learning_rate": 1.1689300672674198e-05, - "loss": 0.0442, - "step": 49110 - }, - { - "epoch": 1.2467318187587257, - "grad_norm": 0.6723892092704773, - "learning_rate": 1.1688454541608497e-05, - "loss": 0.063, - "step": 49115 - }, - { - "epoch": 1.246858738418581, - "grad_norm": 0.6024113893508911, - "learning_rate": 1.1687608410542795e-05, - "loss": 0.0464, - "step": 49120 - }, - { - "epoch": 1.2469856580784364, - "grad_norm": 0.7270237803459167, - "learning_rate": 1.1686762279477094e-05, - "loss": 0.0655, - "step": 49125 - }, - { - "epoch": 1.2471125777382916, - "grad_norm": 0.27216020226478577, - "learning_rate": 1.1685916148411389e-05, - "loss": 0.0748, - "step": 49130 - }, - { - "epoch": 1.2472394973981469, - "grad_norm": 0.649540364742279, - "learning_rate": 1.1685070017345687e-05, - "loss": 0.0634, - "step": 49135 - }, - { - "epoch": 1.2473664170580023, - "grad_norm": 0.4576028287410736, - "learning_rate": 1.1684223886279985e-05, - "loss": 0.0515, - "step": 49140 - }, - { - "epoch": 1.2474933367178576, - "grad_norm": 0.4624655544757843, - "learning_rate": 1.1683377755214285e-05, - "loss": 0.0465, - "step": 49145 - }, - { - "epoch": 1.2476202563777128, - "grad_norm": 0.4836116135120392, - "learning_rate": 1.168253162414858e-05, - "loss": 0.0649, - "step": 49150 - }, - { - "epoch": 1.2477471760375682, - "grad_norm": 0.7106992602348328, - "learning_rate": 1.1681685493082879e-05, - "loss": 0.0557, - "step": 49155 - }, - { - "epoch": 1.2478740956974235, - "grad_norm": 0.6342343688011169, - "learning_rate": 1.1680839362017177e-05, - "loss": 0.0616, - "step": 49160 - }, - { - "epoch": 1.248001015357279, - "grad_norm": 0.8220714330673218, - "learning_rate": 1.1679993230951476e-05, - "loss": 0.0443, - "step": 49165 - }, - { - "epoch": 1.2481279350171341, - "grad_norm": 0.7600146532058716, - "learning_rate": 1.1679147099885772e-05, - "loss": 0.0454, - "step": 49170 - }, - { - "epoch": 1.2482548546769894, - "grad_norm": 0.5008277893066406, - "learning_rate": 1.167830096882007e-05, - "loss": 0.0528, - "step": 49175 - }, - { - "epoch": 1.2483817743368448, - "grad_norm": 0.47833484411239624, - "learning_rate": 1.167745483775437e-05, - "loss": 0.0408, - "step": 49180 - }, - { - "epoch": 1.2485086939967, - "grad_norm": 0.6322722434997559, - "learning_rate": 1.1676608706688668e-05, - "loss": 0.0546, - "step": 49185 - }, - { - "epoch": 1.2486356136565555, - "grad_norm": 0.4379027485847473, - "learning_rate": 1.1675762575622964e-05, - "loss": 0.0605, - "step": 49190 - }, - { - "epoch": 1.2487625333164107, - "grad_norm": 0.5004573464393616, - "learning_rate": 1.1674916444557263e-05, - "loss": 0.0397, - "step": 49195 - }, - { - "epoch": 1.248889452976266, - "grad_norm": 0.4876289367675781, - "learning_rate": 1.1674070313491561e-05, - "loss": 0.0578, - "step": 49200 - }, - { - "epoch": 1.2490163726361214, - "grad_norm": 0.7394903898239136, - "learning_rate": 1.167322418242586e-05, - "loss": 0.0642, - "step": 49205 - }, - { - "epoch": 1.2491432922959766, - "grad_norm": 0.24068079888820648, - "learning_rate": 1.1672378051360156e-05, - "loss": 0.0411, - "step": 49210 - }, - { - "epoch": 1.2492702119558319, - "grad_norm": 0.5618467926979065, - "learning_rate": 1.1671531920294455e-05, - "loss": 0.0488, - "step": 49215 - }, - { - "epoch": 1.2493971316156873, - "grad_norm": 0.3541858196258545, - "learning_rate": 1.1670685789228753e-05, - "loss": 0.0379, - "step": 49220 - }, - { - "epoch": 1.2495240512755426, - "grad_norm": 0.3258123993873596, - "learning_rate": 1.1669839658163051e-05, - "loss": 0.0448, - "step": 49225 - }, - { - "epoch": 1.2496509709353978, - "grad_norm": 0.37367019057273865, - "learning_rate": 1.1668993527097348e-05, - "loss": 0.057, - "step": 49230 - }, - { - "epoch": 1.2497778905952532, - "grad_norm": 0.39462634921073914, - "learning_rate": 1.1668147396031647e-05, - "loss": 0.0448, - "step": 49235 - }, - { - "epoch": 1.2499048102551085, - "grad_norm": 0.4568432569503784, - "learning_rate": 1.1667301264965945e-05, - "loss": 0.0423, - "step": 49240 - }, - { - "epoch": 1.250031729914964, - "grad_norm": 0.5384783744812012, - "learning_rate": 1.1666455133900243e-05, - "loss": 0.0556, - "step": 49245 - }, - { - "epoch": 1.2501586495748191, - "grad_norm": 0.5168209671974182, - "learning_rate": 1.166560900283454e-05, - "loss": 0.0492, - "step": 49250 - }, - { - "epoch": 1.2502855692346744, - "grad_norm": 1.3571245670318604, - "learning_rate": 1.1664762871768838e-05, - "loss": 0.0512, - "step": 49255 - }, - { - "epoch": 1.2504124888945298, - "grad_norm": 0.42729616165161133, - "learning_rate": 1.1663916740703137e-05, - "loss": 0.0508, - "step": 49260 - }, - { - "epoch": 1.250539408554385, - "grad_norm": 0.41345539689064026, - "learning_rate": 1.1663070609637435e-05, - "loss": 0.0496, - "step": 49265 - }, - { - "epoch": 1.2506663282142405, - "grad_norm": 0.30451786518096924, - "learning_rate": 1.166222447857173e-05, - "loss": 0.0491, - "step": 49270 - }, - { - "epoch": 1.2507932478740957, - "grad_norm": 0.2627016603946686, - "learning_rate": 1.1661378347506029e-05, - "loss": 0.0537, - "step": 49275 - }, - { - "epoch": 1.250920167533951, - "grad_norm": 0.6579365730285645, - "learning_rate": 1.1660532216440327e-05, - "loss": 0.0504, - "step": 49280 - }, - { - "epoch": 1.2510470871938062, - "grad_norm": 0.921524167060852, - "learning_rate": 1.1659686085374625e-05, - "loss": 0.0431, - "step": 49285 - }, - { - "epoch": 1.2511740068536616, - "grad_norm": 1.5933382511138916, - "learning_rate": 1.1658839954308922e-05, - "loss": 0.0469, - "step": 49290 - }, - { - "epoch": 1.2513009265135169, - "grad_norm": 1.3860032558441162, - "learning_rate": 1.165799382324322e-05, - "loss": 0.0822, - "step": 49295 - }, - { - "epoch": 1.2514278461733723, - "grad_norm": 0.23397107422351837, - "learning_rate": 1.1657147692177519e-05, - "loss": 0.0317, - "step": 49300 - }, - { - "epoch": 1.2515547658332276, - "grad_norm": 0.2731282413005829, - "learning_rate": 1.1656301561111817e-05, - "loss": 0.0485, - "step": 49305 - }, - { - "epoch": 1.2516816854930828, - "grad_norm": 0.460480660200119, - "learning_rate": 1.1655455430046114e-05, - "loss": 0.0486, - "step": 49310 - }, - { - "epoch": 1.2518086051529382, - "grad_norm": 0.3907288908958435, - "learning_rate": 1.1654609298980413e-05, - "loss": 0.0697, - "step": 49315 - }, - { - "epoch": 1.2519355248127935, - "grad_norm": 0.5141067504882812, - "learning_rate": 1.1653763167914711e-05, - "loss": 0.0718, - "step": 49320 - }, - { - "epoch": 1.252062444472649, - "grad_norm": 0.6126102209091187, - "learning_rate": 1.165291703684901e-05, - "loss": 0.0778, - "step": 49325 - }, - { - "epoch": 1.2521893641325041, - "grad_norm": 0.48410147428512573, - "learning_rate": 1.1652070905783306e-05, - "loss": 0.0571, - "step": 49330 - }, - { - "epoch": 1.2523162837923594, - "grad_norm": 0.7324599623680115, - "learning_rate": 1.1651224774717604e-05, - "loss": 0.0463, - "step": 49335 - }, - { - "epoch": 1.2524432034522148, - "grad_norm": 0.4912703335285187, - "learning_rate": 1.1650378643651903e-05, - "loss": 0.0397, - "step": 49340 - }, - { - "epoch": 1.25257012311207, - "grad_norm": 0.5532506704330444, - "learning_rate": 1.1649532512586201e-05, - "loss": 0.0572, - "step": 49345 - }, - { - "epoch": 1.2526970427719255, - "grad_norm": 0.5418902039527893, - "learning_rate": 1.1648686381520498e-05, - "loss": 0.039, - "step": 49350 - }, - { - "epoch": 1.2528239624317807, - "grad_norm": 0.77113938331604, - "learning_rate": 1.1647840250454796e-05, - "loss": 0.0588, - "step": 49355 - }, - { - "epoch": 1.252950882091636, - "grad_norm": 0.5329844951629639, - "learning_rate": 1.1646994119389095e-05, - "loss": 0.0688, - "step": 49360 - }, - { - "epoch": 1.2530778017514912, - "grad_norm": 0.36559179425239563, - "learning_rate": 1.1646147988323393e-05, - "loss": 0.0469, - "step": 49365 - }, - { - "epoch": 1.2532047214113466, - "grad_norm": 0.384796142578125, - "learning_rate": 1.164530185725769e-05, - "loss": 0.0421, - "step": 49370 - }, - { - "epoch": 1.2533316410712019, - "grad_norm": 0.3551214933395386, - "learning_rate": 1.1644455726191988e-05, - "loss": 0.053, - "step": 49375 - }, - { - "epoch": 1.2534585607310573, - "grad_norm": 0.704922616481781, - "learning_rate": 1.1643609595126287e-05, - "loss": 0.0378, - "step": 49380 - }, - { - "epoch": 1.2535854803909126, - "grad_norm": 0.4609432816505432, - "learning_rate": 1.1642763464060585e-05, - "loss": 0.0618, - "step": 49385 - }, - { - "epoch": 1.2537124000507678, - "grad_norm": 0.7001742124557495, - "learning_rate": 1.1641917332994882e-05, - "loss": 0.0458, - "step": 49390 - }, - { - "epoch": 1.2538393197106232, - "grad_norm": 0.372547447681427, - "learning_rate": 1.164107120192918e-05, - "loss": 0.0421, - "step": 49395 - }, - { - "epoch": 1.2539662393704785, - "grad_norm": 0.5918241143226624, - "learning_rate": 1.1640225070863479e-05, - "loss": 0.0499, - "step": 49400 - }, - { - "epoch": 1.254093159030334, - "grad_norm": 0.3536445200443268, - "learning_rate": 1.1639378939797777e-05, - "loss": 0.0435, - "step": 49405 - }, - { - "epoch": 1.2542200786901891, - "grad_norm": 0.4062488377094269, - "learning_rate": 1.1638532808732072e-05, - "loss": 0.0519, - "step": 49410 - }, - { - "epoch": 1.2543469983500444, - "grad_norm": 0.47734540700912476, - "learning_rate": 1.163768667766637e-05, - "loss": 0.0302, - "step": 49415 - }, - { - "epoch": 1.2544739180098996, - "grad_norm": 0.39093485474586487, - "learning_rate": 1.1636840546600669e-05, - "loss": 0.0628, - "step": 49420 - }, - { - "epoch": 1.254600837669755, - "grad_norm": 0.5745553970336914, - "learning_rate": 1.1635994415534967e-05, - "loss": 0.0526, - "step": 49425 - }, - { - "epoch": 1.2547277573296103, - "grad_norm": 0.6553739309310913, - "learning_rate": 1.1635148284469264e-05, - "loss": 0.034, - "step": 49430 - }, - { - "epoch": 1.2548546769894657, - "grad_norm": 0.6339263319969177, - "learning_rate": 1.1634302153403562e-05, - "loss": 0.0568, - "step": 49435 - }, - { - "epoch": 1.254981596649321, - "grad_norm": 0.9380654692649841, - "learning_rate": 1.163345602233786e-05, - "loss": 0.0573, - "step": 49440 - }, - { - "epoch": 1.2551085163091762, - "grad_norm": 0.5416566133499146, - "learning_rate": 1.1632609891272159e-05, - "loss": 0.0506, - "step": 49445 - }, - { - "epoch": 1.2552354359690316, - "grad_norm": 0.4518841505050659, - "learning_rate": 1.1631763760206456e-05, - "loss": 0.0395, - "step": 49450 - }, - { - "epoch": 1.2553623556288869, - "grad_norm": 0.5174596309661865, - "learning_rate": 1.1630917629140754e-05, - "loss": 0.0716, - "step": 49455 - }, - { - "epoch": 1.2554892752887423, - "grad_norm": 0.5002907514572144, - "learning_rate": 1.1630071498075053e-05, - "loss": 0.0373, - "step": 49460 - }, - { - "epoch": 1.2556161949485976, - "grad_norm": 0.4610086977481842, - "learning_rate": 1.1629225367009351e-05, - "loss": 0.0486, - "step": 49465 - }, - { - "epoch": 1.2557431146084528, - "grad_norm": 0.6721351742744446, - "learning_rate": 1.1628379235943648e-05, - "loss": 0.0551, - "step": 49470 - }, - { - "epoch": 1.2558700342683082, - "grad_norm": 0.4148712456226349, - "learning_rate": 1.1627533104877946e-05, - "loss": 0.0447, - "step": 49475 - }, - { - "epoch": 1.2559969539281635, - "grad_norm": 0.9124544858932495, - "learning_rate": 1.1626686973812245e-05, - "loss": 0.0334, - "step": 49480 - }, - { - "epoch": 1.256123873588019, - "grad_norm": 0.3872074484825134, - "learning_rate": 1.1625840842746543e-05, - "loss": 0.0328, - "step": 49485 - }, - { - "epoch": 1.2562507932478741, - "grad_norm": 1.076979398727417, - "learning_rate": 1.162499471168084e-05, - "loss": 0.0483, - "step": 49490 - }, - { - "epoch": 1.2563777129077294, - "grad_norm": 0.2859608829021454, - "learning_rate": 1.1624148580615138e-05, - "loss": 0.0502, - "step": 49495 - }, - { - "epoch": 1.2565046325675846, - "grad_norm": 0.8012577891349792, - "learning_rate": 1.1623302449549436e-05, - "loss": 0.0325, - "step": 49500 - }, - { - "epoch": 1.25663155222744, - "grad_norm": 1.0243037939071655, - "learning_rate": 1.1622456318483735e-05, - "loss": 0.0547, - "step": 49505 - }, - { - "epoch": 1.2567584718872953, - "grad_norm": 0.43031325936317444, - "learning_rate": 1.1621610187418032e-05, - "loss": 0.0553, - "step": 49510 - }, - { - "epoch": 1.2568853915471507, - "grad_norm": 0.48579227924346924, - "learning_rate": 1.162076405635233e-05, - "loss": 0.0448, - "step": 49515 - }, - { - "epoch": 1.257012311207006, - "grad_norm": 0.4639450013637543, - "learning_rate": 1.1619917925286628e-05, - "loss": 0.0568, - "step": 49520 - }, - { - "epoch": 1.2571392308668612, - "grad_norm": 0.46689802408218384, - "learning_rate": 1.1619071794220927e-05, - "loss": 0.0263, - "step": 49525 - }, - { - "epoch": 1.2572661505267166, - "grad_norm": 0.3461529016494751, - "learning_rate": 1.1618225663155222e-05, - "loss": 0.0579, - "step": 49530 - }, - { - "epoch": 1.2573930701865719, - "grad_norm": 0.5738146901130676, - "learning_rate": 1.1617379532089522e-05, - "loss": 0.0453, - "step": 49535 - }, - { - "epoch": 1.2575199898464273, - "grad_norm": 1.0359755754470825, - "learning_rate": 1.161653340102382e-05, - "loss": 0.0412, - "step": 49540 - }, - { - "epoch": 1.2576469095062826, - "grad_norm": 0.47640934586524963, - "learning_rate": 1.1615687269958119e-05, - "loss": 0.0464, - "step": 49545 - }, - { - "epoch": 1.2577738291661378, - "grad_norm": 2.196767568588257, - "learning_rate": 1.1614841138892417e-05, - "loss": 0.0471, - "step": 49550 - }, - { - "epoch": 1.257900748825993, - "grad_norm": 0.6336268186569214, - "learning_rate": 1.1613995007826712e-05, - "loss": 0.0316, - "step": 49555 - }, - { - "epoch": 1.2580276684858485, - "grad_norm": 1.0863559246063232, - "learning_rate": 1.161314887676101e-05, - "loss": 0.056, - "step": 49560 - }, - { - "epoch": 1.2581545881457037, - "grad_norm": 0.33366525173187256, - "learning_rate": 1.1612302745695309e-05, - "loss": 0.0391, - "step": 49565 - }, - { - "epoch": 1.2582815078055591, - "grad_norm": 0.5163776874542236, - "learning_rate": 1.1611456614629609e-05, - "loss": 0.0489, - "step": 49570 - }, - { - "epoch": 1.2584084274654144, - "grad_norm": 0.4543854892253876, - "learning_rate": 1.1610610483563904e-05, - "loss": 0.0653, - "step": 49575 - }, - { - "epoch": 1.2585353471252696, - "grad_norm": 0.6712795495986938, - "learning_rate": 1.1609764352498202e-05, - "loss": 0.0674, - "step": 49580 - }, - { - "epoch": 1.258662266785125, - "grad_norm": 0.632601797580719, - "learning_rate": 1.16089182214325e-05, - "loss": 0.0361, - "step": 49585 - }, - { - "epoch": 1.2587891864449803, - "grad_norm": 0.7518216967582703, - "learning_rate": 1.16080720903668e-05, - "loss": 0.0513, - "step": 49590 - }, - { - "epoch": 1.2589161061048357, - "grad_norm": 0.3525523543357849, - "learning_rate": 1.1607225959301096e-05, - "loss": 0.0419, - "step": 49595 - }, - { - "epoch": 1.259043025764691, - "grad_norm": 0.5756150484085083, - "learning_rate": 1.1606379828235394e-05, - "loss": 0.0507, - "step": 49600 - }, - { - "epoch": 1.2591699454245462, - "grad_norm": 0.5323885083198547, - "learning_rate": 1.1605533697169693e-05, - "loss": 0.0556, - "step": 49605 - }, - { - "epoch": 1.2592968650844016, - "grad_norm": 0.7409961819648743, - "learning_rate": 1.1604687566103991e-05, - "loss": 0.052, - "step": 49610 - }, - { - "epoch": 1.2594237847442569, - "grad_norm": 0.6001591086387634, - "learning_rate": 1.1603841435038288e-05, - "loss": 0.0508, - "step": 49615 - }, - { - "epoch": 1.2595507044041123, - "grad_norm": 0.47746458649635315, - "learning_rate": 1.1602995303972586e-05, - "loss": 0.0375, - "step": 49620 - }, - { - "epoch": 1.2596776240639675, - "grad_norm": 0.5253760814666748, - "learning_rate": 1.1602149172906885e-05, - "loss": 0.0517, - "step": 49625 - }, - { - "epoch": 1.2598045437238228, - "grad_norm": 0.43663567304611206, - "learning_rate": 1.1601303041841183e-05, - "loss": 0.0533, - "step": 49630 - }, - { - "epoch": 1.259931463383678, - "grad_norm": 0.3516816198825836, - "learning_rate": 1.160045691077548e-05, - "loss": 0.0368, - "step": 49635 - }, - { - "epoch": 1.2600583830435335, - "grad_norm": 0.7530446648597717, - "learning_rate": 1.1599610779709778e-05, - "loss": 0.0468, - "step": 49640 - }, - { - "epoch": 1.2601853027033887, - "grad_norm": 0.7241869568824768, - "learning_rate": 1.1598764648644077e-05, - "loss": 0.0372, - "step": 49645 - }, - { - "epoch": 1.2603122223632441, - "grad_norm": 0.6159511208534241, - "learning_rate": 1.1597918517578375e-05, - "loss": 0.0469, - "step": 49650 - }, - { - "epoch": 1.2604391420230994, - "grad_norm": 0.7042768001556396, - "learning_rate": 1.1597072386512672e-05, - "loss": 0.0601, - "step": 49655 - }, - { - "epoch": 1.2605660616829546, - "grad_norm": 0.5378212332725525, - "learning_rate": 1.159622625544697e-05, - "loss": 0.0405, - "step": 49660 - }, - { - "epoch": 1.26069298134281, - "grad_norm": 0.4255902171134949, - "learning_rate": 1.1595380124381268e-05, - "loss": 0.0629, - "step": 49665 - }, - { - "epoch": 1.2608199010026653, - "grad_norm": 0.6325491070747375, - "learning_rate": 1.1594533993315567e-05, - "loss": 0.0563, - "step": 49670 - }, - { - "epoch": 1.2609468206625207, - "grad_norm": 1.1443179845809937, - "learning_rate": 1.1593687862249864e-05, - "loss": 0.0488, - "step": 49675 - }, - { - "epoch": 1.261073740322376, - "grad_norm": 0.3958832025527954, - "learning_rate": 1.1592841731184162e-05, - "loss": 0.0351, - "step": 49680 - }, - { - "epoch": 1.2612006599822312, - "grad_norm": 0.5235267281532288, - "learning_rate": 1.159199560011846e-05, - "loss": 0.0543, - "step": 49685 - }, - { - "epoch": 1.2613275796420866, - "grad_norm": 0.3901006877422333, - "learning_rate": 1.1591149469052759e-05, - "loss": 0.0494, - "step": 49690 - }, - { - "epoch": 1.2614544993019419, - "grad_norm": 0.7376134991645813, - "learning_rate": 1.1590303337987054e-05, - "loss": 0.0595, - "step": 49695 - }, - { - "epoch": 1.2615814189617973, - "grad_norm": 0.4819434583187103, - "learning_rate": 1.1589457206921352e-05, - "loss": 0.0474, - "step": 49700 - }, - { - "epoch": 1.2617083386216525, - "grad_norm": 0.5005621314048767, - "learning_rate": 1.158861107585565e-05, - "loss": 0.0554, - "step": 49705 - }, - { - "epoch": 1.2618352582815078, - "grad_norm": 0.5170199871063232, - "learning_rate": 1.1587764944789949e-05, - "loss": 0.0468, - "step": 49710 - }, - { - "epoch": 1.261962177941363, - "grad_norm": 0.488643079996109, - "learning_rate": 1.1586918813724246e-05, - "loss": 0.0388, - "step": 49715 - }, - { - "epoch": 1.2620890976012185, - "grad_norm": 0.7829392552375793, - "learning_rate": 1.1586072682658544e-05, - "loss": 0.0533, - "step": 49720 - }, - { - "epoch": 1.2622160172610737, - "grad_norm": 0.6337825059890747, - "learning_rate": 1.1585226551592843e-05, - "loss": 0.0545, - "step": 49725 - }, - { - "epoch": 1.2623429369209291, - "grad_norm": 0.31528523564338684, - "learning_rate": 1.1584380420527141e-05, - "loss": 0.0546, - "step": 49730 - }, - { - "epoch": 1.2624698565807844, - "grad_norm": 0.3525813817977905, - "learning_rate": 1.1583534289461438e-05, - "loss": 0.0507, - "step": 49735 - }, - { - "epoch": 1.2625967762406396, - "grad_norm": 0.3909033536911011, - "learning_rate": 1.1582688158395736e-05, - "loss": 0.0589, - "step": 49740 - }, - { - "epoch": 1.262723695900495, - "grad_norm": 0.5947679281234741, - "learning_rate": 1.1581842027330034e-05, - "loss": 0.0679, - "step": 49745 - }, - { - "epoch": 1.2628506155603503, - "grad_norm": 0.5934109091758728, - "learning_rate": 1.1580995896264333e-05, - "loss": 0.0518, - "step": 49750 - }, - { - "epoch": 1.2629775352202057, - "grad_norm": 0.3886428773403168, - "learning_rate": 1.158014976519863e-05, - "loss": 0.0478, - "step": 49755 - }, - { - "epoch": 1.263104454880061, - "grad_norm": 0.39156532287597656, - "learning_rate": 1.1579303634132928e-05, - "loss": 0.0452, - "step": 49760 - }, - { - "epoch": 1.2632313745399162, - "grad_norm": 2.1279942989349365, - "learning_rate": 1.1578457503067226e-05, - "loss": 0.0502, - "step": 49765 - }, - { - "epoch": 1.2633582941997714, - "grad_norm": 0.6011947989463806, - "learning_rate": 1.1577611372001525e-05, - "loss": 0.0434, - "step": 49770 - }, - { - "epoch": 1.2634852138596269, - "grad_norm": 0.5116747617721558, - "learning_rate": 1.1576765240935821e-05, - "loss": 0.0451, - "step": 49775 - }, - { - "epoch": 1.263612133519482, - "grad_norm": 0.4125896394252777, - "learning_rate": 1.157591910987012e-05, - "loss": 0.0434, - "step": 49780 - }, - { - "epoch": 1.2637390531793375, - "grad_norm": 0.4306887686252594, - "learning_rate": 1.1575072978804418e-05, - "loss": 0.0428, - "step": 49785 - }, - { - "epoch": 1.2638659728391928, - "grad_norm": 0.36260098218917847, - "learning_rate": 1.1574226847738717e-05, - "loss": 0.0496, - "step": 49790 - }, - { - "epoch": 1.263992892499048, - "grad_norm": 0.24723586440086365, - "learning_rate": 1.1573380716673013e-05, - "loss": 0.0384, - "step": 49795 - }, - { - "epoch": 1.2641198121589035, - "grad_norm": 0.31263360381126404, - "learning_rate": 1.1572534585607312e-05, - "loss": 0.0361, - "step": 49800 - }, - { - "epoch": 1.2642467318187587, - "grad_norm": 0.3854164481163025, - "learning_rate": 1.157168845454161e-05, - "loss": 0.0553, - "step": 49805 - }, - { - "epoch": 1.2643736514786141, - "grad_norm": 0.5211403965950012, - "learning_rate": 1.1570842323475909e-05, - "loss": 0.0723, - "step": 49810 - }, - { - "epoch": 1.2645005711384694, - "grad_norm": 0.5250747799873352, - "learning_rate": 1.1569996192410205e-05, - "loss": 0.0421, - "step": 49815 - }, - { - "epoch": 1.2646274907983246, - "grad_norm": 0.6391470432281494, - "learning_rate": 1.1569150061344504e-05, - "loss": 0.066, - "step": 49820 - }, - { - "epoch": 1.26475441045818, - "grad_norm": 0.3985007107257843, - "learning_rate": 1.1568303930278802e-05, - "loss": 0.0634, - "step": 49825 - }, - { - "epoch": 1.2648813301180353, - "grad_norm": 0.5053073763847351, - "learning_rate": 1.15674577992131e-05, - "loss": 0.0599, - "step": 49830 - }, - { - "epoch": 1.2650082497778907, - "grad_norm": 0.3633158802986145, - "learning_rate": 1.1566611668147395e-05, - "loss": 0.0417, - "step": 49835 - }, - { - "epoch": 1.265135169437746, - "grad_norm": 0.6008504629135132, - "learning_rate": 1.1565765537081694e-05, - "loss": 0.0332, - "step": 49840 - }, - { - "epoch": 1.2652620890976012, - "grad_norm": 0.27080345153808594, - "learning_rate": 1.1564919406015992e-05, - "loss": 0.0434, - "step": 49845 - }, - { - "epoch": 1.2653890087574564, - "grad_norm": 0.8451685309410095, - "learning_rate": 1.156407327495029e-05, - "loss": 0.0582, - "step": 49850 - }, - { - "epoch": 1.2655159284173119, - "grad_norm": 0.7209486961364746, - "learning_rate": 1.1563227143884587e-05, - "loss": 0.0454, - "step": 49855 - }, - { - "epoch": 1.265642848077167, - "grad_norm": 0.5303085446357727, - "learning_rate": 1.1562381012818886e-05, - "loss": 0.0466, - "step": 49860 - }, - { - "epoch": 1.2657697677370225, - "grad_norm": 0.5749386548995972, - "learning_rate": 1.1561534881753184e-05, - "loss": 0.0618, - "step": 49865 - }, - { - "epoch": 1.2658966873968778, - "grad_norm": 0.38653600215911865, - "learning_rate": 1.1560688750687483e-05, - "loss": 0.0697, - "step": 49870 - }, - { - "epoch": 1.266023607056733, - "grad_norm": 0.33260732889175415, - "learning_rate": 1.155984261962178e-05, - "loss": 0.0476, - "step": 49875 - }, - { - "epoch": 1.2661505267165885, - "grad_norm": 0.492939293384552, - "learning_rate": 1.1558996488556078e-05, - "loss": 0.0471, - "step": 49880 - }, - { - "epoch": 1.2662774463764437, - "grad_norm": 0.3538743555545807, - "learning_rate": 1.1558150357490376e-05, - "loss": 0.049, - "step": 49885 - }, - { - "epoch": 1.2664043660362991, - "grad_norm": 1.6365416049957275, - "learning_rate": 1.1557304226424675e-05, - "loss": 0.0487, - "step": 49890 - }, - { - "epoch": 1.2665312856961544, - "grad_norm": 0.41946470737457275, - "learning_rate": 1.1556458095358971e-05, - "loss": 0.0471, - "step": 49895 - }, - { - "epoch": 1.2666582053560096, - "grad_norm": 0.3229808509349823, - "learning_rate": 1.155561196429327e-05, - "loss": 0.0521, - "step": 49900 - }, - { - "epoch": 1.2667851250158648, - "grad_norm": 0.42085000872612, - "learning_rate": 1.1554765833227568e-05, - "loss": 0.0443, - "step": 49905 - }, - { - "epoch": 1.2669120446757203, - "grad_norm": 0.44441330432891846, - "learning_rate": 1.1553919702161866e-05, - "loss": 0.0476, - "step": 49910 - }, - { - "epoch": 1.2670389643355755, - "grad_norm": 0.5113344788551331, - "learning_rate": 1.1553073571096163e-05, - "loss": 0.0556, - "step": 49915 - }, - { - "epoch": 1.267165883995431, - "grad_norm": 0.8360052704811096, - "learning_rate": 1.1552227440030462e-05, - "loss": 0.042, - "step": 49920 - }, - { - "epoch": 1.2672928036552862, - "grad_norm": 2.3143985271453857, - "learning_rate": 1.155138130896476e-05, - "loss": 0.0417, - "step": 49925 - }, - { - "epoch": 1.2674197233151414, - "grad_norm": 0.46111348271369934, - "learning_rate": 1.1550535177899058e-05, - "loss": 0.0693, - "step": 49930 - }, - { - "epoch": 1.2675466429749969, - "grad_norm": 0.80971360206604, - "learning_rate": 1.1549689046833355e-05, - "loss": 0.0611, - "step": 49935 - }, - { - "epoch": 1.267673562634852, - "grad_norm": 0.45825934410095215, - "learning_rate": 1.1548842915767653e-05, - "loss": 0.053, - "step": 49940 - }, - { - "epoch": 1.2678004822947075, - "grad_norm": 0.5305872559547424, - "learning_rate": 1.1547996784701952e-05, - "loss": 0.0345, - "step": 49945 - }, - { - "epoch": 1.2679274019545628, - "grad_norm": 0.5255352258682251, - "learning_rate": 1.154715065363625e-05, - "loss": 0.0652, - "step": 49950 - }, - { - "epoch": 1.268054321614418, - "grad_norm": 0.5008720755577087, - "learning_rate": 1.1546304522570545e-05, - "loss": 0.0625, - "step": 49955 - }, - { - "epoch": 1.2681812412742735, - "grad_norm": 0.4904651343822479, - "learning_rate": 1.1545458391504845e-05, - "loss": 0.0563, - "step": 49960 - }, - { - "epoch": 1.2683081609341287, - "grad_norm": 0.24666230380535126, - "learning_rate": 1.1544612260439144e-05, - "loss": 0.0343, - "step": 49965 - }, - { - "epoch": 1.2684350805939841, - "grad_norm": 0.37007570266723633, - "learning_rate": 1.1543766129373442e-05, - "loss": 0.0542, - "step": 49970 - }, - { - "epoch": 1.2685620002538394, - "grad_norm": 1.4957159757614136, - "learning_rate": 1.1542919998307737e-05, - "loss": 0.0411, - "step": 49975 - }, - { - "epoch": 1.2686889199136946, - "grad_norm": 0.4166219234466553, - "learning_rate": 1.1542073867242036e-05, - "loss": 0.0454, - "step": 49980 - }, - { - "epoch": 1.2688158395735498, - "grad_norm": 0.3966434597969055, - "learning_rate": 1.1541227736176334e-05, - "loss": 0.035, - "step": 49985 - }, - { - "epoch": 1.2689427592334053, - "grad_norm": 0.5404184460639954, - "learning_rate": 1.1540381605110632e-05, - "loss": 0.0593, - "step": 49990 - }, - { - "epoch": 1.2690696788932605, - "grad_norm": 0.5797424912452698, - "learning_rate": 1.1539535474044929e-05, - "loss": 0.0527, - "step": 49995 - }, - { - "epoch": 1.269196598553116, - "grad_norm": 0.37031790614128113, - "learning_rate": 1.1538689342979228e-05, - "loss": 0.0506, - "step": 50000 - }, - { - "epoch": 1.2693235182129712, - "grad_norm": 0.5101723670959473, - "learning_rate": 1.1537843211913526e-05, - "loss": 0.035, - "step": 50005 - }, - { - "epoch": 1.2694504378728264, - "grad_norm": 0.371918648481369, - "learning_rate": 1.1536997080847824e-05, - "loss": 0.0403, - "step": 50010 - }, - { - "epoch": 1.2695773575326819, - "grad_norm": 0.3092801868915558, - "learning_rate": 1.1536150949782121e-05, - "loss": 0.0715, - "step": 50015 - }, - { - "epoch": 1.269704277192537, - "grad_norm": 0.4866507649421692, - "learning_rate": 1.153530481871642e-05, - "loss": 0.0376, - "step": 50020 - }, - { - "epoch": 1.2698311968523925, - "grad_norm": 0.5594435930252075, - "learning_rate": 1.1534458687650718e-05, - "loss": 0.0605, - "step": 50025 - }, - { - "epoch": 1.2699581165122478, - "grad_norm": 0.5024846792221069, - "learning_rate": 1.1533612556585016e-05, - "loss": 0.0537, - "step": 50030 - }, - { - "epoch": 1.270085036172103, - "grad_norm": 0.9016826748847961, - "learning_rate": 1.1532766425519313e-05, - "loss": 0.081, - "step": 50035 - }, - { - "epoch": 1.2702119558319585, - "grad_norm": 1.4193925857543945, - "learning_rate": 1.1531920294453611e-05, - "loss": 0.0629, - "step": 50040 - }, - { - "epoch": 1.2703388754918137, - "grad_norm": 0.9615411162376404, - "learning_rate": 1.153107416338791e-05, - "loss": 0.055, - "step": 50045 - }, - { - "epoch": 1.2704657951516691, - "grad_norm": 0.49710479378700256, - "learning_rate": 1.1530228032322208e-05, - "loss": 0.0463, - "step": 50050 - }, - { - "epoch": 1.2705927148115244, - "grad_norm": 0.41898977756500244, - "learning_rate": 1.1529381901256505e-05, - "loss": 0.0403, - "step": 50055 - }, - { - "epoch": 1.2707196344713796, - "grad_norm": 0.37807220220565796, - "learning_rate": 1.1528535770190803e-05, - "loss": 0.0631, - "step": 50060 - }, - { - "epoch": 1.2708465541312348, - "grad_norm": 0.46744272112846375, - "learning_rate": 1.1527689639125102e-05, - "loss": 0.0443, - "step": 50065 - }, - { - "epoch": 1.2709734737910903, - "grad_norm": 1.041251301765442, - "learning_rate": 1.15268435080594e-05, - "loss": 0.046, - "step": 50070 - }, - { - "epoch": 1.2711003934509455, - "grad_norm": 0.36855971813201904, - "learning_rate": 1.1525997376993698e-05, - "loss": 0.047, - "step": 50075 - }, - { - "epoch": 1.271227313110801, - "grad_norm": 0.4698679447174072, - "learning_rate": 1.1525151245927995e-05, - "loss": 0.0547, - "step": 50080 - }, - { - "epoch": 1.2713542327706562, - "grad_norm": 0.34856823086738586, - "learning_rate": 1.1524305114862294e-05, - "loss": 0.0263, - "step": 50085 - }, - { - "epoch": 1.2714811524305114, - "grad_norm": 0.5334038138389587, - "learning_rate": 1.1523458983796592e-05, - "loss": 0.0536, - "step": 50090 - }, - { - "epoch": 1.2716080720903669, - "grad_norm": 0.4744269847869873, - "learning_rate": 1.152261285273089e-05, - "loss": 0.0378, - "step": 50095 - }, - { - "epoch": 1.271734991750222, - "grad_norm": 0.5972955226898193, - "learning_rate": 1.1521766721665187e-05, - "loss": 0.0333, - "step": 50100 - }, - { - "epoch": 1.2718619114100775, - "grad_norm": 0.4030572772026062, - "learning_rate": 1.1520920590599485e-05, - "loss": 0.0566, - "step": 50105 - }, - { - "epoch": 1.2719888310699328, - "grad_norm": 0.3767230808734894, - "learning_rate": 1.1520074459533784e-05, - "loss": 0.044, - "step": 50110 - }, - { - "epoch": 1.272115750729788, - "grad_norm": 0.37558504939079285, - "learning_rate": 1.1519228328468082e-05, - "loss": 0.0458, - "step": 50115 - }, - { - "epoch": 1.2722426703896432, - "grad_norm": 0.30381688475608826, - "learning_rate": 1.1518382197402377e-05, - "loss": 0.0385, - "step": 50120 - }, - { - "epoch": 1.2723695900494987, - "grad_norm": 0.7072136998176575, - "learning_rate": 1.1517536066336676e-05, - "loss": 0.0517, - "step": 50125 - }, - { - "epoch": 1.272496509709354, - "grad_norm": 1.1328248977661133, - "learning_rate": 1.1516689935270974e-05, - "loss": 0.0636, - "step": 50130 - }, - { - "epoch": 1.2726234293692094, - "grad_norm": 0.498518168926239, - "learning_rate": 1.1515843804205274e-05, - "loss": 0.0535, - "step": 50135 - }, - { - "epoch": 1.2727503490290646, - "grad_norm": 0.3884521424770355, - "learning_rate": 1.151499767313957e-05, - "loss": 0.0533, - "step": 50140 - }, - { - "epoch": 1.2728772686889198, - "grad_norm": 0.7584547400474548, - "learning_rate": 1.1514151542073868e-05, - "loss": 0.0507, - "step": 50145 - }, - { - "epoch": 1.2730041883487753, - "grad_norm": 0.4044741094112396, - "learning_rate": 1.1513305411008166e-05, - "loss": 0.0508, - "step": 50150 - }, - { - "epoch": 1.2731311080086305, - "grad_norm": 0.2615651786327362, - "learning_rate": 1.1512459279942464e-05, - "loss": 0.0597, - "step": 50155 - }, - { - "epoch": 1.273258027668486, - "grad_norm": 0.3738313317298889, - "learning_rate": 1.1511613148876761e-05, - "loss": 0.0506, - "step": 50160 - }, - { - "epoch": 1.2733849473283412, - "grad_norm": 0.39385902881622314, - "learning_rate": 1.151076701781106e-05, - "loss": 0.0529, - "step": 50165 - }, - { - "epoch": 1.2735118669881964, - "grad_norm": 0.548884928226471, - "learning_rate": 1.1509920886745358e-05, - "loss": 0.0355, - "step": 50170 - }, - { - "epoch": 1.2736387866480519, - "grad_norm": 0.47614210844039917, - "learning_rate": 1.1509074755679656e-05, - "loss": 0.0348, - "step": 50175 - }, - { - "epoch": 1.273765706307907, - "grad_norm": 1.3573839664459229, - "learning_rate": 1.1508228624613953e-05, - "loss": 0.0647, - "step": 50180 - }, - { - "epoch": 1.2738926259677625, - "grad_norm": 0.7749003171920776, - "learning_rate": 1.1507382493548251e-05, - "loss": 0.0511, - "step": 50185 - }, - { - "epoch": 1.2740195456276178, - "grad_norm": 0.378508061170578, - "learning_rate": 1.150653636248255e-05, - "loss": 0.0416, - "step": 50190 - }, - { - "epoch": 1.274146465287473, - "grad_norm": 0.18507710099220276, - "learning_rate": 1.1505690231416848e-05, - "loss": 0.0448, - "step": 50195 - }, - { - "epoch": 1.2742733849473282, - "grad_norm": 0.5079507827758789, - "learning_rate": 1.1504844100351145e-05, - "loss": 0.074, - "step": 50200 - }, - { - "epoch": 1.2744003046071837, - "grad_norm": 0.378673255443573, - "learning_rate": 1.1503997969285443e-05, - "loss": 0.0317, - "step": 50205 - }, - { - "epoch": 1.274527224267039, - "grad_norm": 0.32712358236312866, - "learning_rate": 1.1503151838219742e-05, - "loss": 0.0578, - "step": 50210 - }, - { - "epoch": 1.2746541439268944, - "grad_norm": 0.5268957018852234, - "learning_rate": 1.150230570715404e-05, - "loss": 0.0502, - "step": 50215 - }, - { - "epoch": 1.2747810635867496, - "grad_norm": 0.5224324464797974, - "learning_rate": 1.1501459576088337e-05, - "loss": 0.0474, - "step": 50220 - }, - { - "epoch": 1.2749079832466048, - "grad_norm": 0.4316576421260834, - "learning_rate": 1.1500613445022635e-05, - "loss": 0.0403, - "step": 50225 - }, - { - "epoch": 1.2750349029064603, - "grad_norm": 0.5947959423065186, - "learning_rate": 1.1499767313956934e-05, - "loss": 0.0455, - "step": 50230 - }, - { - "epoch": 1.2751618225663155, - "grad_norm": 0.6389008164405823, - "learning_rate": 1.1498921182891232e-05, - "loss": 0.0621, - "step": 50235 - }, - { - "epoch": 1.275288742226171, - "grad_norm": 0.43400341272354126, - "learning_rate": 1.1498075051825529e-05, - "loss": 0.0411, - "step": 50240 - }, - { - "epoch": 1.2754156618860262, - "grad_norm": 0.3568451702594757, - "learning_rate": 1.1497228920759827e-05, - "loss": 0.0358, - "step": 50245 - }, - { - "epoch": 1.2755425815458814, - "grad_norm": 0.584367036819458, - "learning_rate": 1.1496382789694126e-05, - "loss": 0.0615, - "step": 50250 - }, - { - "epoch": 1.2756695012057366, - "grad_norm": 0.4277099668979645, - "learning_rate": 1.1495536658628424e-05, - "loss": 0.0524, - "step": 50255 - }, - { - "epoch": 1.275796420865592, - "grad_norm": 0.4825708568096161, - "learning_rate": 1.1494690527562719e-05, - "loss": 0.039, - "step": 50260 - }, - { - "epoch": 1.2759233405254473, - "grad_norm": 0.5037581324577332, - "learning_rate": 1.1493844396497017e-05, - "loss": 0.0552, - "step": 50265 - }, - { - "epoch": 1.2760502601853028, - "grad_norm": 0.531256914138794, - "learning_rate": 1.1492998265431316e-05, - "loss": 0.0543, - "step": 50270 - }, - { - "epoch": 1.276177179845158, - "grad_norm": 0.26732969284057617, - "learning_rate": 1.1492152134365614e-05, - "loss": 0.0513, - "step": 50275 - }, - { - "epoch": 1.2763040995050132, - "grad_norm": 0.39576300978660583, - "learning_rate": 1.1491306003299911e-05, - "loss": 0.0372, - "step": 50280 - }, - { - "epoch": 1.2764310191648687, - "grad_norm": 2.4663846492767334, - "learning_rate": 1.149045987223421e-05, - "loss": 0.0396, - "step": 50285 - }, - { - "epoch": 1.276557938824724, - "grad_norm": 0.4570237696170807, - "learning_rate": 1.1489613741168508e-05, - "loss": 0.0579, - "step": 50290 - }, - { - "epoch": 1.2766848584845794, - "grad_norm": 0.3177133798599243, - "learning_rate": 1.1488767610102806e-05, - "loss": 0.0486, - "step": 50295 - }, - { - "epoch": 1.2768117781444346, - "grad_norm": 0.43757301568984985, - "learning_rate": 1.1487921479037103e-05, - "loss": 0.0439, - "step": 50300 - }, - { - "epoch": 1.2769386978042898, - "grad_norm": 0.5071711540222168, - "learning_rate": 1.1487075347971401e-05, - "loss": 0.0455, - "step": 50305 - }, - { - "epoch": 1.2770656174641453, - "grad_norm": 0.32714423537254333, - "learning_rate": 1.14862292169057e-05, - "loss": 0.0505, - "step": 50310 - }, - { - "epoch": 1.2771925371240005, - "grad_norm": 0.596071720123291, - "learning_rate": 1.1485383085839998e-05, - "loss": 0.0527, - "step": 50315 - }, - { - "epoch": 1.277319456783856, - "grad_norm": 0.5077025294303894, - "learning_rate": 1.1484536954774295e-05, - "loss": 0.051, - "step": 50320 - }, - { - "epoch": 1.2774463764437112, - "grad_norm": 0.4828338623046875, - "learning_rate": 1.1483690823708593e-05, - "loss": 0.0564, - "step": 50325 - }, - { - "epoch": 1.2775732961035664, - "grad_norm": 0.3493671417236328, - "learning_rate": 1.1482844692642892e-05, - "loss": 0.0391, - "step": 50330 - }, - { - "epoch": 1.2777002157634216, - "grad_norm": 0.34307020902633667, - "learning_rate": 1.148199856157719e-05, - "loss": 0.0551, - "step": 50335 - }, - { - "epoch": 1.277827135423277, - "grad_norm": 0.36445629596710205, - "learning_rate": 1.1481152430511487e-05, - "loss": 0.0468, - "step": 50340 - }, - { - "epoch": 1.2779540550831323, - "grad_norm": 0.25721636414527893, - "learning_rate": 1.1480306299445785e-05, - "loss": 0.0374, - "step": 50345 - }, - { - "epoch": 1.2780809747429878, - "grad_norm": 0.7383787631988525, - "learning_rate": 1.1479460168380083e-05, - "loss": 0.0517, - "step": 50350 - }, - { - "epoch": 1.278207894402843, - "grad_norm": 0.38485515117645264, - "learning_rate": 1.1478614037314382e-05, - "loss": 0.0406, - "step": 50355 - }, - { - "epoch": 1.2783348140626982, - "grad_norm": 0.5339574813842773, - "learning_rate": 1.1477767906248679e-05, - "loss": 0.0328, - "step": 50360 - }, - { - "epoch": 1.2784617337225537, - "grad_norm": 0.5266634821891785, - "learning_rate": 1.1476921775182977e-05, - "loss": 0.0473, - "step": 50365 - }, - { - "epoch": 1.278588653382409, - "grad_norm": 0.6661748886108398, - "learning_rate": 1.1476075644117275e-05, - "loss": 0.042, - "step": 50370 - }, - { - "epoch": 1.2787155730422644, - "grad_norm": 0.44630172848701477, - "learning_rate": 1.1475229513051574e-05, - "loss": 0.0468, - "step": 50375 - }, - { - "epoch": 1.2788424927021196, - "grad_norm": 0.5831486582756042, - "learning_rate": 1.147438338198587e-05, - "loss": 0.0521, - "step": 50380 - }, - { - "epoch": 1.2789694123619748, - "grad_norm": 0.46802034974098206, - "learning_rate": 1.1473537250920169e-05, - "loss": 0.0519, - "step": 50385 - }, - { - "epoch": 1.2790963320218303, - "grad_norm": 0.3121873140335083, - "learning_rate": 1.1472691119854467e-05, - "loss": 0.0659, - "step": 50390 - }, - { - "epoch": 1.2792232516816855, - "grad_norm": 0.47905561327934265, - "learning_rate": 1.1471844988788766e-05, - "loss": 0.061, - "step": 50395 - }, - { - "epoch": 1.279350171341541, - "grad_norm": 0.5119578838348389, - "learning_rate": 1.147099885772306e-05, - "loss": 0.0399, - "step": 50400 - }, - { - "epoch": 1.2794770910013962, - "grad_norm": 1.12296462059021, - "learning_rate": 1.1470152726657359e-05, - "loss": 0.0507, - "step": 50405 - }, - { - "epoch": 1.2796040106612514, - "grad_norm": 0.5859237909317017, - "learning_rate": 1.1469306595591658e-05, - "loss": 0.0489, - "step": 50410 - }, - { - "epoch": 1.2797309303211066, - "grad_norm": 0.2912003695964813, - "learning_rate": 1.1468460464525956e-05, - "loss": 0.0362, - "step": 50415 - }, - { - "epoch": 1.279857849980962, - "grad_norm": 0.7639374136924744, - "learning_rate": 1.1467614333460253e-05, - "loss": 0.0426, - "step": 50420 - }, - { - "epoch": 1.2799847696408173, - "grad_norm": 0.4668353199958801, - "learning_rate": 1.1466768202394551e-05, - "loss": 0.0638, - "step": 50425 - }, - { - "epoch": 1.2801116893006728, - "grad_norm": 0.49789077043533325, - "learning_rate": 1.146592207132885e-05, - "loss": 0.0456, - "step": 50430 - }, - { - "epoch": 1.280238608960528, - "grad_norm": 0.5145264863967896, - "learning_rate": 1.1465075940263148e-05, - "loss": 0.0782, - "step": 50435 - }, - { - "epoch": 1.2803655286203832, - "grad_norm": 0.5782946348190308, - "learning_rate": 1.1464229809197445e-05, - "loss": 0.0382, - "step": 50440 - }, - { - "epoch": 1.2804924482802387, - "grad_norm": 0.607715904712677, - "learning_rate": 1.1463383678131743e-05, - "loss": 0.0526, - "step": 50445 - }, - { - "epoch": 1.280619367940094, - "grad_norm": 0.3658584952354431, - "learning_rate": 1.1462537547066041e-05, - "loss": 0.0501, - "step": 50450 - }, - { - "epoch": 1.2807462875999494, - "grad_norm": 0.5784962177276611, - "learning_rate": 1.146169141600034e-05, - "loss": 0.0635, - "step": 50455 - }, - { - "epoch": 1.2808732072598046, - "grad_norm": 0.316525399684906, - "learning_rate": 1.1460845284934636e-05, - "loss": 0.0385, - "step": 50460 - }, - { - "epoch": 1.2810001269196598, - "grad_norm": 0.44780638813972473, - "learning_rate": 1.1459999153868935e-05, - "loss": 0.0296, - "step": 50465 - }, - { - "epoch": 1.281127046579515, - "grad_norm": 0.3856445550918579, - "learning_rate": 1.1459153022803233e-05, - "loss": 0.0527, - "step": 50470 - }, - { - "epoch": 1.2812539662393705, - "grad_norm": 0.3427886962890625, - "learning_rate": 1.1458306891737532e-05, - "loss": 0.0513, - "step": 50475 - }, - { - "epoch": 1.2813808858992257, - "grad_norm": 0.29328668117523193, - "learning_rate": 1.1457460760671828e-05, - "loss": 0.0495, - "step": 50480 - }, - { - "epoch": 1.2815078055590812, - "grad_norm": 0.47195789217948914, - "learning_rate": 1.1456614629606127e-05, - "loss": 0.0404, - "step": 50485 - }, - { - "epoch": 1.2816347252189364, - "grad_norm": 0.36162739992141724, - "learning_rate": 1.1455768498540425e-05, - "loss": 0.0362, - "step": 50490 - }, - { - "epoch": 1.2817616448787916, - "grad_norm": 0.6077198386192322, - "learning_rate": 1.1454922367474724e-05, - "loss": 0.0496, - "step": 50495 - }, - { - "epoch": 1.281888564538647, - "grad_norm": 0.37206920981407166, - "learning_rate": 1.145407623640902e-05, - "loss": 0.0511, - "step": 50500 - }, - { - "epoch": 1.2820154841985023, - "grad_norm": 0.6220295429229736, - "learning_rate": 1.1453230105343319e-05, - "loss": 0.0544, - "step": 50505 - }, - { - "epoch": 1.2821424038583578, - "grad_norm": 0.45226532220840454, - "learning_rate": 1.1452383974277617e-05, - "loss": 0.0425, - "step": 50510 - }, - { - "epoch": 1.282269323518213, - "grad_norm": 1.5752347707748413, - "learning_rate": 1.1451537843211915e-05, - "loss": 0.0505, - "step": 50515 - }, - { - "epoch": 1.2823962431780682, - "grad_norm": 0.2906428277492523, - "learning_rate": 1.145069171214621e-05, - "loss": 0.0439, - "step": 50520 - }, - { - "epoch": 1.2825231628379237, - "grad_norm": 0.5128083229064941, - "learning_rate": 1.144984558108051e-05, - "loss": 0.0437, - "step": 50525 - }, - { - "epoch": 1.282650082497779, - "grad_norm": 0.5466879606246948, - "learning_rate": 1.1448999450014809e-05, - "loss": 0.0479, - "step": 50530 - }, - { - "epoch": 1.2827770021576343, - "grad_norm": 0.31844377517700195, - "learning_rate": 1.1448153318949107e-05, - "loss": 0.0291, - "step": 50535 - }, - { - "epoch": 1.2829039218174896, - "grad_norm": 0.5425229072570801, - "learning_rate": 1.1447307187883402e-05, - "loss": 0.047, - "step": 50540 - }, - { - "epoch": 1.2830308414773448, - "grad_norm": 0.323993980884552, - "learning_rate": 1.14464610568177e-05, - "loss": 0.0525, - "step": 50545 - }, - { - "epoch": 1.2831577611372, - "grad_norm": 1.6013209819793701, - "learning_rate": 1.1445614925752e-05, - "loss": 0.0587, - "step": 50550 - }, - { - "epoch": 1.2832846807970555, - "grad_norm": 0.7060006260871887, - "learning_rate": 1.1444768794686298e-05, - "loss": 0.0616, - "step": 50555 - }, - { - "epoch": 1.2834116004569107, - "grad_norm": 0.46610137820243835, - "learning_rate": 1.1443922663620594e-05, - "loss": 0.0538, - "step": 50560 - }, - { - "epoch": 1.2835385201167662, - "grad_norm": 0.6739826202392578, - "learning_rate": 1.1443076532554893e-05, - "loss": 0.062, - "step": 50565 - }, - { - "epoch": 1.2836654397766214, - "grad_norm": 0.3349662125110626, - "learning_rate": 1.1442230401489191e-05, - "loss": 0.0339, - "step": 50570 - }, - { - "epoch": 1.2837923594364766, - "grad_norm": 0.4940806031227112, - "learning_rate": 1.144138427042349e-05, - "loss": 0.0471, - "step": 50575 - }, - { - "epoch": 1.283919279096332, - "grad_norm": 0.43451422452926636, - "learning_rate": 1.1440538139357788e-05, - "loss": 0.0516, - "step": 50580 - }, - { - "epoch": 1.2840461987561873, - "grad_norm": 1.7084145545959473, - "learning_rate": 1.1439692008292085e-05, - "loss": 0.0716, - "step": 50585 - }, - { - "epoch": 1.2841731184160428, - "grad_norm": 0.5689837336540222, - "learning_rate": 1.1438845877226383e-05, - "loss": 0.055, - "step": 50590 - }, - { - "epoch": 1.284300038075898, - "grad_norm": 0.5868340134620667, - "learning_rate": 1.1437999746160681e-05, - "loss": 0.0464, - "step": 50595 - }, - { - "epoch": 1.2844269577357532, - "grad_norm": 0.41964152455329895, - "learning_rate": 1.143715361509498e-05, - "loss": 0.04, - "step": 50600 - }, - { - "epoch": 1.2845538773956084, - "grad_norm": 0.4572502076625824, - "learning_rate": 1.1436307484029277e-05, - "loss": 0.0458, - "step": 50605 - }, - { - "epoch": 1.284680797055464, - "grad_norm": 0.6685749292373657, - "learning_rate": 1.1435461352963575e-05, - "loss": 0.0278, - "step": 50610 - }, - { - "epoch": 1.2848077167153191, - "grad_norm": 0.4102068841457367, - "learning_rate": 1.1434615221897873e-05, - "loss": 0.0541, - "step": 50615 - }, - { - "epoch": 1.2849346363751746, - "grad_norm": 0.5511897802352905, - "learning_rate": 1.1433769090832172e-05, - "loss": 0.0607, - "step": 50620 - }, - { - "epoch": 1.2850615560350298, - "grad_norm": 0.5077516436576843, - "learning_rate": 1.1432922959766468e-05, - "loss": 0.0614, - "step": 50625 - }, - { - "epoch": 1.285188475694885, - "grad_norm": 0.6151395440101624, - "learning_rate": 1.1432076828700767e-05, - "loss": 0.0648, - "step": 50630 - }, - { - "epoch": 1.2853153953547405, - "grad_norm": 0.44962677359580994, - "learning_rate": 1.1431230697635065e-05, - "loss": 0.0433, - "step": 50635 - }, - { - "epoch": 1.2854423150145957, - "grad_norm": 0.7516661882400513, - "learning_rate": 1.1430384566569364e-05, - "loss": 0.0377, - "step": 50640 - }, - { - "epoch": 1.2855692346744512, - "grad_norm": 0.7097063064575195, - "learning_rate": 1.142953843550366e-05, - "loss": 0.0665, - "step": 50645 - }, - { - "epoch": 1.2856961543343064, - "grad_norm": 1.1969683170318604, - "learning_rate": 1.1428692304437959e-05, - "loss": 0.0479, - "step": 50650 - }, - { - "epoch": 1.2858230739941616, - "grad_norm": 0.517066240310669, - "learning_rate": 1.1427846173372257e-05, - "loss": 0.046, - "step": 50655 - }, - { - "epoch": 1.285949993654017, - "grad_norm": 0.6123455762863159, - "learning_rate": 1.1427000042306556e-05, - "loss": 0.0497, - "step": 50660 - }, - { - "epoch": 1.2860769133138723, - "grad_norm": 0.2915724217891693, - "learning_rate": 1.1426153911240852e-05, - "loss": 0.0448, - "step": 50665 - }, - { - "epoch": 1.2862038329737278, - "grad_norm": 0.4866197407245636, - "learning_rate": 1.142530778017515e-05, - "loss": 0.0443, - "step": 50670 - }, - { - "epoch": 1.286330752633583, - "grad_norm": 0.6526208519935608, - "learning_rate": 1.1424461649109449e-05, - "loss": 0.0594, - "step": 50675 - }, - { - "epoch": 1.2864576722934382, - "grad_norm": 0.7466217279434204, - "learning_rate": 1.1423615518043748e-05, - "loss": 0.0374, - "step": 50680 - }, - { - "epoch": 1.2865845919532934, - "grad_norm": 0.41855886578559875, - "learning_rate": 1.1422769386978043e-05, - "loss": 0.0513, - "step": 50685 - }, - { - "epoch": 1.286711511613149, - "grad_norm": 0.37665075063705444, - "learning_rate": 1.1421923255912341e-05, - "loss": 0.0367, - "step": 50690 - }, - { - "epoch": 1.2868384312730041, - "grad_norm": 0.8075544238090515, - "learning_rate": 1.142107712484664e-05, - "loss": 0.0474, - "step": 50695 - }, - { - "epoch": 1.2869653509328596, - "grad_norm": 0.6573134660720825, - "learning_rate": 1.142023099378094e-05, - "loss": 0.0497, - "step": 50700 - }, - { - "epoch": 1.2870922705927148, - "grad_norm": 0.7978018522262573, - "learning_rate": 1.1419384862715234e-05, - "loss": 0.0419, - "step": 50705 - }, - { - "epoch": 1.28721919025257, - "grad_norm": 0.4401187598705292, - "learning_rate": 1.1418538731649533e-05, - "loss": 0.0541, - "step": 50710 - }, - { - "epoch": 1.2873461099124255, - "grad_norm": 0.3871384561061859, - "learning_rate": 1.1417692600583831e-05, - "loss": 0.0416, - "step": 50715 - }, - { - "epoch": 1.2874730295722807, - "grad_norm": 0.5403187870979309, - "learning_rate": 1.141684646951813e-05, - "loss": 0.0481, - "step": 50720 - }, - { - "epoch": 1.2875999492321362, - "grad_norm": 0.7755883932113647, - "learning_rate": 1.1416000338452426e-05, - "loss": 0.0491, - "step": 50725 - }, - { - "epoch": 1.2877268688919914, - "grad_norm": 0.5331670641899109, - "learning_rate": 1.1415154207386725e-05, - "loss": 0.054, - "step": 50730 - }, - { - "epoch": 1.2878537885518466, - "grad_norm": 0.3761843144893646, - "learning_rate": 1.1414308076321023e-05, - "loss": 0.0502, - "step": 50735 - }, - { - "epoch": 1.287980708211702, - "grad_norm": 0.49102821946144104, - "learning_rate": 1.1413461945255322e-05, - "loss": 0.0541, - "step": 50740 - }, - { - "epoch": 1.2881076278715573, - "grad_norm": 2.454448699951172, - "learning_rate": 1.1412615814189618e-05, - "loss": 0.0628, - "step": 50745 - }, - { - "epoch": 1.2882345475314125, - "grad_norm": 0.48739564418792725, - "learning_rate": 1.1411769683123917e-05, - "loss": 0.0511, - "step": 50750 - }, - { - "epoch": 1.288361467191268, - "grad_norm": 0.9136450886726379, - "learning_rate": 1.1410923552058215e-05, - "loss": 0.0448, - "step": 50755 - }, - { - "epoch": 1.2884883868511232, - "grad_norm": 0.5396285653114319, - "learning_rate": 1.1410077420992513e-05, - "loss": 0.0503, - "step": 50760 - }, - { - "epoch": 1.2886153065109784, - "grad_norm": 0.6410667300224304, - "learning_rate": 1.140923128992681e-05, - "loss": 0.0463, - "step": 50765 - }, - { - "epoch": 1.288742226170834, - "grad_norm": 0.4546367824077606, - "learning_rate": 1.1408385158861109e-05, - "loss": 0.0487, - "step": 50770 - }, - { - "epoch": 1.2888691458306891, - "grad_norm": 0.36298808455467224, - "learning_rate": 1.1407539027795407e-05, - "loss": 0.05, - "step": 50775 - }, - { - "epoch": 1.2889960654905446, - "grad_norm": 0.4457317888736725, - "learning_rate": 1.1406692896729705e-05, - "loss": 0.0452, - "step": 50780 - }, - { - "epoch": 1.2891229851503998, - "grad_norm": 0.47851842641830444, - "learning_rate": 1.1405846765664002e-05, - "loss": 0.0649, - "step": 50785 - }, - { - "epoch": 1.289249904810255, - "grad_norm": 0.3506905436515808, - "learning_rate": 1.14050006345983e-05, - "loss": 0.0317, - "step": 50790 - }, - { - "epoch": 1.2893768244701105, - "grad_norm": 0.4424348473548889, - "learning_rate": 1.1404154503532599e-05, - "loss": 0.0374, - "step": 50795 - }, - { - "epoch": 1.2895037441299657, - "grad_norm": 0.4011210501194, - "learning_rate": 1.1403308372466897e-05, - "loss": 0.0586, - "step": 50800 - }, - { - "epoch": 1.2896306637898212, - "grad_norm": 0.5491868257522583, - "learning_rate": 1.1402462241401194e-05, - "loss": 0.0469, - "step": 50805 - }, - { - "epoch": 1.2897575834496764, - "grad_norm": 0.6216325759887695, - "learning_rate": 1.1401616110335492e-05, - "loss": 0.0645, - "step": 50810 - }, - { - "epoch": 1.2898845031095316, - "grad_norm": 0.7771264314651489, - "learning_rate": 1.140076997926979e-05, - "loss": 0.0435, - "step": 50815 - }, - { - "epoch": 1.2900114227693869, - "grad_norm": 0.4540340006351471, - "learning_rate": 1.139992384820409e-05, - "loss": 0.0448, - "step": 50820 - }, - { - "epoch": 1.2901383424292423, - "grad_norm": 0.39654800295829773, - "learning_rate": 1.1399077717138384e-05, - "loss": 0.0519, - "step": 50825 - }, - { - "epoch": 1.2902652620890975, - "grad_norm": 0.30273962020874023, - "learning_rate": 1.1398231586072683e-05, - "loss": 0.0524, - "step": 50830 - }, - { - "epoch": 1.290392181748953, - "grad_norm": 0.5760459899902344, - "learning_rate": 1.1397385455006981e-05, - "loss": 0.0677, - "step": 50835 - }, - { - "epoch": 1.2905191014088082, - "grad_norm": 0.3429059684276581, - "learning_rate": 1.139653932394128e-05, - "loss": 0.0383, - "step": 50840 - }, - { - "epoch": 1.2906460210686634, - "grad_norm": 0.48076143860816956, - "learning_rate": 1.1395693192875576e-05, - "loss": 0.0489, - "step": 50845 - }, - { - "epoch": 1.290772940728519, - "grad_norm": 0.3584219217300415, - "learning_rate": 1.1394847061809875e-05, - "loss": 0.0416, - "step": 50850 - }, - { - "epoch": 1.2908998603883741, - "grad_norm": 0.4252598285675049, - "learning_rate": 1.1394000930744173e-05, - "loss": 0.0407, - "step": 50855 - }, - { - "epoch": 1.2910267800482296, - "grad_norm": 0.9653546214103699, - "learning_rate": 1.1393154799678471e-05, - "loss": 0.0521, - "step": 50860 - }, - { - "epoch": 1.2911536997080848, - "grad_norm": 0.4255286455154419, - "learning_rate": 1.1392308668612768e-05, - "loss": 0.0437, - "step": 50865 - }, - { - "epoch": 1.29128061936794, - "grad_norm": 0.683806836605072, - "learning_rate": 1.1391462537547066e-05, - "loss": 0.0573, - "step": 50870 - }, - { - "epoch": 1.2914075390277955, - "grad_norm": 0.32018712162971497, - "learning_rate": 1.1390616406481365e-05, - "loss": 0.0296, - "step": 50875 - }, - { - "epoch": 1.2915344586876507, - "grad_norm": 0.35350486636161804, - "learning_rate": 1.1389770275415663e-05, - "loss": 0.0454, - "step": 50880 - }, - { - "epoch": 1.2916613783475062, - "grad_norm": 0.46060648560523987, - "learning_rate": 1.138892414434996e-05, - "loss": 0.0402, - "step": 50885 - }, - { - "epoch": 1.2917882980073614, - "grad_norm": 0.44933637976646423, - "learning_rate": 1.1388078013284258e-05, - "loss": 0.0502, - "step": 50890 - }, - { - "epoch": 1.2919152176672166, - "grad_norm": 0.3862329423427582, - "learning_rate": 1.1387231882218557e-05, - "loss": 0.0507, - "step": 50895 - }, - { - "epoch": 1.2920421373270718, - "grad_norm": 0.47465386986732483, - "learning_rate": 1.1386385751152855e-05, - "loss": 0.053, - "step": 50900 - }, - { - "epoch": 1.2921690569869273, - "grad_norm": 0.45583444833755493, - "learning_rate": 1.1385539620087152e-05, - "loss": 0.0608, - "step": 50905 - }, - { - "epoch": 1.2922959766467825, - "grad_norm": 0.5318730473518372, - "learning_rate": 1.138469348902145e-05, - "loss": 0.0483, - "step": 50910 - }, - { - "epoch": 1.292422896306638, - "grad_norm": 1.1171612739562988, - "learning_rate": 1.1383847357955749e-05, - "loss": 0.0447, - "step": 50915 - }, - { - "epoch": 1.2925498159664932, - "grad_norm": 0.6103150844573975, - "learning_rate": 1.1383001226890047e-05, - "loss": 0.042, - "step": 50920 - }, - { - "epoch": 1.2926767356263484, - "grad_norm": 0.4347284734249115, - "learning_rate": 1.1382155095824344e-05, - "loss": 0.0366, - "step": 50925 - }, - { - "epoch": 1.292803655286204, - "grad_norm": 0.34618067741394043, - "learning_rate": 1.1381308964758642e-05, - "loss": 0.0504, - "step": 50930 - }, - { - "epoch": 1.2929305749460591, - "grad_norm": 0.5073671936988831, - "learning_rate": 1.138046283369294e-05, - "loss": 0.0475, - "step": 50935 - }, - { - "epoch": 1.2930574946059146, - "grad_norm": 1.3737465143203735, - "learning_rate": 1.1379616702627239e-05, - "loss": 0.048, - "step": 50940 - }, - { - "epoch": 1.2931844142657698, - "grad_norm": 0.4001829922199249, - "learning_rate": 1.1378770571561534e-05, - "loss": 0.0698, - "step": 50945 - }, - { - "epoch": 1.293311333925625, - "grad_norm": 0.6365550756454468, - "learning_rate": 1.1377924440495834e-05, - "loss": 0.0589, - "step": 50950 - }, - { - "epoch": 1.2934382535854803, - "grad_norm": 0.7187914848327637, - "learning_rate": 1.1377078309430133e-05, - "loss": 0.0478, - "step": 50955 - }, - { - "epoch": 1.2935651732453357, - "grad_norm": 0.5168372988700867, - "learning_rate": 1.1376232178364431e-05, - "loss": 0.0621, - "step": 50960 - }, - { - "epoch": 1.293692092905191, - "grad_norm": 0.5544586181640625, - "learning_rate": 1.1375386047298726e-05, - "loss": 0.0491, - "step": 50965 - }, - { - "epoch": 1.2938190125650464, - "grad_norm": 0.4411337971687317, - "learning_rate": 1.1374539916233024e-05, - "loss": 0.0582, - "step": 50970 - }, - { - "epoch": 1.2939459322249016, - "grad_norm": 0.4156343340873718, - "learning_rate": 1.1373693785167323e-05, - "loss": 0.0475, - "step": 50975 - }, - { - "epoch": 1.2940728518847568, - "grad_norm": 0.6199815273284912, - "learning_rate": 1.1372847654101621e-05, - "loss": 0.048, - "step": 50980 - }, - { - "epoch": 1.2941997715446123, - "grad_norm": 0.40593406558036804, - "learning_rate": 1.1372001523035918e-05, - "loss": 0.0665, - "step": 50985 - }, - { - "epoch": 1.2943266912044675, - "grad_norm": 0.2601001262664795, - "learning_rate": 1.1371155391970216e-05, - "loss": 0.0299, - "step": 50990 - }, - { - "epoch": 1.294453610864323, - "grad_norm": 0.5721041560173035, - "learning_rate": 1.1370309260904515e-05, - "loss": 0.0572, - "step": 50995 - }, - { - "epoch": 1.2945805305241782, - "grad_norm": 0.26474153995513916, - "learning_rate": 1.1369463129838813e-05, - "loss": 0.0406, - "step": 51000 - }, - { - "epoch": 1.2947074501840334, - "grad_norm": 0.49803003668785095, - "learning_rate": 1.136861699877311e-05, - "loss": 0.0421, - "step": 51005 - }, - { - "epoch": 1.2948343698438889, - "grad_norm": 0.4036301076412201, - "learning_rate": 1.1367770867707408e-05, - "loss": 0.0493, - "step": 51010 - }, - { - "epoch": 1.2949612895037441, - "grad_norm": 0.7239077091217041, - "learning_rate": 1.1366924736641707e-05, - "loss": 0.0649, - "step": 51015 - }, - { - "epoch": 1.2950882091635996, - "grad_norm": 0.43970850110054016, - "learning_rate": 1.1366078605576005e-05, - "loss": 0.0525, - "step": 51020 - }, - { - "epoch": 1.2952151288234548, - "grad_norm": 0.5326611399650574, - "learning_rate": 1.1365232474510302e-05, - "loss": 0.0425, - "step": 51025 - }, - { - "epoch": 1.29534204848331, - "grad_norm": 0.3287754952907562, - "learning_rate": 1.13643863434446e-05, - "loss": 0.0506, - "step": 51030 - }, - { - "epoch": 1.2954689681431653, - "grad_norm": 0.4148683249950409, - "learning_rate": 1.1363540212378898e-05, - "loss": 0.0504, - "step": 51035 - }, - { - "epoch": 1.2955958878030207, - "grad_norm": 0.4192848801612854, - "learning_rate": 1.1362694081313197e-05, - "loss": 0.0306, - "step": 51040 - }, - { - "epoch": 1.295722807462876, - "grad_norm": 0.4921371638774872, - "learning_rate": 1.1361847950247494e-05, - "loss": 0.0319, - "step": 51045 - }, - { - "epoch": 1.2958497271227314, - "grad_norm": 0.45872774720191956, - "learning_rate": 1.1361001819181792e-05, - "loss": 0.0663, - "step": 51050 - }, - { - "epoch": 1.2959766467825866, - "grad_norm": 1.036953091621399, - "learning_rate": 1.136015568811609e-05, - "loss": 0.0646, - "step": 51055 - }, - { - "epoch": 1.2961035664424418, - "grad_norm": 0.3643571138381958, - "learning_rate": 1.1359309557050389e-05, - "loss": 0.0479, - "step": 51060 - }, - { - "epoch": 1.2962304861022973, - "grad_norm": 0.40488722920417786, - "learning_rate": 1.1358463425984685e-05, - "loss": 0.0588, - "step": 51065 - }, - { - "epoch": 1.2963574057621525, - "grad_norm": 0.2843170762062073, - "learning_rate": 1.1357617294918984e-05, - "loss": 0.0459, - "step": 51070 - }, - { - "epoch": 1.296484325422008, - "grad_norm": 0.3754228353500366, - "learning_rate": 1.1356771163853282e-05, - "loss": 0.0534, - "step": 51075 - }, - { - "epoch": 1.2966112450818632, - "grad_norm": 0.4366191029548645, - "learning_rate": 1.135592503278758e-05, - "loss": 0.0492, - "step": 51080 - }, - { - "epoch": 1.2967381647417184, - "grad_norm": 0.45622971653938293, - "learning_rate": 1.1355078901721876e-05, - "loss": 0.0415, - "step": 51085 - }, - { - "epoch": 1.2968650844015739, - "grad_norm": 0.48318809270858765, - "learning_rate": 1.1354232770656176e-05, - "loss": 0.0388, - "step": 51090 - }, - { - "epoch": 1.2969920040614291, - "grad_norm": 0.6007116436958313, - "learning_rate": 1.1353386639590474e-05, - "loss": 0.0569, - "step": 51095 - }, - { - "epoch": 1.2971189237212843, - "grad_norm": 0.765918493270874, - "learning_rate": 1.1352540508524773e-05, - "loss": 0.0428, - "step": 51100 - }, - { - "epoch": 1.2972458433811398, - "grad_norm": 0.487775593996048, - "learning_rate": 1.1351694377459071e-05, - "loss": 0.0658, - "step": 51105 - }, - { - "epoch": 1.297372763040995, - "grad_norm": 0.3310084640979767, - "learning_rate": 1.1350848246393366e-05, - "loss": 0.0297, - "step": 51110 - }, - { - "epoch": 1.2974996827008503, - "grad_norm": 0.5260680913925171, - "learning_rate": 1.1350002115327664e-05, - "loss": 0.0467, - "step": 51115 - }, - { - "epoch": 1.2976266023607057, - "grad_norm": 0.4042007327079773, - "learning_rate": 1.1349155984261963e-05, - "loss": 0.0394, - "step": 51120 - }, - { - "epoch": 1.297753522020561, - "grad_norm": 0.9525343775749207, - "learning_rate": 1.1348309853196263e-05, - "loss": 0.0596, - "step": 51125 - }, - { - "epoch": 1.2978804416804164, - "grad_norm": 1.1846073865890503, - "learning_rate": 1.1347463722130558e-05, - "loss": 0.0541, - "step": 51130 - }, - { - "epoch": 1.2980073613402716, - "grad_norm": 0.4314163029193878, - "learning_rate": 1.1346617591064856e-05, - "loss": 0.0311, - "step": 51135 - }, - { - "epoch": 1.2981342810001268, - "grad_norm": 1.5906341075897217, - "learning_rate": 1.1345771459999155e-05, - "loss": 0.056, - "step": 51140 - }, - { - "epoch": 1.2982612006599823, - "grad_norm": 0.5512549877166748, - "learning_rate": 1.1344925328933453e-05, - "loss": 0.0406, - "step": 51145 - }, - { - "epoch": 1.2983881203198375, - "grad_norm": 0.6115833520889282, - "learning_rate": 1.134407919786775e-05, - "loss": 0.0722, - "step": 51150 - }, - { - "epoch": 1.298515039979693, - "grad_norm": 0.340628445148468, - "learning_rate": 1.1343233066802048e-05, - "loss": 0.0556, - "step": 51155 - }, - { - "epoch": 1.2986419596395482, - "grad_norm": 0.6631166338920593, - "learning_rate": 1.1342386935736347e-05, - "loss": 0.0382, - "step": 51160 - }, - { - "epoch": 1.2987688792994034, - "grad_norm": 0.5282384157180786, - "learning_rate": 1.1341540804670645e-05, - "loss": 0.0381, - "step": 51165 - }, - { - "epoch": 1.2988957989592587, - "grad_norm": 0.2898809313774109, - "learning_rate": 1.1340694673604942e-05, - "loss": 0.0694, - "step": 51170 - }, - { - "epoch": 1.2990227186191141, - "grad_norm": 0.3862461745738983, - "learning_rate": 1.133984854253924e-05, - "loss": 0.0282, - "step": 51175 - }, - { - "epoch": 1.2991496382789693, - "grad_norm": 0.3570457398891449, - "learning_rate": 1.1339002411473539e-05, - "loss": 0.0373, - "step": 51180 - }, - { - "epoch": 1.2992765579388248, - "grad_norm": 0.5394024848937988, - "learning_rate": 1.1338156280407837e-05, - "loss": 0.045, - "step": 51185 - }, - { - "epoch": 1.29940347759868, - "grad_norm": 0.911893904209137, - "learning_rate": 1.1337310149342134e-05, - "loss": 0.0498, - "step": 51190 - }, - { - "epoch": 1.2995303972585353, - "grad_norm": 0.3145937919616699, - "learning_rate": 1.1336464018276432e-05, - "loss": 0.033, - "step": 51195 - }, - { - "epoch": 1.2996573169183907, - "grad_norm": 0.16940291225910187, - "learning_rate": 1.133561788721073e-05, - "loss": 0.0354, - "step": 51200 - }, - { - "epoch": 1.299784236578246, - "grad_norm": 0.44827914237976074, - "learning_rate": 1.1334771756145029e-05, - "loss": 0.0712, - "step": 51205 - }, - { - "epoch": 1.2999111562381014, - "grad_norm": 0.5447380542755127, - "learning_rate": 1.1333925625079326e-05, - "loss": 0.0594, - "step": 51210 - }, - { - "epoch": 1.3000380758979566, - "grad_norm": 0.3721446692943573, - "learning_rate": 1.1333079494013624e-05, - "loss": 0.0439, - "step": 51215 - }, - { - "epoch": 1.3001649955578118, - "grad_norm": 0.5048140287399292, - "learning_rate": 1.1332233362947922e-05, - "loss": 0.0345, - "step": 51220 - }, - { - "epoch": 1.3002919152176673, - "grad_norm": 0.5490175485610962, - "learning_rate": 1.133138723188222e-05, - "loss": 0.0617, - "step": 51225 - }, - { - "epoch": 1.3004188348775225, - "grad_norm": 0.37621670961380005, - "learning_rate": 1.1330541100816518e-05, - "loss": 0.0631, - "step": 51230 - }, - { - "epoch": 1.300545754537378, - "grad_norm": 0.5170714259147644, - "learning_rate": 1.1329694969750816e-05, - "loss": 0.0634, - "step": 51235 - }, - { - "epoch": 1.3006726741972332, - "grad_norm": 0.5272743701934814, - "learning_rate": 1.1328848838685114e-05, - "loss": 0.0581, - "step": 51240 - }, - { - "epoch": 1.3007995938570884, - "grad_norm": 0.42634570598602295, - "learning_rate": 1.1328002707619413e-05, - "loss": 0.0538, - "step": 51245 - }, - { - "epoch": 1.3009265135169437, - "grad_norm": 0.35968804359436035, - "learning_rate": 1.1327156576553708e-05, - "loss": 0.0499, - "step": 51250 - }, - { - "epoch": 1.3010534331767991, - "grad_norm": 0.5058416128158569, - "learning_rate": 1.1326310445488006e-05, - "loss": 0.0348, - "step": 51255 - }, - { - "epoch": 1.3011803528366543, - "grad_norm": 0.8263579607009888, - "learning_rate": 1.1325464314422305e-05, - "loss": 0.0472, - "step": 51260 - }, - { - "epoch": 1.3013072724965098, - "grad_norm": 0.6018458008766174, - "learning_rate": 1.1324618183356603e-05, - "loss": 0.0411, - "step": 51265 - }, - { - "epoch": 1.301434192156365, - "grad_norm": 0.5122480392456055, - "learning_rate": 1.13237720522909e-05, - "loss": 0.0477, - "step": 51270 - }, - { - "epoch": 1.3015611118162203, - "grad_norm": 0.3980444669723511, - "learning_rate": 1.1322925921225198e-05, - "loss": 0.0235, - "step": 51275 - }, - { - "epoch": 1.3016880314760757, - "grad_norm": 0.5100890398025513, - "learning_rate": 1.1322079790159496e-05, - "loss": 0.0602, - "step": 51280 - }, - { - "epoch": 1.301814951135931, - "grad_norm": 0.6184352040290833, - "learning_rate": 1.1321233659093795e-05, - "loss": 0.0387, - "step": 51285 - }, - { - "epoch": 1.3019418707957864, - "grad_norm": 0.578171968460083, - "learning_rate": 1.1320387528028092e-05, - "loss": 0.0303, - "step": 51290 - }, - { - "epoch": 1.3020687904556416, - "grad_norm": 0.5325555205345154, - "learning_rate": 1.131954139696239e-05, - "loss": 0.0529, - "step": 51295 - }, - { - "epoch": 1.3021957101154968, - "grad_norm": 0.4216128885746002, - "learning_rate": 1.1318695265896688e-05, - "loss": 0.0653, - "step": 51300 - }, - { - "epoch": 1.302322629775352, - "grad_norm": 0.6566740274429321, - "learning_rate": 1.1317849134830987e-05, - "loss": 0.0427, - "step": 51305 - }, - { - "epoch": 1.3024495494352075, - "grad_norm": 0.4273035526275635, - "learning_rate": 1.1317003003765283e-05, - "loss": 0.0367, - "step": 51310 - }, - { - "epoch": 1.3025764690950628, - "grad_norm": 0.4969930052757263, - "learning_rate": 1.1316156872699582e-05, - "loss": 0.0384, - "step": 51315 - }, - { - "epoch": 1.3027033887549182, - "grad_norm": 0.47913187742233276, - "learning_rate": 1.131531074163388e-05, - "loss": 0.0501, - "step": 51320 - }, - { - "epoch": 1.3028303084147734, - "grad_norm": 0.33286479115486145, - "learning_rate": 1.1314464610568179e-05, - "loss": 0.0481, - "step": 51325 - }, - { - "epoch": 1.3029572280746287, - "grad_norm": 0.47099271416664124, - "learning_rate": 1.1313618479502475e-05, - "loss": 0.0672, - "step": 51330 - }, - { - "epoch": 1.303084147734484, - "grad_norm": 0.883169949054718, - "learning_rate": 1.1312772348436774e-05, - "loss": 0.0595, - "step": 51335 - }, - { - "epoch": 1.3032110673943393, - "grad_norm": 0.5031092762947083, - "learning_rate": 1.1311926217371072e-05, - "loss": 0.0356, - "step": 51340 - }, - { - "epoch": 1.3033379870541948, - "grad_norm": 0.4990406930446625, - "learning_rate": 1.131108008630537e-05, - "loss": 0.0436, - "step": 51345 - }, - { - "epoch": 1.30346490671405, - "grad_norm": 0.424800843000412, - "learning_rate": 1.1310233955239667e-05, - "loss": 0.0331, - "step": 51350 - }, - { - "epoch": 1.3035918263739052, - "grad_norm": 0.425152450799942, - "learning_rate": 1.1309387824173966e-05, - "loss": 0.036, - "step": 51355 - }, - { - "epoch": 1.3037187460337607, - "grad_norm": 0.3553943932056427, - "learning_rate": 1.1308541693108264e-05, - "loss": 0.0466, - "step": 51360 - }, - { - "epoch": 1.303845665693616, - "grad_norm": 0.39660653471946716, - "learning_rate": 1.1307695562042563e-05, - "loss": 0.0641, - "step": 51365 - }, - { - "epoch": 1.3039725853534714, - "grad_norm": 0.4750511348247528, - "learning_rate": 1.130684943097686e-05, - "loss": 0.0413, - "step": 51370 - }, - { - "epoch": 1.3040995050133266, - "grad_norm": 0.42489051818847656, - "learning_rate": 1.1306003299911158e-05, - "loss": 0.0418, - "step": 51375 - }, - { - "epoch": 1.3042264246731818, - "grad_norm": 1.2967506647109985, - "learning_rate": 1.1305157168845456e-05, - "loss": 0.0783, - "step": 51380 - }, - { - "epoch": 1.304353344333037, - "grad_norm": 0.4643852114677429, - "learning_rate": 1.1304311037779754e-05, - "loss": 0.0391, - "step": 51385 - }, - { - "epoch": 1.3044802639928925, - "grad_norm": 0.47356000542640686, - "learning_rate": 1.130346490671405e-05, - "loss": 0.0451, - "step": 51390 - }, - { - "epoch": 1.3046071836527477, - "grad_norm": 0.3234248161315918, - "learning_rate": 1.1302618775648348e-05, - "loss": 0.0361, - "step": 51395 - }, - { - "epoch": 1.3047341033126032, - "grad_norm": 1.297439694404602, - "learning_rate": 1.1301772644582646e-05, - "loss": 0.0582, - "step": 51400 - }, - { - "epoch": 1.3048610229724584, - "grad_norm": 0.3020826280117035, - "learning_rate": 1.1300926513516945e-05, - "loss": 0.053, - "step": 51405 - }, - { - "epoch": 1.3049879426323137, - "grad_norm": 0.3189208507537842, - "learning_rate": 1.1300080382451241e-05, - "loss": 0.038, - "step": 51410 - }, - { - "epoch": 1.305114862292169, - "grad_norm": 0.6744409799575806, - "learning_rate": 1.129923425138554e-05, - "loss": 0.0477, - "step": 51415 - }, - { - "epoch": 1.3052417819520243, - "grad_norm": 0.6185187101364136, - "learning_rate": 1.1298388120319838e-05, - "loss": 0.0512, - "step": 51420 - }, - { - "epoch": 1.3053687016118798, - "grad_norm": 0.3981840908527374, - "learning_rate": 1.1297541989254137e-05, - "loss": 0.0432, - "step": 51425 - }, - { - "epoch": 1.305495621271735, - "grad_norm": 0.648384153842926, - "learning_rate": 1.1296695858188433e-05, - "loss": 0.0621, - "step": 51430 - }, - { - "epoch": 1.3056225409315902, - "grad_norm": 0.5570166707038879, - "learning_rate": 1.1295849727122732e-05, - "loss": 0.0366, - "step": 51435 - }, - { - "epoch": 1.3057494605914455, - "grad_norm": 0.3285747468471527, - "learning_rate": 1.129500359605703e-05, - "loss": 0.0359, - "step": 51440 - }, - { - "epoch": 1.305876380251301, - "grad_norm": 0.5265329480171204, - "learning_rate": 1.1294157464991328e-05, - "loss": 0.0517, - "step": 51445 - }, - { - "epoch": 1.3060032999111562, - "grad_norm": 0.42750170826911926, - "learning_rate": 1.1293311333925625e-05, - "loss": 0.0396, - "step": 51450 - }, - { - "epoch": 1.3061302195710116, - "grad_norm": 0.5530766844749451, - "learning_rate": 1.1292465202859924e-05, - "loss": 0.0316, - "step": 51455 - }, - { - "epoch": 1.3062571392308668, - "grad_norm": 0.4332669675350189, - "learning_rate": 1.1291619071794222e-05, - "loss": 0.0327, - "step": 51460 - }, - { - "epoch": 1.306384058890722, - "grad_norm": 0.45845645666122437, - "learning_rate": 1.129077294072852e-05, - "loss": 0.0472, - "step": 51465 - }, - { - "epoch": 1.3065109785505775, - "grad_norm": 0.4752406179904938, - "learning_rate": 1.1289926809662817e-05, - "loss": 0.0465, - "step": 51470 - }, - { - "epoch": 1.3066378982104327, - "grad_norm": 0.5498816967010498, - "learning_rate": 1.1289080678597116e-05, - "loss": 0.0669, - "step": 51475 - }, - { - "epoch": 1.3067648178702882, - "grad_norm": 0.3219185471534729, - "learning_rate": 1.1288234547531414e-05, - "loss": 0.0319, - "step": 51480 - }, - { - "epoch": 1.3068917375301434, - "grad_norm": 0.5962609648704529, - "learning_rate": 1.1287388416465712e-05, - "loss": 0.0417, - "step": 51485 - }, - { - "epoch": 1.3070186571899987, - "grad_norm": 0.44336652755737305, - "learning_rate": 1.1286542285400009e-05, - "loss": 0.0526, - "step": 51490 - }, - { - "epoch": 1.307145576849854, - "grad_norm": 0.5614840984344482, - "learning_rate": 1.1285696154334307e-05, - "loss": 0.0545, - "step": 51495 - }, - { - "epoch": 1.3072724965097093, - "grad_norm": 0.565162718296051, - "learning_rate": 1.1284850023268606e-05, - "loss": 0.0526, - "step": 51500 - }, - { - "epoch": 1.3073994161695648, - "grad_norm": 0.39119860529899597, - "learning_rate": 1.1284003892202904e-05, - "loss": 0.0545, - "step": 51505 - }, - { - "epoch": 1.30752633582942, - "grad_norm": 0.7743842005729675, - "learning_rate": 1.12831577611372e-05, - "loss": 0.0564, - "step": 51510 - }, - { - "epoch": 1.3076532554892752, - "grad_norm": 0.8204236626625061, - "learning_rate": 1.12823116300715e-05, - "loss": 0.0602, - "step": 51515 - }, - { - "epoch": 1.3077801751491305, - "grad_norm": 0.3118831515312195, - "learning_rate": 1.1281465499005798e-05, - "loss": 0.0443, - "step": 51520 - }, - { - "epoch": 1.307907094808986, - "grad_norm": 0.6991625428199768, - "learning_rate": 1.1280619367940096e-05, - "loss": 0.0456, - "step": 51525 - }, - { - "epoch": 1.3080340144688412, - "grad_norm": 0.36681032180786133, - "learning_rate": 1.1279773236874391e-05, - "loss": 0.0351, - "step": 51530 - }, - { - "epoch": 1.3081609341286966, - "grad_norm": 0.769809365272522, - "learning_rate": 1.127892710580869e-05, - "loss": 0.0465, - "step": 51535 - }, - { - "epoch": 1.3082878537885518, - "grad_norm": 0.6785352826118469, - "learning_rate": 1.1278080974742988e-05, - "loss": 0.0433, - "step": 51540 - }, - { - "epoch": 1.308414773448407, - "grad_norm": 0.2630726099014282, - "learning_rate": 1.1277234843677286e-05, - "loss": 0.0479, - "step": 51545 - }, - { - "epoch": 1.3085416931082625, - "grad_norm": 0.3686884045600891, - "learning_rate": 1.1276388712611583e-05, - "loss": 0.0417, - "step": 51550 - }, - { - "epoch": 1.3086686127681177, - "grad_norm": 0.47540003061294556, - "learning_rate": 1.1275542581545881e-05, - "loss": 0.0429, - "step": 51555 - }, - { - "epoch": 1.3087955324279732, - "grad_norm": 0.33644992113113403, - "learning_rate": 1.127469645048018e-05, - "loss": 0.0385, - "step": 51560 - }, - { - "epoch": 1.3089224520878284, - "grad_norm": 0.46161991357803345, - "learning_rate": 1.1273850319414478e-05, - "loss": 0.0453, - "step": 51565 - }, - { - "epoch": 1.3090493717476837, - "grad_norm": 0.41110947728157043, - "learning_rate": 1.1273004188348775e-05, - "loss": 0.0509, - "step": 51570 - }, - { - "epoch": 1.309176291407539, - "grad_norm": 0.49451592564582825, - "learning_rate": 1.1272158057283073e-05, - "loss": 0.0596, - "step": 51575 - }, - { - "epoch": 1.3093032110673943, - "grad_norm": 0.4981329143047333, - "learning_rate": 1.1271311926217372e-05, - "loss": 0.0483, - "step": 51580 - }, - { - "epoch": 1.3094301307272498, - "grad_norm": 0.46015045046806335, - "learning_rate": 1.127046579515167e-05, - "loss": 0.0339, - "step": 51585 - }, - { - "epoch": 1.309557050387105, - "grad_norm": 0.47559863328933716, - "learning_rate": 1.1269619664085967e-05, - "loss": 0.0441, - "step": 51590 - }, - { - "epoch": 1.3096839700469602, - "grad_norm": 1.2829904556274414, - "learning_rate": 1.1268773533020265e-05, - "loss": 0.04, - "step": 51595 - }, - { - "epoch": 1.3098108897068155, - "grad_norm": 1.261403203010559, - "learning_rate": 1.1267927401954564e-05, - "loss": 0.0421, - "step": 51600 - }, - { - "epoch": 1.309937809366671, - "grad_norm": 0.3030202388763428, - "learning_rate": 1.1267081270888862e-05, - "loss": 0.0349, - "step": 51605 - }, - { - "epoch": 1.3100647290265262, - "grad_norm": 0.37467122077941895, - "learning_rate": 1.126623513982316e-05, - "loss": 0.0359, - "step": 51610 - }, - { - "epoch": 1.3101916486863816, - "grad_norm": 0.3764854371547699, - "learning_rate": 1.1265389008757457e-05, - "loss": 0.0499, - "step": 51615 - }, - { - "epoch": 1.3103185683462368, - "grad_norm": 0.4441997706890106, - "learning_rate": 1.1264542877691756e-05, - "loss": 0.0757, - "step": 51620 - }, - { - "epoch": 1.310445488006092, - "grad_norm": 0.4199162721633911, - "learning_rate": 1.1263696746626054e-05, - "loss": 0.0454, - "step": 51625 - }, - { - "epoch": 1.3105724076659475, - "grad_norm": 0.4297911822795868, - "learning_rate": 1.1262850615560352e-05, - "loss": 0.0578, - "step": 51630 - }, - { - "epoch": 1.3106993273258027, - "grad_norm": 0.3605617880821228, - "learning_rate": 1.1262004484494649e-05, - "loss": 0.0434, - "step": 51635 - }, - { - "epoch": 1.3108262469856582, - "grad_norm": 0.8665217161178589, - "learning_rate": 1.1261158353428948e-05, - "loss": 0.0502, - "step": 51640 - }, - { - "epoch": 1.3109531666455134, - "grad_norm": 0.35876625776290894, - "learning_rate": 1.1260312222363246e-05, - "loss": 0.0436, - "step": 51645 - }, - { - "epoch": 1.3110800863053687, - "grad_norm": 0.4019695818424225, - "learning_rate": 1.1259466091297544e-05, - "loss": 0.0523, - "step": 51650 - }, - { - "epoch": 1.3112070059652239, - "grad_norm": 0.39068281650543213, - "learning_rate": 1.1258619960231841e-05, - "loss": 0.0514, - "step": 51655 - }, - { - "epoch": 1.3113339256250793, - "grad_norm": 0.5446902513504028, - "learning_rate": 1.125777382916614e-05, - "loss": 0.0589, - "step": 51660 - }, - { - "epoch": 1.3114608452849346, - "grad_norm": 0.1626068651676178, - "learning_rate": 1.1256927698100438e-05, - "loss": 0.0291, - "step": 51665 - }, - { - "epoch": 1.31158776494479, - "grad_norm": 0.6359480619430542, - "learning_rate": 1.1256081567034736e-05, - "loss": 0.0464, - "step": 51670 - }, - { - "epoch": 1.3117146846046452, - "grad_norm": 0.7304179668426514, - "learning_rate": 1.1255235435969031e-05, - "loss": 0.0495, - "step": 51675 - }, - { - "epoch": 1.3118416042645005, - "grad_norm": 0.6524847149848938, - "learning_rate": 1.125438930490333e-05, - "loss": 0.0441, - "step": 51680 - }, - { - "epoch": 1.311968523924356, - "grad_norm": 0.3859202563762665, - "learning_rate": 1.1253543173837628e-05, - "loss": 0.0486, - "step": 51685 - }, - { - "epoch": 1.3120954435842112, - "grad_norm": 0.3065509796142578, - "learning_rate": 1.1252697042771928e-05, - "loss": 0.0454, - "step": 51690 - }, - { - "epoch": 1.3122223632440666, - "grad_norm": 0.43021246790885925, - "learning_rate": 1.1251850911706223e-05, - "loss": 0.0415, - "step": 51695 - }, - { - "epoch": 1.3123492829039218, - "grad_norm": 0.6250462532043457, - "learning_rate": 1.1251004780640522e-05, - "loss": 0.0457, - "step": 51700 - }, - { - "epoch": 1.312476202563777, - "grad_norm": 1.317004680633545, - "learning_rate": 1.125015864957482e-05, - "loss": 0.0517, - "step": 51705 - }, - { - "epoch": 1.3126031222236325, - "grad_norm": 0.49839672446250916, - "learning_rate": 1.1249312518509118e-05, - "loss": 0.048, - "step": 51710 - }, - { - "epoch": 1.3127300418834877, - "grad_norm": 0.4958000183105469, - "learning_rate": 1.1248466387443415e-05, - "loss": 0.0529, - "step": 51715 - }, - { - "epoch": 1.3128569615433432, - "grad_norm": 0.389293909072876, - "learning_rate": 1.1247620256377713e-05, - "loss": 0.0412, - "step": 51720 - }, - { - "epoch": 1.3129838812031984, - "grad_norm": 0.32812365889549255, - "learning_rate": 1.1246774125312012e-05, - "loss": 0.0253, - "step": 51725 - }, - { - "epoch": 1.3131108008630537, - "grad_norm": 0.5899739861488342, - "learning_rate": 1.124592799424631e-05, - "loss": 0.0435, - "step": 51730 - }, - { - "epoch": 1.3132377205229089, - "grad_norm": 0.5287603735923767, - "learning_rate": 1.1245081863180607e-05, - "loss": 0.0419, - "step": 51735 - }, - { - "epoch": 1.3133646401827643, - "grad_norm": 0.402224600315094, - "learning_rate": 1.1244235732114905e-05, - "loss": 0.034, - "step": 51740 - }, - { - "epoch": 1.3134915598426196, - "grad_norm": 0.4485335648059845, - "learning_rate": 1.1243389601049204e-05, - "loss": 0.0498, - "step": 51745 - }, - { - "epoch": 1.313618479502475, - "grad_norm": 0.44640421867370605, - "learning_rate": 1.1242543469983502e-05, - "loss": 0.0532, - "step": 51750 - }, - { - "epoch": 1.3137453991623302, - "grad_norm": 0.5137338638305664, - "learning_rate": 1.1241697338917799e-05, - "loss": 0.0438, - "step": 51755 - }, - { - "epoch": 1.3138723188221855, - "grad_norm": 0.36935484409332275, - "learning_rate": 1.1240851207852097e-05, - "loss": 0.076, - "step": 51760 - }, - { - "epoch": 1.313999238482041, - "grad_norm": 1.2067519426345825, - "learning_rate": 1.1240005076786396e-05, - "loss": 0.0503, - "step": 51765 - }, - { - "epoch": 1.3141261581418962, - "grad_norm": 0.4538722634315491, - "learning_rate": 1.1239158945720694e-05, - "loss": 0.0527, - "step": 51770 - }, - { - "epoch": 1.3142530778017516, - "grad_norm": 0.4536571204662323, - "learning_rate": 1.123831281465499e-05, - "loss": 0.0493, - "step": 51775 - }, - { - "epoch": 1.3143799974616068, - "grad_norm": 0.39564526081085205, - "learning_rate": 1.123746668358929e-05, - "loss": 0.049, - "step": 51780 - }, - { - "epoch": 1.314506917121462, - "grad_norm": 0.6261420249938965, - "learning_rate": 1.1236620552523588e-05, - "loss": 0.0415, - "step": 51785 - }, - { - "epoch": 1.3146338367813173, - "grad_norm": 0.7176336050033569, - "learning_rate": 1.1235774421457886e-05, - "loss": 0.0392, - "step": 51790 - }, - { - "epoch": 1.3147607564411727, - "grad_norm": 0.5741378664970398, - "learning_rate": 1.1234928290392183e-05, - "loss": 0.0568, - "step": 51795 - }, - { - "epoch": 1.314887676101028, - "grad_norm": 0.40504124760627747, - "learning_rate": 1.1234082159326481e-05, - "loss": 0.0406, - "step": 51800 - }, - { - "epoch": 1.3150145957608834, - "grad_norm": 0.29988357424736023, - "learning_rate": 1.123323602826078e-05, - "loss": 0.0298, - "step": 51805 - }, - { - "epoch": 1.3151415154207386, - "grad_norm": 1.5849395990371704, - "learning_rate": 1.1232389897195078e-05, - "loss": 0.0482, - "step": 51810 - }, - { - "epoch": 1.3152684350805939, - "grad_norm": 0.47199568152427673, - "learning_rate": 1.1231543766129373e-05, - "loss": 0.0588, - "step": 51815 - }, - { - "epoch": 1.3153953547404493, - "grad_norm": 0.513408362865448, - "learning_rate": 1.1230697635063671e-05, - "loss": 0.04, - "step": 51820 - }, - { - "epoch": 1.3155222744003046, - "grad_norm": 0.510222852230072, - "learning_rate": 1.122985150399797e-05, - "loss": 0.0425, - "step": 51825 - }, - { - "epoch": 1.31564919406016, - "grad_norm": 0.23910091817378998, - "learning_rate": 1.1229005372932268e-05, - "loss": 0.0433, - "step": 51830 - }, - { - "epoch": 1.3157761137200152, - "grad_norm": 0.4873216152191162, - "learning_rate": 1.1228159241866565e-05, - "loss": 0.0566, - "step": 51835 - }, - { - "epoch": 1.3159030333798705, - "grad_norm": 0.38633790612220764, - "learning_rate": 1.1227313110800863e-05, - "loss": 0.0633, - "step": 51840 - }, - { - "epoch": 1.316029953039726, - "grad_norm": 0.4138302206993103, - "learning_rate": 1.1226466979735162e-05, - "loss": 0.0396, - "step": 51845 - }, - { - "epoch": 1.3161568726995811, - "grad_norm": 0.6953312754631042, - "learning_rate": 1.122562084866946e-05, - "loss": 0.0499, - "step": 51850 - }, - { - "epoch": 1.3162837923594366, - "grad_norm": 0.3501913845539093, - "learning_rate": 1.1224774717603757e-05, - "loss": 0.0757, - "step": 51855 - }, - { - "epoch": 1.3164107120192918, - "grad_norm": 0.8991566896438599, - "learning_rate": 1.1223928586538055e-05, - "loss": 0.0291, - "step": 51860 - }, - { - "epoch": 1.316537631679147, - "grad_norm": 0.41620877385139465, - "learning_rate": 1.1223082455472354e-05, - "loss": 0.0458, - "step": 51865 - }, - { - "epoch": 1.3166645513390023, - "grad_norm": 0.4571879208087921, - "learning_rate": 1.1222236324406652e-05, - "loss": 0.0433, - "step": 51870 - }, - { - "epoch": 1.3167914709988577, - "grad_norm": 0.3606286644935608, - "learning_rate": 1.1221390193340949e-05, - "loss": 0.065, - "step": 51875 - }, - { - "epoch": 1.316918390658713, - "grad_norm": 0.5832714438438416, - "learning_rate": 1.1220544062275247e-05, - "loss": 0.04, - "step": 51880 - }, - { - "epoch": 1.3170453103185684, - "grad_norm": 0.30555030703544617, - "learning_rate": 1.1219697931209546e-05, - "loss": 0.0519, - "step": 51885 - }, - { - "epoch": 1.3171722299784236, - "grad_norm": 0.48177585005760193, - "learning_rate": 1.1218851800143844e-05, - "loss": 0.055, - "step": 51890 - }, - { - "epoch": 1.3172991496382789, - "grad_norm": 0.34697219729423523, - "learning_rate": 1.121800566907814e-05, - "loss": 0.0367, - "step": 51895 - }, - { - "epoch": 1.3174260692981343, - "grad_norm": 0.45484739542007446, - "learning_rate": 1.1217159538012439e-05, - "loss": 0.0592, - "step": 51900 - }, - { - "epoch": 1.3175529889579896, - "grad_norm": 0.7766381502151489, - "learning_rate": 1.1216313406946737e-05, - "loss": 0.032, - "step": 51905 - }, - { - "epoch": 1.317679908617845, - "grad_norm": 0.42390042543411255, - "learning_rate": 1.1215467275881036e-05, - "loss": 0.0514, - "step": 51910 - }, - { - "epoch": 1.3178068282777002, - "grad_norm": 1.622931957244873, - "learning_rate": 1.1214621144815333e-05, - "loss": 0.0351, - "step": 51915 - }, - { - "epoch": 1.3179337479375555, - "grad_norm": 0.5649393200874329, - "learning_rate": 1.1213775013749631e-05, - "loss": 0.0417, - "step": 51920 - }, - { - "epoch": 1.318060667597411, - "grad_norm": 0.6749083399772644, - "learning_rate": 1.121292888268393e-05, - "loss": 0.0539, - "step": 51925 - }, - { - "epoch": 1.3181875872572661, - "grad_norm": 0.3019692897796631, - "learning_rate": 1.1212082751618228e-05, - "loss": 0.0487, - "step": 51930 - }, - { - "epoch": 1.3183145069171216, - "grad_norm": 0.27354347705841064, - "learning_rate": 1.1211236620552524e-05, - "loss": 0.0727, - "step": 51935 - }, - { - "epoch": 1.3184414265769768, - "grad_norm": 0.7084040641784668, - "learning_rate": 1.1210390489486823e-05, - "loss": 0.0568, - "step": 51940 - }, - { - "epoch": 1.318568346236832, - "grad_norm": 0.6597729921340942, - "learning_rate": 1.1209544358421121e-05, - "loss": 0.0605, - "step": 51945 - }, - { - "epoch": 1.3186952658966873, - "grad_norm": 0.5509073734283447, - "learning_rate": 1.120869822735542e-05, - "loss": 0.0327, - "step": 51950 - }, - { - "epoch": 1.3188221855565427, - "grad_norm": 0.7804974317550659, - "learning_rate": 1.1207852096289715e-05, - "loss": 0.0559, - "step": 51955 - }, - { - "epoch": 1.318949105216398, - "grad_norm": 0.27943137288093567, - "learning_rate": 1.1207005965224013e-05, - "loss": 0.0411, - "step": 51960 - }, - { - "epoch": 1.3190760248762534, - "grad_norm": 0.39674514532089233, - "learning_rate": 1.1206159834158311e-05, - "loss": 0.0624, - "step": 51965 - }, - { - "epoch": 1.3192029445361086, - "grad_norm": 0.2917519807815552, - "learning_rate": 1.120531370309261e-05, - "loss": 0.0475, - "step": 51970 - }, - { - "epoch": 1.3193298641959639, - "grad_norm": 0.29745423793792725, - "learning_rate": 1.1204467572026907e-05, - "loss": 0.0583, - "step": 51975 - }, - { - "epoch": 1.3194567838558193, - "grad_norm": 0.5777328014373779, - "learning_rate": 1.1203621440961205e-05, - "loss": 0.0439, - "step": 51980 - }, - { - "epoch": 1.3195837035156746, - "grad_norm": 1.1755657196044922, - "learning_rate": 1.1202775309895503e-05, - "loss": 0.059, - "step": 51985 - }, - { - "epoch": 1.31971062317553, - "grad_norm": 0.35396525263786316, - "learning_rate": 1.1201929178829802e-05, - "loss": 0.0231, - "step": 51990 - }, - { - "epoch": 1.3198375428353852, - "grad_norm": 0.4098815321922302, - "learning_rate": 1.1201083047764098e-05, - "loss": 0.0455, - "step": 51995 - }, - { - "epoch": 1.3199644624952405, - "grad_norm": 0.5251819491386414, - "learning_rate": 1.1200236916698397e-05, - "loss": 0.0412, - "step": 52000 - }, - { - "epoch": 1.3200913821550957, - "grad_norm": 0.49328914284706116, - "learning_rate": 1.1199390785632695e-05, - "loss": 0.0422, - "step": 52005 - }, - { - "epoch": 1.3202183018149511, - "grad_norm": 0.387271523475647, - "learning_rate": 1.1198544654566994e-05, - "loss": 0.0412, - "step": 52010 - }, - { - "epoch": 1.3203452214748064, - "grad_norm": 0.46261313557624817, - "learning_rate": 1.119769852350129e-05, - "loss": 0.0494, - "step": 52015 - }, - { - "epoch": 1.3204721411346618, - "grad_norm": 0.37279194593429565, - "learning_rate": 1.1196852392435589e-05, - "loss": 0.0418, - "step": 52020 - }, - { - "epoch": 1.320599060794517, - "grad_norm": 0.42507919669151306, - "learning_rate": 1.1196006261369887e-05, - "loss": 0.0446, - "step": 52025 - }, - { - "epoch": 1.3207259804543723, - "grad_norm": 0.41799938678741455, - "learning_rate": 1.1195160130304186e-05, - "loss": 0.0423, - "step": 52030 - }, - { - "epoch": 1.3208529001142277, - "grad_norm": 0.5592229962348938, - "learning_rate": 1.1194313999238482e-05, - "loss": 0.054, - "step": 52035 - }, - { - "epoch": 1.320979819774083, - "grad_norm": 0.3884831964969635, - "learning_rate": 1.119346786817278e-05, - "loss": 0.0729, - "step": 52040 - }, - { - "epoch": 1.3211067394339384, - "grad_norm": 0.9319155812263489, - "learning_rate": 1.1192621737107079e-05, - "loss": 0.0462, - "step": 52045 - }, - { - "epoch": 1.3212336590937936, - "grad_norm": 0.5150001049041748, - "learning_rate": 1.1191775606041378e-05, - "loss": 0.0315, - "step": 52050 - }, - { - "epoch": 1.3213605787536489, - "grad_norm": 0.36233091354370117, - "learning_rate": 1.1190929474975674e-05, - "loss": 0.0336, - "step": 52055 - }, - { - "epoch": 1.3214874984135043, - "grad_norm": 0.6297858357429504, - "learning_rate": 1.1190083343909973e-05, - "loss": 0.0425, - "step": 52060 - }, - { - "epoch": 1.3216144180733596, - "grad_norm": 0.5519241094589233, - "learning_rate": 1.1189237212844271e-05, - "loss": 0.058, - "step": 52065 - }, - { - "epoch": 1.321741337733215, - "grad_norm": 0.7694854140281677, - "learning_rate": 1.118839108177857e-05, - "loss": 0.073, - "step": 52070 - }, - { - "epoch": 1.3218682573930702, - "grad_norm": 0.41316288709640503, - "learning_rate": 1.1187544950712864e-05, - "loss": 0.0487, - "step": 52075 - }, - { - "epoch": 1.3219951770529255, - "grad_norm": 0.3536502420902252, - "learning_rate": 1.1186698819647165e-05, - "loss": 0.0433, - "step": 52080 - }, - { - "epoch": 1.3221220967127807, - "grad_norm": 0.4322640299797058, - "learning_rate": 1.1185852688581463e-05, - "loss": 0.0302, - "step": 52085 - }, - { - "epoch": 1.3222490163726361, - "grad_norm": 0.7504964470863342, - "learning_rate": 1.1185006557515761e-05, - "loss": 0.049, - "step": 52090 - }, - { - "epoch": 1.3223759360324914, - "grad_norm": 0.42978164553642273, - "learning_rate": 1.1184160426450056e-05, - "loss": 0.043, - "step": 52095 - }, - { - "epoch": 1.3225028556923468, - "grad_norm": 0.43165335059165955, - "learning_rate": 1.1183314295384355e-05, - "loss": 0.0574, - "step": 52100 - }, - { - "epoch": 1.322629775352202, - "grad_norm": 0.33555763959884644, - "learning_rate": 1.1182468164318653e-05, - "loss": 0.0474, - "step": 52105 - }, - { - "epoch": 1.3227566950120573, - "grad_norm": 0.6092585921287537, - "learning_rate": 1.1181622033252952e-05, - "loss": 0.0697, - "step": 52110 - }, - { - "epoch": 1.3228836146719127, - "grad_norm": 0.5808975100517273, - "learning_rate": 1.1180775902187248e-05, - "loss": 0.0501, - "step": 52115 - }, - { - "epoch": 1.323010534331768, - "grad_norm": 0.29997867345809937, - "learning_rate": 1.1179929771121547e-05, - "loss": 0.0313, - "step": 52120 - }, - { - "epoch": 1.3231374539916234, - "grad_norm": 0.5199922919273376, - "learning_rate": 1.1179083640055845e-05, - "loss": 0.0334, - "step": 52125 - }, - { - "epoch": 1.3232643736514786, - "grad_norm": 0.4965060353279114, - "learning_rate": 1.1178237508990143e-05, - "loss": 0.024, - "step": 52130 - }, - { - "epoch": 1.3233912933113339, - "grad_norm": 0.5802716016769409, - "learning_rate": 1.1177391377924442e-05, - "loss": 0.045, - "step": 52135 - }, - { - "epoch": 1.323518212971189, - "grad_norm": 0.30247095227241516, - "learning_rate": 1.1176545246858739e-05, - "loss": 0.0456, - "step": 52140 - }, - { - "epoch": 1.3236451326310446, - "grad_norm": 0.5621630549430847, - "learning_rate": 1.1175699115793037e-05, - "loss": 0.0442, - "step": 52145 - }, - { - "epoch": 1.3237720522908998, - "grad_norm": 0.2858867049217224, - "learning_rate": 1.1174852984727335e-05, - "loss": 0.0394, - "step": 52150 - }, - { - "epoch": 1.3238989719507552, - "grad_norm": 0.9623168706893921, - "learning_rate": 1.1174006853661634e-05, - "loss": 0.0595, - "step": 52155 - }, - { - "epoch": 1.3240258916106105, - "grad_norm": 0.5302748084068298, - "learning_rate": 1.117316072259593e-05, - "loss": 0.054, - "step": 52160 - }, - { - "epoch": 1.3241528112704657, - "grad_norm": 0.49292200803756714, - "learning_rate": 1.1172314591530229e-05, - "loss": 0.0531, - "step": 52165 - }, - { - "epoch": 1.3242797309303211, - "grad_norm": 1.1690247058868408, - "learning_rate": 1.1171468460464527e-05, - "loss": 0.0458, - "step": 52170 - }, - { - "epoch": 1.3244066505901764, - "grad_norm": 0.4076308012008667, - "learning_rate": 1.1170622329398826e-05, - "loss": 0.0443, - "step": 52175 - }, - { - "epoch": 1.3245335702500318, - "grad_norm": 0.444553941488266, - "learning_rate": 1.1169776198333122e-05, - "loss": 0.0716, - "step": 52180 - }, - { - "epoch": 1.324660489909887, - "grad_norm": 0.5071567893028259, - "learning_rate": 1.116893006726742e-05, - "loss": 0.043, - "step": 52185 - }, - { - "epoch": 1.3247874095697423, - "grad_norm": 0.8273819088935852, - "learning_rate": 1.116808393620172e-05, - "loss": 0.0468, - "step": 52190 - }, - { - "epoch": 1.3249143292295977, - "grad_norm": 0.461258202791214, - "learning_rate": 1.1167237805136018e-05, - "loss": 0.0404, - "step": 52195 - }, - { - "epoch": 1.325041248889453, - "grad_norm": 0.6827552914619446, - "learning_rate": 1.1166391674070314e-05, - "loss": 0.0473, - "step": 52200 - }, - { - "epoch": 1.3251681685493084, - "grad_norm": 0.5346698760986328, - "learning_rate": 1.1165545543004613e-05, - "loss": 0.0625, - "step": 52205 - }, - { - "epoch": 1.3252950882091636, - "grad_norm": 0.208026722073555, - "learning_rate": 1.1164699411938911e-05, - "loss": 0.0501, - "step": 52210 - }, - { - "epoch": 1.3254220078690189, - "grad_norm": 0.5499128699302673, - "learning_rate": 1.116385328087321e-05, - "loss": 0.0466, - "step": 52215 - }, - { - "epoch": 1.325548927528874, - "grad_norm": 0.4940014183521271, - "learning_rate": 1.1163007149807506e-05, - "loss": 0.0392, - "step": 52220 - }, - { - "epoch": 1.3256758471887296, - "grad_norm": 0.9672101140022278, - "learning_rate": 1.1162161018741805e-05, - "loss": 0.0485, - "step": 52225 - }, - { - "epoch": 1.3258027668485848, - "grad_norm": 0.5458986163139343, - "learning_rate": 1.1161314887676103e-05, - "loss": 0.0488, - "step": 52230 - }, - { - "epoch": 1.3259296865084402, - "grad_norm": 0.48723235726356506, - "learning_rate": 1.1160468756610401e-05, - "loss": 0.058, - "step": 52235 - }, - { - "epoch": 1.3260566061682955, - "grad_norm": 0.5129174590110779, - "learning_rate": 1.1159622625544696e-05, - "loss": 0.0633, - "step": 52240 - }, - { - "epoch": 1.3261835258281507, - "grad_norm": 0.49010172486305237, - "learning_rate": 1.1158776494478995e-05, - "loss": 0.0472, - "step": 52245 - }, - { - "epoch": 1.3263104454880061, - "grad_norm": 1.349707007408142, - "learning_rate": 1.1157930363413293e-05, - "loss": 0.0533, - "step": 52250 - }, - { - "epoch": 1.3264373651478614, - "grad_norm": 0.6999847292900085, - "learning_rate": 1.1157084232347593e-05, - "loss": 0.045, - "step": 52255 - }, - { - "epoch": 1.3265642848077168, - "grad_norm": 0.6986672282218933, - "learning_rate": 1.1156238101281888e-05, - "loss": 0.0358, - "step": 52260 - }, - { - "epoch": 1.326691204467572, - "grad_norm": 0.7128406167030334, - "learning_rate": 1.1155391970216187e-05, - "loss": 0.0507, - "step": 52265 - }, - { - "epoch": 1.3268181241274273, - "grad_norm": 0.3095841109752655, - "learning_rate": 1.1154545839150485e-05, - "loss": 0.0293, - "step": 52270 - }, - { - "epoch": 1.3269450437872827, - "grad_norm": 0.6061810255050659, - "learning_rate": 1.1153699708084784e-05, - "loss": 0.0658, - "step": 52275 - }, - { - "epoch": 1.327071963447138, - "grad_norm": 0.3251649737358093, - "learning_rate": 1.115285357701908e-05, - "loss": 0.0541, - "step": 52280 - }, - { - "epoch": 1.3271988831069934, - "grad_norm": 0.37374287843704224, - "learning_rate": 1.1152007445953379e-05, - "loss": 0.0427, - "step": 52285 - }, - { - "epoch": 1.3273258027668486, - "grad_norm": 1.2597519159317017, - "learning_rate": 1.1151161314887677e-05, - "loss": 0.0571, - "step": 52290 - }, - { - "epoch": 1.3274527224267039, - "grad_norm": 0.41827353835105896, - "learning_rate": 1.1150315183821976e-05, - "loss": 0.0332, - "step": 52295 - }, - { - "epoch": 1.327579642086559, - "grad_norm": 0.5687612891197205, - "learning_rate": 1.1149469052756272e-05, - "loss": 0.0305, - "step": 52300 - }, - { - "epoch": 1.3277065617464145, - "grad_norm": 0.34608936309814453, - "learning_rate": 1.114862292169057e-05, - "loss": 0.0451, - "step": 52305 - }, - { - "epoch": 1.3278334814062698, - "grad_norm": 0.21250377595424652, - "learning_rate": 1.1147776790624869e-05, - "loss": 0.0367, - "step": 52310 - }, - { - "epoch": 1.3279604010661252, - "grad_norm": 0.548119843006134, - "learning_rate": 1.1146930659559167e-05, - "loss": 0.0649, - "step": 52315 - }, - { - "epoch": 1.3280873207259805, - "grad_norm": 0.3870033025741577, - "learning_rate": 1.1146084528493464e-05, - "loss": 0.0388, - "step": 52320 - }, - { - "epoch": 1.3282142403858357, - "grad_norm": 0.4258376657962799, - "learning_rate": 1.1145238397427763e-05, - "loss": 0.0415, - "step": 52325 - }, - { - "epoch": 1.3283411600456911, - "grad_norm": 0.3500882685184479, - "learning_rate": 1.1144392266362061e-05, - "loss": 0.0563, - "step": 52330 - }, - { - "epoch": 1.3284680797055464, - "grad_norm": 0.33070123195648193, - "learning_rate": 1.114354613529636e-05, - "loss": 0.0484, - "step": 52335 - }, - { - "epoch": 1.3285949993654018, - "grad_norm": 0.42885544896125793, - "learning_rate": 1.1142700004230656e-05, - "loss": 0.0464, - "step": 52340 - }, - { - "epoch": 1.328721919025257, - "grad_norm": 0.535929799079895, - "learning_rate": 1.1141853873164954e-05, - "loss": 0.0496, - "step": 52345 - }, - { - "epoch": 1.3288488386851123, - "grad_norm": 0.5201472640037537, - "learning_rate": 1.1141007742099253e-05, - "loss": 0.0458, - "step": 52350 - }, - { - "epoch": 1.3289757583449675, - "grad_norm": 0.6322513222694397, - "learning_rate": 1.1140161611033551e-05, - "loss": 0.0416, - "step": 52355 - }, - { - "epoch": 1.329102678004823, - "grad_norm": 0.7035607099533081, - "learning_rate": 1.1139315479967848e-05, - "loss": 0.0559, - "step": 52360 - }, - { - "epoch": 1.3292295976646782, - "grad_norm": 0.6343091130256653, - "learning_rate": 1.1138469348902146e-05, - "loss": 0.0621, - "step": 52365 - }, - { - "epoch": 1.3293565173245336, - "grad_norm": 0.4332626163959503, - "learning_rate": 1.1137623217836445e-05, - "loss": 0.0332, - "step": 52370 - }, - { - "epoch": 1.3294834369843889, - "grad_norm": 0.5000534653663635, - "learning_rate": 1.1136777086770743e-05, - "loss": 0.0363, - "step": 52375 - }, - { - "epoch": 1.329610356644244, - "grad_norm": 0.326675683259964, - "learning_rate": 1.1135930955705038e-05, - "loss": 0.0372, - "step": 52380 - }, - { - "epoch": 1.3297372763040995, - "grad_norm": 0.40810126066207886, - "learning_rate": 1.1135084824639337e-05, - "loss": 0.0392, - "step": 52385 - }, - { - "epoch": 1.3298641959639548, - "grad_norm": 0.4647757411003113, - "learning_rate": 1.1134238693573635e-05, - "loss": 0.0526, - "step": 52390 - }, - { - "epoch": 1.3299911156238102, - "grad_norm": 0.42562225461006165, - "learning_rate": 1.1133392562507933e-05, - "loss": 0.0388, - "step": 52395 - }, - { - "epoch": 1.3301180352836655, - "grad_norm": 0.4812804162502289, - "learning_rate": 1.113254643144223e-05, - "loss": 0.0773, - "step": 52400 - }, - { - "epoch": 1.3302449549435207, - "grad_norm": 0.38087984919548035, - "learning_rate": 1.1131700300376528e-05, - "loss": 0.0404, - "step": 52405 - }, - { - "epoch": 1.3303718746033761, - "grad_norm": 0.387320339679718, - "learning_rate": 1.1130854169310827e-05, - "loss": 0.0439, - "step": 52410 - }, - { - "epoch": 1.3304987942632314, - "grad_norm": 0.3978314697742462, - "learning_rate": 1.1130008038245125e-05, - "loss": 0.0449, - "step": 52415 - }, - { - "epoch": 1.3306257139230868, - "grad_norm": 0.5318277478218079, - "learning_rate": 1.1129161907179422e-05, - "loss": 0.0488, - "step": 52420 - }, - { - "epoch": 1.330752633582942, - "grad_norm": 0.7158482670783997, - "learning_rate": 1.112831577611372e-05, - "loss": 0.057, - "step": 52425 - }, - { - "epoch": 1.3308795532427973, - "grad_norm": 0.41811636090278625, - "learning_rate": 1.1127469645048019e-05, - "loss": 0.0462, - "step": 52430 - }, - { - "epoch": 1.3310064729026525, - "grad_norm": 0.5206167697906494, - "learning_rate": 1.1126623513982317e-05, - "loss": 0.0675, - "step": 52435 - }, - { - "epoch": 1.331133392562508, - "grad_norm": 0.5125089287757874, - "learning_rate": 1.1125777382916614e-05, - "loss": 0.0416, - "step": 52440 - }, - { - "epoch": 1.3312603122223632, - "grad_norm": 0.6721792817115784, - "learning_rate": 1.1124931251850912e-05, - "loss": 0.0507, - "step": 52445 - }, - { - "epoch": 1.3313872318822186, - "grad_norm": 0.4159162640571594, - "learning_rate": 1.112408512078521e-05, - "loss": 0.0375, - "step": 52450 - }, - { - "epoch": 1.3315141515420739, - "grad_norm": 0.7864142060279846, - "learning_rate": 1.1123238989719509e-05, - "loss": 0.0417, - "step": 52455 - }, - { - "epoch": 1.331641071201929, - "grad_norm": 0.5053152441978455, - "learning_rate": 1.1122392858653806e-05, - "loss": 0.0474, - "step": 52460 - }, - { - "epoch": 1.3317679908617845, - "grad_norm": 0.36263588070869446, - "learning_rate": 1.1121546727588104e-05, - "loss": 0.0395, - "step": 52465 - }, - { - "epoch": 1.3318949105216398, - "grad_norm": 0.6311860084533691, - "learning_rate": 1.1120700596522403e-05, - "loss": 0.0585, - "step": 52470 - }, - { - "epoch": 1.3320218301814952, - "grad_norm": 0.5844104886054993, - "learning_rate": 1.1119854465456701e-05, - "loss": 0.0305, - "step": 52475 - }, - { - "epoch": 1.3321487498413505, - "grad_norm": 0.8917750716209412, - "learning_rate": 1.1119008334390998e-05, - "loss": 0.0443, - "step": 52480 - }, - { - "epoch": 1.3322756695012057, - "grad_norm": 0.43973079323768616, - "learning_rate": 1.1118162203325296e-05, - "loss": 0.0363, - "step": 52485 - }, - { - "epoch": 1.332402589161061, - "grad_norm": 0.4064072370529175, - "learning_rate": 1.1117316072259595e-05, - "loss": 0.0406, - "step": 52490 - }, - { - "epoch": 1.3325295088209164, - "grad_norm": 0.5464910864830017, - "learning_rate": 1.1116469941193893e-05, - "loss": 0.046, - "step": 52495 - }, - { - "epoch": 1.3326564284807716, - "grad_norm": 0.6597504615783691, - "learning_rate": 1.1115623810128188e-05, - "loss": 0.04, - "step": 52500 - }, - { - "epoch": 1.332783348140627, - "grad_norm": 0.231491819024086, - "learning_rate": 1.1114777679062488e-05, - "loss": 0.0289, - "step": 52505 - }, - { - "epoch": 1.3329102678004823, - "grad_norm": 0.3438830077648163, - "learning_rate": 1.1113931547996786e-05, - "loss": 0.0419, - "step": 52510 - }, - { - "epoch": 1.3330371874603375, - "grad_norm": 0.4861186146736145, - "learning_rate": 1.1113085416931085e-05, - "loss": 0.054, - "step": 52515 - }, - { - "epoch": 1.333164107120193, - "grad_norm": 0.6242367029190063, - "learning_rate": 1.111223928586538e-05, - "loss": 0.0556, - "step": 52520 - }, - { - "epoch": 1.3332910267800482, - "grad_norm": 0.24497170746326447, - "learning_rate": 1.1111393154799678e-05, - "loss": 0.0356, - "step": 52525 - }, - { - "epoch": 1.3334179464399036, - "grad_norm": 0.34950006008148193, - "learning_rate": 1.1110547023733977e-05, - "loss": 0.032, - "step": 52530 - }, - { - "epoch": 1.3335448660997589, - "grad_norm": 0.7477009892463684, - "learning_rate": 1.1109700892668275e-05, - "loss": 0.069, - "step": 52535 - }, - { - "epoch": 1.333671785759614, - "grad_norm": 0.5540804266929626, - "learning_rate": 1.1108854761602572e-05, - "loss": 0.0374, - "step": 52540 - }, - { - "epoch": 1.3337987054194695, - "grad_norm": 0.5038281083106995, - "learning_rate": 1.110800863053687e-05, - "loss": 0.0558, - "step": 52545 - }, - { - "epoch": 1.3339256250793248, - "grad_norm": 0.6040815114974976, - "learning_rate": 1.1107162499471169e-05, - "loss": 0.0507, - "step": 52550 - }, - { - "epoch": 1.3340525447391802, - "grad_norm": 0.4221320152282715, - "learning_rate": 1.1106316368405467e-05, - "loss": 0.0353, - "step": 52555 - }, - { - "epoch": 1.3341794643990355, - "grad_norm": 0.6520642638206482, - "learning_rate": 1.1105470237339764e-05, - "loss": 0.0544, - "step": 52560 - }, - { - "epoch": 1.3343063840588907, - "grad_norm": 0.34965091943740845, - "learning_rate": 1.1104624106274062e-05, - "loss": 0.047, - "step": 52565 - }, - { - "epoch": 1.334433303718746, - "grad_norm": 0.49959617853164673, - "learning_rate": 1.110377797520836e-05, - "loss": 0.0412, - "step": 52570 - }, - { - "epoch": 1.3345602233786014, - "grad_norm": 0.37894871830940247, - "learning_rate": 1.1102931844142659e-05, - "loss": 0.0405, - "step": 52575 - }, - { - "epoch": 1.3346871430384566, - "grad_norm": 0.4863879084587097, - "learning_rate": 1.1102085713076956e-05, - "loss": 0.0548, - "step": 52580 - }, - { - "epoch": 1.334814062698312, - "grad_norm": 0.48451393842697144, - "learning_rate": 1.1101239582011254e-05, - "loss": 0.0676, - "step": 52585 - }, - { - "epoch": 1.3349409823581673, - "grad_norm": 0.7893093824386597, - "learning_rate": 1.1100393450945552e-05, - "loss": 0.0501, - "step": 52590 - }, - { - "epoch": 1.3350679020180225, - "grad_norm": 0.3295591473579407, - "learning_rate": 1.109954731987985e-05, - "loss": 0.0401, - "step": 52595 - }, - { - "epoch": 1.335194821677878, - "grad_norm": 0.2455197274684906, - "learning_rate": 1.1098701188814148e-05, - "loss": 0.0409, - "step": 52600 - }, - { - "epoch": 1.3353217413377332, - "grad_norm": 0.6198737025260925, - "learning_rate": 1.1097855057748446e-05, - "loss": 0.0468, - "step": 52605 - }, - { - "epoch": 1.3354486609975886, - "grad_norm": 0.6078842878341675, - "learning_rate": 1.1097008926682744e-05, - "loss": 0.0358, - "step": 52610 - }, - { - "epoch": 1.3355755806574439, - "grad_norm": 0.5498015880584717, - "learning_rate": 1.1096162795617043e-05, - "loss": 0.059, - "step": 52615 - }, - { - "epoch": 1.335702500317299, - "grad_norm": 0.5896313786506653, - "learning_rate": 1.109531666455134e-05, - "loss": 0.0485, - "step": 52620 - }, - { - "epoch": 1.3358294199771545, - "grad_norm": 0.4376700222492218, - "learning_rate": 1.1094470533485638e-05, - "loss": 0.0622, - "step": 52625 - }, - { - "epoch": 1.3359563396370098, - "grad_norm": 0.5324379205703735, - "learning_rate": 1.1093624402419936e-05, - "loss": 0.0434, - "step": 52630 - }, - { - "epoch": 1.3360832592968652, - "grad_norm": 0.5381837487220764, - "learning_rate": 1.1092778271354235e-05, - "loss": 0.0555, - "step": 52635 - }, - { - "epoch": 1.3362101789567205, - "grad_norm": 0.44530603289604187, - "learning_rate": 1.109193214028853e-05, - "loss": 0.0716, - "step": 52640 - }, - { - "epoch": 1.3363370986165757, - "grad_norm": 0.42441943287849426, - "learning_rate": 1.109108600922283e-05, - "loss": 0.0562, - "step": 52645 - }, - { - "epoch": 1.336464018276431, - "grad_norm": 0.21617455780506134, - "learning_rate": 1.1090239878157128e-05, - "loss": 0.0387, - "step": 52650 - }, - { - "epoch": 1.3365909379362864, - "grad_norm": 0.42741158604621887, - "learning_rate": 1.1089393747091427e-05, - "loss": 0.05, - "step": 52655 - }, - { - "epoch": 1.3367178575961416, - "grad_norm": 1.1569139957427979, - "learning_rate": 1.1088547616025725e-05, - "loss": 0.0347, - "step": 52660 - }, - { - "epoch": 1.336844777255997, - "grad_norm": 0.17814187705516815, - "learning_rate": 1.108770148496002e-05, - "loss": 0.034, - "step": 52665 - }, - { - "epoch": 1.3369716969158523, - "grad_norm": 0.446382075548172, - "learning_rate": 1.1086855353894318e-05, - "loss": 0.0561, - "step": 52670 - }, - { - "epoch": 1.3370986165757075, - "grad_norm": 0.40200573205947876, - "learning_rate": 1.1086009222828617e-05, - "loss": 0.0362, - "step": 52675 - }, - { - "epoch": 1.337225536235563, - "grad_norm": 0.46044519543647766, - "learning_rate": 1.1085163091762917e-05, - "loss": 0.046, - "step": 52680 - }, - { - "epoch": 1.3373524558954182, - "grad_norm": 0.37783125042915344, - "learning_rate": 1.1084316960697212e-05, - "loss": 0.035, - "step": 52685 - }, - { - "epoch": 1.3374793755552736, - "grad_norm": 0.6366821527481079, - "learning_rate": 1.108347082963151e-05, - "loss": 0.0663, - "step": 52690 - }, - { - "epoch": 1.3376062952151289, - "grad_norm": 0.4284309446811676, - "learning_rate": 1.1082624698565809e-05, - "loss": 0.0546, - "step": 52695 - }, - { - "epoch": 1.337733214874984, - "grad_norm": 0.4953398108482361, - "learning_rate": 1.1081778567500107e-05, - "loss": 0.0588, - "step": 52700 - }, - { - "epoch": 1.3378601345348393, - "grad_norm": 0.4262517988681793, - "learning_rate": 1.1080932436434404e-05, - "loss": 0.0426, - "step": 52705 - }, - { - "epoch": 1.3379870541946948, - "grad_norm": 0.26062002778053284, - "learning_rate": 1.1080086305368702e-05, - "loss": 0.0517, - "step": 52710 - }, - { - "epoch": 1.33811397385455, - "grad_norm": 0.6285046935081482, - "learning_rate": 1.1079240174303e-05, - "loss": 0.0391, - "step": 52715 - }, - { - "epoch": 1.3382408935144054, - "grad_norm": 0.7298570871353149, - "learning_rate": 1.1078394043237299e-05, - "loss": 0.0471, - "step": 52720 - }, - { - "epoch": 1.3383678131742607, - "grad_norm": 0.5900947451591492, - "learning_rate": 1.1077547912171596e-05, - "loss": 0.0384, - "step": 52725 - }, - { - "epoch": 1.338494732834116, - "grad_norm": 0.5464057326316833, - "learning_rate": 1.1076701781105894e-05, - "loss": 0.0499, - "step": 52730 - }, - { - "epoch": 1.3386216524939714, - "grad_norm": 0.3692372739315033, - "learning_rate": 1.1075855650040193e-05, - "loss": 0.0617, - "step": 52735 - }, - { - "epoch": 1.3387485721538266, - "grad_norm": 0.6021597981452942, - "learning_rate": 1.1075009518974491e-05, - "loss": 0.0511, - "step": 52740 - }, - { - "epoch": 1.338875491813682, - "grad_norm": 0.38757970929145813, - "learning_rate": 1.1074163387908788e-05, - "loss": 0.0427, - "step": 52745 - }, - { - "epoch": 1.3390024114735373, - "grad_norm": 0.428615927696228, - "learning_rate": 1.1073317256843086e-05, - "loss": 0.042, - "step": 52750 - }, - { - "epoch": 1.3391293311333925, - "grad_norm": 0.35450947284698486, - "learning_rate": 1.1072471125777384e-05, - "loss": 0.0329, - "step": 52755 - }, - { - "epoch": 1.339256250793248, - "grad_norm": 0.41152021288871765, - "learning_rate": 1.1071624994711683e-05, - "loss": 0.0435, - "step": 52760 - }, - { - "epoch": 1.3393831704531032, - "grad_norm": 0.31707775592803955, - "learning_rate": 1.107077886364598e-05, - "loss": 0.0387, - "step": 52765 - }, - { - "epoch": 1.3395100901129586, - "grad_norm": 0.8103160262107849, - "learning_rate": 1.1069932732580278e-05, - "loss": 0.0556, - "step": 52770 - }, - { - "epoch": 1.3396370097728139, - "grad_norm": 0.3172599673271179, - "learning_rate": 1.1069086601514576e-05, - "loss": 0.0596, - "step": 52775 - }, - { - "epoch": 1.339763929432669, - "grad_norm": 0.36891868710517883, - "learning_rate": 1.1068240470448875e-05, - "loss": 0.0435, - "step": 52780 - }, - { - "epoch": 1.3398908490925243, - "grad_norm": 0.3704805374145508, - "learning_rate": 1.1067394339383171e-05, - "loss": 0.038, - "step": 52785 - }, - { - "epoch": 1.3400177687523798, - "grad_norm": 0.5198268890380859, - "learning_rate": 1.106654820831747e-05, - "loss": 0.0675, - "step": 52790 - }, - { - "epoch": 1.340144688412235, - "grad_norm": 0.3371961712837219, - "learning_rate": 1.1065702077251768e-05, - "loss": 0.0582, - "step": 52795 - }, - { - "epoch": 1.3402716080720904, - "grad_norm": 0.34088411927223206, - "learning_rate": 1.1064855946186067e-05, - "loss": 0.0388, - "step": 52800 - }, - { - "epoch": 1.3403985277319457, - "grad_norm": 0.49113765358924866, - "learning_rate": 1.1064009815120362e-05, - "loss": 0.0418, - "step": 52805 - }, - { - "epoch": 1.340525447391801, - "grad_norm": 0.5627961158752441, - "learning_rate": 1.106316368405466e-05, - "loss": 0.0533, - "step": 52810 - }, - { - "epoch": 1.3406523670516564, - "grad_norm": 0.5284172892570496, - "learning_rate": 1.1062317552988958e-05, - "loss": 0.0636, - "step": 52815 - }, - { - "epoch": 1.3407792867115116, - "grad_norm": 0.5894677042961121, - "learning_rate": 1.1061471421923257e-05, - "loss": 0.0616, - "step": 52820 - }, - { - "epoch": 1.340906206371367, - "grad_norm": 0.764873206615448, - "learning_rate": 1.1060625290857554e-05, - "loss": 0.0539, - "step": 52825 - }, - { - "epoch": 1.3410331260312223, - "grad_norm": 0.4973489046096802, - "learning_rate": 1.1059779159791852e-05, - "loss": 0.0571, - "step": 52830 - }, - { - "epoch": 1.3411600456910775, - "grad_norm": 0.6249152421951294, - "learning_rate": 1.105893302872615e-05, - "loss": 0.0399, - "step": 52835 - }, - { - "epoch": 1.3412869653509327, - "grad_norm": 0.696834921836853, - "learning_rate": 1.1058086897660449e-05, - "loss": 0.0492, - "step": 52840 - }, - { - "epoch": 1.3414138850107882, - "grad_norm": 0.5117500424385071, - "learning_rate": 1.1057240766594746e-05, - "loss": 0.0387, - "step": 52845 - }, - { - "epoch": 1.3415408046706434, - "grad_norm": 0.5268440246582031, - "learning_rate": 1.1056394635529044e-05, - "loss": 0.0517, - "step": 52850 - }, - { - "epoch": 1.3416677243304989, - "grad_norm": 0.3537476658821106, - "learning_rate": 1.1055548504463342e-05, - "loss": 0.0556, - "step": 52855 - }, - { - "epoch": 1.341794643990354, - "grad_norm": 0.5214223861694336, - "learning_rate": 1.105470237339764e-05, - "loss": 0.0453, - "step": 52860 - }, - { - "epoch": 1.3419215636502093, - "grad_norm": 0.5020655989646912, - "learning_rate": 1.1053856242331937e-05, - "loss": 0.0485, - "step": 52865 - }, - { - "epoch": 1.3420484833100648, - "grad_norm": 0.5933429598808289, - "learning_rate": 1.1053010111266236e-05, - "loss": 0.0505, - "step": 52870 - }, - { - "epoch": 1.34217540296992, - "grad_norm": 0.49892038106918335, - "learning_rate": 1.1052163980200534e-05, - "loss": 0.062, - "step": 52875 - }, - { - "epoch": 1.3423023226297754, - "grad_norm": 0.42024311423301697, - "learning_rate": 1.1051317849134833e-05, - "loss": 0.0464, - "step": 52880 - }, - { - "epoch": 1.3424292422896307, - "grad_norm": 0.36293825507164, - "learning_rate": 1.105047171806913e-05, - "loss": 0.0514, - "step": 52885 - }, - { - "epoch": 1.342556161949486, - "grad_norm": 0.5293930172920227, - "learning_rate": 1.1049625587003428e-05, - "loss": 0.0458, - "step": 52890 - }, - { - "epoch": 1.3426830816093414, - "grad_norm": 0.43361079692840576, - "learning_rate": 1.1048779455937726e-05, - "loss": 0.0466, - "step": 52895 - }, - { - "epoch": 1.3428100012691966, - "grad_norm": 0.8886879682540894, - "learning_rate": 1.1047933324872025e-05, - "loss": 0.0543, - "step": 52900 - }, - { - "epoch": 1.342936920929052, - "grad_norm": 0.42494216561317444, - "learning_rate": 1.1047087193806321e-05, - "loss": 0.0431, - "step": 52905 - }, - { - "epoch": 1.3430638405889073, - "grad_norm": 0.3489678204059601, - "learning_rate": 1.104624106274062e-05, - "loss": 0.0585, - "step": 52910 - }, - { - "epoch": 1.3431907602487625, - "grad_norm": 0.3530276417732239, - "learning_rate": 1.1045394931674918e-05, - "loss": 0.0493, - "step": 52915 - }, - { - "epoch": 1.3433176799086177, - "grad_norm": 0.35376977920532227, - "learning_rate": 1.1044548800609216e-05, - "loss": 0.0407, - "step": 52920 - }, - { - "epoch": 1.3434445995684732, - "grad_norm": 0.543273389339447, - "learning_rate": 1.1043702669543513e-05, - "loss": 0.0577, - "step": 52925 - }, - { - "epoch": 1.3435715192283284, - "grad_norm": 0.5101701617240906, - "learning_rate": 1.1042856538477812e-05, - "loss": 0.0578, - "step": 52930 - }, - { - "epoch": 1.3436984388881839, - "grad_norm": 0.3993208706378937, - "learning_rate": 1.104201040741211e-05, - "loss": 0.0475, - "step": 52935 - }, - { - "epoch": 1.343825358548039, - "grad_norm": 0.48680874705314636, - "learning_rate": 1.1041164276346408e-05, - "loss": 0.0525, - "step": 52940 - }, - { - "epoch": 1.3439522782078943, - "grad_norm": 0.3107956647872925, - "learning_rate": 1.1040318145280703e-05, - "loss": 0.0489, - "step": 52945 - }, - { - "epoch": 1.3440791978677498, - "grad_norm": 0.5420413017272949, - "learning_rate": 1.1039472014215002e-05, - "loss": 0.0483, - "step": 52950 - }, - { - "epoch": 1.344206117527605, - "grad_norm": 0.34761297702789307, - "learning_rate": 1.10386258831493e-05, - "loss": 0.0342, - "step": 52955 - }, - { - "epoch": 1.3443330371874604, - "grad_norm": 1.4526066780090332, - "learning_rate": 1.1037779752083599e-05, - "loss": 0.0394, - "step": 52960 - }, - { - "epoch": 1.3444599568473157, - "grad_norm": 1.1488935947418213, - "learning_rate": 1.1036933621017895e-05, - "loss": 0.0548, - "step": 52965 - }, - { - "epoch": 1.344586876507171, - "grad_norm": 0.3175216019153595, - "learning_rate": 1.1036087489952194e-05, - "loss": 0.0369, - "step": 52970 - }, - { - "epoch": 1.3447137961670264, - "grad_norm": 0.5850012302398682, - "learning_rate": 1.1035241358886492e-05, - "loss": 0.0407, - "step": 52975 - }, - { - "epoch": 1.3448407158268816, - "grad_norm": 0.3800737261772156, - "learning_rate": 1.103439522782079e-05, - "loss": 0.0348, - "step": 52980 - }, - { - "epoch": 1.344967635486737, - "grad_norm": 0.8202928900718689, - "learning_rate": 1.1033549096755087e-05, - "loss": 0.0449, - "step": 52985 - }, - { - "epoch": 1.3450945551465923, - "grad_norm": 0.5413805246353149, - "learning_rate": 1.1032702965689386e-05, - "loss": 0.0473, - "step": 52990 - }, - { - "epoch": 1.3452214748064475, - "grad_norm": 0.2781500518321991, - "learning_rate": 1.1031856834623684e-05, - "loss": 0.0381, - "step": 52995 - }, - { - "epoch": 1.3453483944663027, - "grad_norm": 0.33299726247787476, - "learning_rate": 1.1031010703557982e-05, - "loss": 0.0447, - "step": 53000 - }, - { - "epoch": 1.3454753141261582, - "grad_norm": 0.4322962462902069, - "learning_rate": 1.1030164572492279e-05, - "loss": 0.0489, - "step": 53005 - }, - { - "epoch": 1.3456022337860134, - "grad_norm": 0.3978603184223175, - "learning_rate": 1.1029318441426578e-05, - "loss": 0.0463, - "step": 53010 - }, - { - "epoch": 1.3457291534458689, - "grad_norm": 0.4901110529899597, - "learning_rate": 1.1028472310360876e-05, - "loss": 0.0539, - "step": 53015 - }, - { - "epoch": 1.345856073105724, - "grad_norm": 0.4323243796825409, - "learning_rate": 1.1027626179295174e-05, - "loss": 0.0336, - "step": 53020 - }, - { - "epoch": 1.3459829927655793, - "grad_norm": 0.567152738571167, - "learning_rate": 1.1026780048229471e-05, - "loss": 0.0444, - "step": 53025 - }, - { - "epoch": 1.3461099124254348, - "grad_norm": 0.3241184651851654, - "learning_rate": 1.102593391716377e-05, - "loss": 0.0412, - "step": 53030 - }, - { - "epoch": 1.34623683208529, - "grad_norm": 0.4159352481365204, - "learning_rate": 1.1025087786098068e-05, - "loss": 0.058, - "step": 53035 - }, - { - "epoch": 1.3463637517451454, - "grad_norm": 0.6665406227111816, - "learning_rate": 1.1024241655032366e-05, - "loss": 0.0348, - "step": 53040 - }, - { - "epoch": 1.3464906714050007, - "grad_norm": 0.36050543189048767, - "learning_rate": 1.1023395523966663e-05, - "loss": 0.0393, - "step": 53045 - }, - { - "epoch": 1.346617591064856, - "grad_norm": 0.44552376866340637, - "learning_rate": 1.1022549392900961e-05, - "loss": 0.0567, - "step": 53050 - }, - { - "epoch": 1.3467445107247111, - "grad_norm": 0.4481447637081146, - "learning_rate": 1.102170326183526e-05, - "loss": 0.0481, - "step": 53055 - }, - { - "epoch": 1.3468714303845666, - "grad_norm": 0.43383464217185974, - "learning_rate": 1.1020857130769558e-05, - "loss": 0.0491, - "step": 53060 - }, - { - "epoch": 1.3469983500444218, - "grad_norm": 0.5679055452346802, - "learning_rate": 1.1020010999703853e-05, - "loss": 0.0418, - "step": 53065 - }, - { - "epoch": 1.3471252697042773, - "grad_norm": 0.27192381024360657, - "learning_rate": 1.1019164868638153e-05, - "loss": 0.0508, - "step": 53070 - }, - { - "epoch": 1.3472521893641325, - "grad_norm": 0.7341122627258301, - "learning_rate": 1.1018318737572452e-05, - "loss": 0.0552, - "step": 53075 - }, - { - "epoch": 1.3473791090239877, - "grad_norm": 0.49983227252960205, - "learning_rate": 1.101747260650675e-05, - "loss": 0.0407, - "step": 53080 - }, - { - "epoch": 1.3475060286838432, - "grad_norm": 0.4972081780433655, - "learning_rate": 1.1016626475441045e-05, - "loss": 0.0483, - "step": 53085 - }, - { - "epoch": 1.3476329483436984, - "grad_norm": 0.7382157444953918, - "learning_rate": 1.1015780344375343e-05, - "loss": 0.0634, - "step": 53090 - }, - { - "epoch": 1.3477598680035539, - "grad_norm": 0.6699075698852539, - "learning_rate": 1.1014934213309642e-05, - "loss": 0.0408, - "step": 53095 - }, - { - "epoch": 1.347886787663409, - "grad_norm": 0.3825647234916687, - "learning_rate": 1.101408808224394e-05, - "loss": 0.0423, - "step": 53100 - }, - { - "epoch": 1.3480137073232643, - "grad_norm": 0.342568576335907, - "learning_rate": 1.1013241951178237e-05, - "loss": 0.0547, - "step": 53105 - }, - { - "epoch": 1.3481406269831198, - "grad_norm": 0.6396322250366211, - "learning_rate": 1.1012395820112535e-05, - "loss": 0.047, - "step": 53110 - }, - { - "epoch": 1.348267546642975, - "grad_norm": 0.3856010437011719, - "learning_rate": 1.1011549689046834e-05, - "loss": 0.0285, - "step": 53115 - }, - { - "epoch": 1.3483944663028304, - "grad_norm": 0.3921366333961487, - "learning_rate": 1.1010703557981132e-05, - "loss": 0.0478, - "step": 53120 - }, - { - "epoch": 1.3485213859626857, - "grad_norm": 0.36515989899635315, - "learning_rate": 1.1009857426915429e-05, - "loss": 0.0385, - "step": 53125 - }, - { - "epoch": 1.348648305622541, - "grad_norm": 0.8691408038139343, - "learning_rate": 1.1009011295849727e-05, - "loss": 0.0491, - "step": 53130 - }, - { - "epoch": 1.3487752252823961, - "grad_norm": 0.43723711371421814, - "learning_rate": 1.1008165164784026e-05, - "loss": 0.0565, - "step": 53135 - }, - { - "epoch": 1.3489021449422516, - "grad_norm": 1.0473073720932007, - "learning_rate": 1.1007319033718324e-05, - "loss": 0.0291, - "step": 53140 - }, - { - "epoch": 1.3490290646021068, - "grad_norm": 0.35291609168052673, - "learning_rate": 1.100647290265262e-05, - "loss": 0.0396, - "step": 53145 - }, - { - "epoch": 1.3491559842619623, - "grad_norm": 0.32374483346939087, - "learning_rate": 1.100562677158692e-05, - "loss": 0.0441, - "step": 53150 - }, - { - "epoch": 1.3492829039218175, - "grad_norm": 0.6807259321212769, - "learning_rate": 1.1004780640521218e-05, - "loss": 0.0501, - "step": 53155 - }, - { - "epoch": 1.3494098235816727, - "grad_norm": 0.3930251896381378, - "learning_rate": 1.1003934509455516e-05, - "loss": 0.0438, - "step": 53160 - }, - { - "epoch": 1.3495367432415282, - "grad_norm": 1.3577582836151123, - "learning_rate": 1.1003088378389814e-05, - "loss": 0.0486, - "step": 53165 - }, - { - "epoch": 1.3496636629013834, - "grad_norm": 0.8024559617042542, - "learning_rate": 1.1002242247324111e-05, - "loss": 0.0534, - "step": 53170 - }, - { - "epoch": 1.3497905825612388, - "grad_norm": 0.7474536299705505, - "learning_rate": 1.100139611625841e-05, - "loss": 0.0406, - "step": 53175 - }, - { - "epoch": 1.349917502221094, - "grad_norm": 0.4460926353931427, - "learning_rate": 1.1000549985192708e-05, - "loss": 0.0316, - "step": 53180 - }, - { - "epoch": 1.3500444218809493, - "grad_norm": 0.5796162486076355, - "learning_rate": 1.0999703854127006e-05, - "loss": 0.0392, - "step": 53185 - }, - { - "epoch": 1.3501713415408045, - "grad_norm": 0.356139600276947, - "learning_rate": 1.0998857723061303e-05, - "loss": 0.0535, - "step": 53190 - }, - { - "epoch": 1.35029826120066, - "grad_norm": 0.49730184674263, - "learning_rate": 1.0998011591995601e-05, - "loss": 0.0579, - "step": 53195 - }, - { - "epoch": 1.3504251808605152, - "grad_norm": 0.36840641498565674, - "learning_rate": 1.09971654609299e-05, - "loss": 0.034, - "step": 53200 - }, - { - "epoch": 1.3505521005203707, - "grad_norm": 0.3170594871044159, - "learning_rate": 1.0996319329864198e-05, - "loss": 0.0532, - "step": 53205 - }, - { - "epoch": 1.350679020180226, - "grad_norm": 0.46470481157302856, - "learning_rate": 1.0995473198798495e-05, - "loss": 0.0366, - "step": 53210 - }, - { - "epoch": 1.3508059398400811, - "grad_norm": 0.7157647013664246, - "learning_rate": 1.0994627067732793e-05, - "loss": 0.0453, - "step": 53215 - }, - { - "epoch": 1.3509328594999366, - "grad_norm": 0.6086739897727966, - "learning_rate": 1.0993780936667092e-05, - "loss": 0.0335, - "step": 53220 - }, - { - "epoch": 1.3510597791597918, - "grad_norm": 0.4176923930644989, - "learning_rate": 1.099293480560139e-05, - "loss": 0.0408, - "step": 53225 - }, - { - "epoch": 1.3511866988196473, - "grad_norm": 0.310525506734848, - "learning_rate": 1.0992088674535685e-05, - "loss": 0.033, - "step": 53230 - }, - { - "epoch": 1.3513136184795025, - "grad_norm": 0.5805631279945374, - "learning_rate": 1.0991242543469984e-05, - "loss": 0.0423, - "step": 53235 - }, - { - "epoch": 1.3514405381393577, - "grad_norm": 0.3773474097251892, - "learning_rate": 1.0990396412404282e-05, - "loss": 0.0421, - "step": 53240 - }, - { - "epoch": 1.3515674577992132, - "grad_norm": 0.38054558634757996, - "learning_rate": 1.0989550281338582e-05, - "loss": 0.0798, - "step": 53245 - }, - { - "epoch": 1.3516943774590684, - "grad_norm": 0.5591832995414734, - "learning_rate": 1.0988704150272877e-05, - "loss": 0.0616, - "step": 53250 - }, - { - "epoch": 1.3518212971189238, - "grad_norm": 0.560260534286499, - "learning_rate": 1.0987858019207176e-05, - "loss": 0.0559, - "step": 53255 - }, - { - "epoch": 1.351948216778779, - "grad_norm": 0.3620752692222595, - "learning_rate": 1.0987011888141474e-05, - "loss": 0.0417, - "step": 53260 - }, - { - "epoch": 1.3520751364386343, - "grad_norm": 0.515896737575531, - "learning_rate": 1.0986165757075772e-05, - "loss": 0.0411, - "step": 53265 - }, - { - "epoch": 1.3522020560984895, - "grad_norm": 0.49279922246932983, - "learning_rate": 1.0985319626010069e-05, - "loss": 0.0496, - "step": 53270 - }, - { - "epoch": 1.352328975758345, - "grad_norm": 0.36664626002311707, - "learning_rate": 1.0984473494944367e-05, - "loss": 0.0605, - "step": 53275 - }, - { - "epoch": 1.3524558954182002, - "grad_norm": 0.6199666261672974, - "learning_rate": 1.0983627363878666e-05, - "loss": 0.0606, - "step": 53280 - }, - { - "epoch": 1.3525828150780557, - "grad_norm": 0.5378656983375549, - "learning_rate": 1.0982781232812964e-05, - "loss": 0.0374, - "step": 53285 - }, - { - "epoch": 1.352709734737911, - "grad_norm": 0.4341100752353668, - "learning_rate": 1.0981935101747261e-05, - "loss": 0.0559, - "step": 53290 - }, - { - "epoch": 1.3528366543977661, - "grad_norm": 0.47434118390083313, - "learning_rate": 1.098108897068156e-05, - "loss": 0.0599, - "step": 53295 - }, - { - "epoch": 1.3529635740576216, - "grad_norm": 0.5288444757461548, - "learning_rate": 1.0980242839615858e-05, - "loss": 0.0326, - "step": 53300 - }, - { - "epoch": 1.3530904937174768, - "grad_norm": 0.5931593179702759, - "learning_rate": 1.0979396708550156e-05, - "loss": 0.0528, - "step": 53305 - }, - { - "epoch": 1.3532174133773323, - "grad_norm": 0.4350128769874573, - "learning_rate": 1.0978550577484453e-05, - "loss": 0.0477, - "step": 53310 - }, - { - "epoch": 1.3533443330371875, - "grad_norm": 0.5422735810279846, - "learning_rate": 1.0977704446418751e-05, - "loss": 0.0572, - "step": 53315 - }, - { - "epoch": 1.3534712526970427, - "grad_norm": 0.43845710158348083, - "learning_rate": 1.097685831535305e-05, - "loss": 0.0375, - "step": 53320 - }, - { - "epoch": 1.3535981723568982, - "grad_norm": 0.4326101541519165, - "learning_rate": 1.0976012184287348e-05, - "loss": 0.0558, - "step": 53325 - }, - { - "epoch": 1.3537250920167534, - "grad_norm": 0.40490272641181946, - "learning_rate": 1.0975166053221645e-05, - "loss": 0.0409, - "step": 53330 - }, - { - "epoch": 1.3538520116766086, - "grad_norm": 0.7901862859725952, - "learning_rate": 1.0974319922155943e-05, - "loss": 0.0416, - "step": 53335 - }, - { - "epoch": 1.353978931336464, - "grad_norm": 0.7654166221618652, - "learning_rate": 1.0973473791090242e-05, - "loss": 0.051, - "step": 53340 - }, - { - "epoch": 1.3541058509963193, - "grad_norm": 0.4073667824268341, - "learning_rate": 1.097262766002454e-05, - "loss": 0.0384, - "step": 53345 - }, - { - "epoch": 1.3542327706561745, - "grad_norm": 0.4571843445301056, - "learning_rate": 1.0971781528958837e-05, - "loss": 0.0478, - "step": 53350 - }, - { - "epoch": 1.35435969031603, - "grad_norm": 0.6812313199043274, - "learning_rate": 1.0970935397893135e-05, - "loss": 0.0443, - "step": 53355 - }, - { - "epoch": 1.3544866099758852, - "grad_norm": 0.5831789374351501, - "learning_rate": 1.0970089266827433e-05, - "loss": 0.0317, - "step": 53360 - }, - { - "epoch": 1.3546135296357407, - "grad_norm": 0.5341454744338989, - "learning_rate": 1.0969243135761732e-05, - "loss": 0.0428, - "step": 53365 - }, - { - "epoch": 1.354740449295596, - "grad_norm": 1.1246310472488403, - "learning_rate": 1.0968397004696027e-05, - "loss": 0.0582, - "step": 53370 - }, - { - "epoch": 1.3548673689554511, - "grad_norm": 0.5167511701583862, - "learning_rate": 1.0967550873630325e-05, - "loss": 0.0495, - "step": 53375 - }, - { - "epoch": 1.3549942886153066, - "grad_norm": 0.824572741985321, - "learning_rate": 1.0966704742564624e-05, - "loss": 0.0543, - "step": 53380 - }, - { - "epoch": 1.3551212082751618, - "grad_norm": 0.8822797536849976, - "learning_rate": 1.0965858611498922e-05, - "loss": 0.0335, - "step": 53385 - }, - { - "epoch": 1.3552481279350173, - "grad_norm": 0.4675166606903076, - "learning_rate": 1.0965012480433219e-05, - "loss": 0.0535, - "step": 53390 - }, - { - "epoch": 1.3553750475948725, - "grad_norm": 0.42688530683517456, - "learning_rate": 1.0964166349367517e-05, - "loss": 0.064, - "step": 53395 - }, - { - "epoch": 1.3555019672547277, - "grad_norm": 0.4483187794685364, - "learning_rate": 1.0963320218301816e-05, - "loss": 0.0369, - "step": 53400 - }, - { - "epoch": 1.355628886914583, - "grad_norm": 0.35744476318359375, - "learning_rate": 1.0962474087236114e-05, - "loss": 0.0571, - "step": 53405 - }, - { - "epoch": 1.3557558065744384, - "grad_norm": 0.49724072217941284, - "learning_rate": 1.096162795617041e-05, - "loss": 0.0564, - "step": 53410 - }, - { - "epoch": 1.3558827262342936, - "grad_norm": 0.43220216035842896, - "learning_rate": 1.0960781825104709e-05, - "loss": 0.0566, - "step": 53415 - }, - { - "epoch": 1.356009645894149, - "grad_norm": 0.9348387122154236, - "learning_rate": 1.0959935694039008e-05, - "loss": 0.0419, - "step": 53420 - }, - { - "epoch": 1.3561365655540043, - "grad_norm": 0.5424503087997437, - "learning_rate": 1.0959089562973306e-05, - "loss": 0.0656, - "step": 53425 - }, - { - "epoch": 1.3562634852138595, - "grad_norm": 0.5812525749206543, - "learning_rate": 1.0958243431907603e-05, - "loss": 0.0514, - "step": 53430 - }, - { - "epoch": 1.356390404873715, - "grad_norm": 0.3814697563648224, - "learning_rate": 1.0957397300841901e-05, - "loss": 0.0415, - "step": 53435 - }, - { - "epoch": 1.3565173245335702, - "grad_norm": 0.5344324707984924, - "learning_rate": 1.09565511697762e-05, - "loss": 0.0406, - "step": 53440 - }, - { - "epoch": 1.3566442441934257, - "grad_norm": 0.3382473289966583, - "learning_rate": 1.0955705038710498e-05, - "loss": 0.0584, - "step": 53445 - }, - { - "epoch": 1.356771163853281, - "grad_norm": 0.48940780758857727, - "learning_rate": 1.0954858907644795e-05, - "loss": 0.066, - "step": 53450 - }, - { - "epoch": 1.3568980835131361, - "grad_norm": 0.42757534980773926, - "learning_rate": 1.0954012776579093e-05, - "loss": 0.0519, - "step": 53455 - }, - { - "epoch": 1.3570250031729916, - "grad_norm": 0.7245476841926575, - "learning_rate": 1.0953166645513391e-05, - "loss": 0.0415, - "step": 53460 - }, - { - "epoch": 1.3571519228328468, - "grad_norm": 0.3575679063796997, - "learning_rate": 1.095232051444769e-05, - "loss": 0.0337, - "step": 53465 - }, - { - "epoch": 1.3572788424927023, - "grad_norm": 0.37606191635131836, - "learning_rate": 1.0951474383381986e-05, - "loss": 0.0487, - "step": 53470 - }, - { - "epoch": 1.3574057621525575, - "grad_norm": 0.37248948216438293, - "learning_rate": 1.0950628252316285e-05, - "loss": 0.0542, - "step": 53475 - }, - { - "epoch": 1.3575326818124127, - "grad_norm": 0.3421197831630707, - "learning_rate": 1.0949782121250583e-05, - "loss": 0.0335, - "step": 53480 - }, - { - "epoch": 1.357659601472268, - "grad_norm": 0.5673224925994873, - "learning_rate": 1.0948935990184882e-05, - "loss": 0.0664, - "step": 53485 - }, - { - "epoch": 1.3577865211321234, - "grad_norm": 0.8562920689582825, - "learning_rate": 1.0948089859119178e-05, - "loss": 0.0676, - "step": 53490 - }, - { - "epoch": 1.3579134407919786, - "grad_norm": 0.4336473345756531, - "learning_rate": 1.0947243728053477e-05, - "loss": 0.051, - "step": 53495 - }, - { - "epoch": 1.358040360451834, - "grad_norm": 0.41566815972328186, - "learning_rate": 1.0946397596987775e-05, - "loss": 0.0498, - "step": 53500 - }, - { - "epoch": 1.3581672801116893, - "grad_norm": 0.41039618849754333, - "learning_rate": 1.0945551465922074e-05, - "loss": 0.0347, - "step": 53505 - }, - { - "epoch": 1.3582941997715445, - "grad_norm": 0.3132307827472687, - "learning_rate": 1.0944705334856369e-05, - "loss": 0.053, - "step": 53510 - }, - { - "epoch": 1.3584211194314, - "grad_norm": 0.5331346392631531, - "learning_rate": 1.0943859203790667e-05, - "loss": 0.0455, - "step": 53515 - }, - { - "epoch": 1.3585480390912552, - "grad_norm": 0.36201757192611694, - "learning_rate": 1.0943013072724965e-05, - "loss": 0.0489, - "step": 53520 - }, - { - "epoch": 1.3586749587511107, - "grad_norm": 0.26112622022628784, - "learning_rate": 1.0942166941659264e-05, - "loss": 0.0348, - "step": 53525 - }, - { - "epoch": 1.358801878410966, - "grad_norm": 0.4063245952129364, - "learning_rate": 1.094132081059356e-05, - "loss": 0.0606, - "step": 53530 - }, - { - "epoch": 1.3589287980708211, - "grad_norm": 0.3706381916999817, - "learning_rate": 1.0940474679527859e-05, - "loss": 0.0342, - "step": 53535 - }, - { - "epoch": 1.3590557177306763, - "grad_norm": 0.29689982533454895, - "learning_rate": 1.0939628548462157e-05, - "loss": 0.0393, - "step": 53540 - }, - { - "epoch": 1.3591826373905318, - "grad_norm": 0.6007696390151978, - "learning_rate": 1.0938782417396456e-05, - "loss": 0.0352, - "step": 53545 - }, - { - "epoch": 1.359309557050387, - "grad_norm": 0.6783130764961243, - "learning_rate": 1.0937936286330752e-05, - "loss": 0.0566, - "step": 53550 - }, - { - "epoch": 1.3594364767102425, - "grad_norm": 0.261785626411438, - "learning_rate": 1.093709015526505e-05, - "loss": 0.0362, - "step": 53555 - }, - { - "epoch": 1.3595633963700977, - "grad_norm": 0.44863200187683105, - "learning_rate": 1.093624402419935e-05, - "loss": 0.0497, - "step": 53560 - }, - { - "epoch": 1.359690316029953, - "grad_norm": 0.5107850432395935, - "learning_rate": 1.0935397893133648e-05, - "loss": 0.0427, - "step": 53565 - }, - { - "epoch": 1.3598172356898084, - "grad_norm": 0.42744284868240356, - "learning_rate": 1.0934551762067944e-05, - "loss": 0.0332, - "step": 53570 - }, - { - "epoch": 1.3599441553496636, - "grad_norm": 0.45599794387817383, - "learning_rate": 1.0933705631002243e-05, - "loss": 0.0532, - "step": 53575 - }, - { - "epoch": 1.360071075009519, - "grad_norm": 0.4348253607749939, - "learning_rate": 1.0932859499936541e-05, - "loss": 0.0369, - "step": 53580 - }, - { - "epoch": 1.3601979946693743, - "grad_norm": 0.24862349033355713, - "learning_rate": 1.093201336887084e-05, - "loss": 0.037, - "step": 53585 - }, - { - "epoch": 1.3603249143292295, - "grad_norm": 0.37189561128616333, - "learning_rate": 1.0931167237805136e-05, - "loss": 0.0438, - "step": 53590 - }, - { - "epoch": 1.360451833989085, - "grad_norm": 0.36627763509750366, - "learning_rate": 1.0930321106739435e-05, - "loss": 0.0478, - "step": 53595 - }, - { - "epoch": 1.3605787536489402, - "grad_norm": 0.5051516890525818, - "learning_rate": 1.0929474975673733e-05, - "loss": 0.038, - "step": 53600 - }, - { - "epoch": 1.3607056733087957, - "grad_norm": 0.2955676019191742, - "learning_rate": 1.0928628844608031e-05, - "loss": 0.0402, - "step": 53605 - }, - { - "epoch": 1.360832592968651, - "grad_norm": 0.4687293469905853, - "learning_rate": 1.0927782713542328e-05, - "loss": 0.0438, - "step": 53610 - }, - { - "epoch": 1.3609595126285061, - "grad_norm": 0.49653366208076477, - "learning_rate": 1.0926936582476627e-05, - "loss": 0.0431, - "step": 53615 - }, - { - "epoch": 1.3610864322883613, - "grad_norm": 0.38274291157722473, - "learning_rate": 1.0926090451410925e-05, - "loss": 0.0341, - "step": 53620 - }, - { - "epoch": 1.3612133519482168, - "grad_norm": 0.3832927346229553, - "learning_rate": 1.0925244320345223e-05, - "loss": 0.0487, - "step": 53625 - }, - { - "epoch": 1.361340271608072, - "grad_norm": 0.424782931804657, - "learning_rate": 1.0924398189279518e-05, - "loss": 0.0476, - "step": 53630 - }, - { - "epoch": 1.3614671912679275, - "grad_norm": 0.4093928039073944, - "learning_rate": 1.0923552058213818e-05, - "loss": 0.0531, - "step": 53635 - }, - { - "epoch": 1.3615941109277827, - "grad_norm": 0.42932412028312683, - "learning_rate": 1.0922705927148117e-05, - "loss": 0.0476, - "step": 53640 - }, - { - "epoch": 1.361721030587638, - "grad_norm": 0.3751372992992401, - "learning_rate": 1.0921859796082415e-05, - "loss": 0.0515, - "step": 53645 - }, - { - "epoch": 1.3618479502474934, - "grad_norm": 0.39512017369270325, - "learning_rate": 1.092101366501671e-05, - "loss": 0.0471, - "step": 53650 - }, - { - "epoch": 1.3619748699073486, - "grad_norm": 0.2763456404209137, - "learning_rate": 1.0920167533951009e-05, - "loss": 0.0507, - "step": 53655 - }, - { - "epoch": 1.362101789567204, - "grad_norm": 0.45206692814826965, - "learning_rate": 1.0919321402885307e-05, - "loss": 0.0521, - "step": 53660 - }, - { - "epoch": 1.3622287092270593, - "grad_norm": 0.5730060935020447, - "learning_rate": 1.0918475271819606e-05, - "loss": 0.0444, - "step": 53665 - }, - { - "epoch": 1.3623556288869145, - "grad_norm": 0.4873270094394684, - "learning_rate": 1.0917629140753902e-05, - "loss": 0.0479, - "step": 53670 - }, - { - "epoch": 1.3624825485467698, - "grad_norm": 0.35784056782722473, - "learning_rate": 1.09167830096882e-05, - "loss": 0.0398, - "step": 53675 - }, - { - "epoch": 1.3626094682066252, - "grad_norm": 1.2452991008758545, - "learning_rate": 1.0915936878622499e-05, - "loss": 0.0623, - "step": 53680 - }, - { - "epoch": 1.3627363878664804, - "grad_norm": 0.8236744403839111, - "learning_rate": 1.0915090747556797e-05, - "loss": 0.0447, - "step": 53685 - }, - { - "epoch": 1.3628633075263359, - "grad_norm": 0.4126247465610504, - "learning_rate": 1.0914244616491096e-05, - "loss": 0.0471, - "step": 53690 - }, - { - "epoch": 1.3629902271861911, - "grad_norm": 0.26605385541915894, - "learning_rate": 1.0913398485425393e-05, - "loss": 0.0301, - "step": 53695 - }, - { - "epoch": 1.3631171468460463, - "grad_norm": 0.4442836046218872, - "learning_rate": 1.0912552354359691e-05, - "loss": 0.059, - "step": 53700 - }, - { - "epoch": 1.3632440665059018, - "grad_norm": 0.6150057315826416, - "learning_rate": 1.091170622329399e-05, - "loss": 0.042, - "step": 53705 - }, - { - "epoch": 1.363370986165757, - "grad_norm": 0.48178768157958984, - "learning_rate": 1.0910860092228288e-05, - "loss": 0.0496, - "step": 53710 - }, - { - "epoch": 1.3634979058256125, - "grad_norm": 0.4878601133823395, - "learning_rate": 1.0910013961162584e-05, - "loss": 0.0482, - "step": 53715 - }, - { - "epoch": 1.3636248254854677, - "grad_norm": 0.5452958941459656, - "learning_rate": 1.0909167830096883e-05, - "loss": 0.0329, - "step": 53720 - }, - { - "epoch": 1.363751745145323, - "grad_norm": 0.314645379781723, - "learning_rate": 1.0908321699031181e-05, - "loss": 0.047, - "step": 53725 - }, - { - "epoch": 1.3638786648051784, - "grad_norm": 0.20884467661380768, - "learning_rate": 1.090747556796548e-05, - "loss": 0.0318, - "step": 53730 - }, - { - "epoch": 1.3640055844650336, - "grad_norm": 0.30169984698295593, - "learning_rate": 1.0906629436899776e-05, - "loss": 0.0385, - "step": 53735 - }, - { - "epoch": 1.364132504124889, - "grad_norm": 0.4197489619255066, - "learning_rate": 1.0905783305834075e-05, - "loss": 0.0479, - "step": 53740 - }, - { - "epoch": 1.3642594237847443, - "grad_norm": 0.2374115139245987, - "learning_rate": 1.0904937174768373e-05, - "loss": 0.0407, - "step": 53745 - }, - { - "epoch": 1.3643863434445995, - "grad_norm": 0.34630247950553894, - "learning_rate": 1.0904091043702672e-05, - "loss": 0.027, - "step": 53750 - }, - { - "epoch": 1.3645132631044548, - "grad_norm": 0.5338728427886963, - "learning_rate": 1.0903244912636968e-05, - "loss": 0.0358, - "step": 53755 - }, - { - "epoch": 1.3646401827643102, - "grad_norm": 0.5007510781288147, - "learning_rate": 1.0902398781571267e-05, - "loss": 0.0343, - "step": 53760 - }, - { - "epoch": 1.3647671024241654, - "grad_norm": 0.375861257314682, - "learning_rate": 1.0901552650505565e-05, - "loss": 0.0462, - "step": 53765 - }, - { - "epoch": 1.3648940220840209, - "grad_norm": 0.40075042843818665, - "learning_rate": 1.0900706519439863e-05, - "loss": 0.0534, - "step": 53770 - }, - { - "epoch": 1.3650209417438761, - "grad_norm": 0.49201664328575134, - "learning_rate": 1.089986038837416e-05, - "loss": 0.0607, - "step": 53775 - }, - { - "epoch": 1.3651478614037313, - "grad_norm": 0.4179738759994507, - "learning_rate": 1.0899014257308459e-05, - "loss": 0.0438, - "step": 53780 - }, - { - "epoch": 1.3652747810635868, - "grad_norm": 0.43299633264541626, - "learning_rate": 1.0898168126242757e-05, - "loss": 0.0414, - "step": 53785 - }, - { - "epoch": 1.365401700723442, - "grad_norm": 0.6901454925537109, - "learning_rate": 1.0897321995177055e-05, - "loss": 0.0441, - "step": 53790 - }, - { - "epoch": 1.3655286203832975, - "grad_norm": 0.4798634350299835, - "learning_rate": 1.089647586411135e-05, - "loss": 0.0414, - "step": 53795 - }, - { - "epoch": 1.3656555400431527, - "grad_norm": 0.3565271496772766, - "learning_rate": 1.0895629733045649e-05, - "loss": 0.0357, - "step": 53800 - }, - { - "epoch": 1.365782459703008, - "grad_norm": 0.4075581729412079, - "learning_rate": 1.0894783601979947e-05, - "loss": 0.0639, - "step": 53805 - }, - { - "epoch": 1.3659093793628634, - "grad_norm": 0.41974395513534546, - "learning_rate": 1.0893937470914247e-05, - "loss": 0.0445, - "step": 53810 - }, - { - "epoch": 1.3660362990227186, - "grad_norm": 0.30935588479042053, - "learning_rate": 1.0893091339848542e-05, - "loss": 0.0438, - "step": 53815 - }, - { - "epoch": 1.366163218682574, - "grad_norm": 1.9121562242507935, - "learning_rate": 1.089224520878284e-05, - "loss": 0.0424, - "step": 53820 - }, - { - "epoch": 1.3662901383424293, - "grad_norm": 0.42710453271865845, - "learning_rate": 1.0891399077717139e-05, - "loss": 0.0369, - "step": 53825 - }, - { - "epoch": 1.3664170580022845, - "grad_norm": 0.35866478085517883, - "learning_rate": 1.0890552946651438e-05, - "loss": 0.0286, - "step": 53830 - }, - { - "epoch": 1.3665439776621398, - "grad_norm": 0.6083397269248962, - "learning_rate": 1.0889706815585734e-05, - "loss": 0.0444, - "step": 53835 - }, - { - "epoch": 1.3666708973219952, - "grad_norm": 0.4911203682422638, - "learning_rate": 1.0888860684520033e-05, - "loss": 0.046, - "step": 53840 - }, - { - "epoch": 1.3667978169818504, - "grad_norm": 0.4520212411880493, - "learning_rate": 1.0888014553454331e-05, - "loss": 0.0552, - "step": 53845 - }, - { - "epoch": 1.3669247366417059, - "grad_norm": 0.3374655246734619, - "learning_rate": 1.088716842238863e-05, - "loss": 0.0461, - "step": 53850 - }, - { - "epoch": 1.3670516563015611, - "grad_norm": 0.3363019526004791, - "learning_rate": 1.0886322291322926e-05, - "loss": 0.0268, - "step": 53855 - }, - { - "epoch": 1.3671785759614163, - "grad_norm": 0.3821159899234772, - "learning_rate": 1.0885476160257225e-05, - "loss": 0.0585, - "step": 53860 - }, - { - "epoch": 1.3673054956212718, - "grad_norm": 0.4121531844139099, - "learning_rate": 1.0884630029191523e-05, - "loss": 0.0423, - "step": 53865 - }, - { - "epoch": 1.367432415281127, - "grad_norm": 0.5599525570869446, - "learning_rate": 1.0883783898125821e-05, - "loss": 0.0476, - "step": 53870 - }, - { - "epoch": 1.3675593349409825, - "grad_norm": 0.31169214844703674, - "learning_rate": 1.0882937767060118e-05, - "loss": 0.0415, - "step": 53875 - }, - { - "epoch": 1.3676862546008377, - "grad_norm": 0.3912662863731384, - "learning_rate": 1.0882091635994416e-05, - "loss": 0.0467, - "step": 53880 - }, - { - "epoch": 1.367813174260693, - "grad_norm": 0.3830748498439789, - "learning_rate": 1.0881245504928715e-05, - "loss": 0.0325, - "step": 53885 - }, - { - "epoch": 1.3679400939205482, - "grad_norm": 0.3413200378417969, - "learning_rate": 1.0880399373863013e-05, - "loss": 0.0335, - "step": 53890 - }, - { - "epoch": 1.3680670135804036, - "grad_norm": 1.1567871570587158, - "learning_rate": 1.087955324279731e-05, - "loss": 0.0527, - "step": 53895 - }, - { - "epoch": 1.3681939332402588, - "grad_norm": 0.7733235955238342, - "learning_rate": 1.0878707111731608e-05, - "loss": 0.0369, - "step": 53900 - }, - { - "epoch": 1.3683208529001143, - "grad_norm": 0.5390303134918213, - "learning_rate": 1.0877860980665907e-05, - "loss": 0.043, - "step": 53905 - }, - { - "epoch": 1.3684477725599695, - "grad_norm": 0.33280450105667114, - "learning_rate": 1.0877014849600205e-05, - "loss": 0.0472, - "step": 53910 - }, - { - "epoch": 1.3685746922198248, - "grad_norm": 0.5718231201171875, - "learning_rate": 1.0876168718534502e-05, - "loss": 0.0603, - "step": 53915 - }, - { - "epoch": 1.3687016118796802, - "grad_norm": 0.48928624391555786, - "learning_rate": 1.08753225874688e-05, - "loss": 0.0393, - "step": 53920 - }, - { - "epoch": 1.3688285315395354, - "grad_norm": 0.532082736492157, - "learning_rate": 1.0874476456403099e-05, - "loss": 0.0535, - "step": 53925 - }, - { - "epoch": 1.3689554511993909, - "grad_norm": 0.6088261604309082, - "learning_rate": 1.0873630325337397e-05, - "loss": 0.0326, - "step": 53930 - }, - { - "epoch": 1.3690823708592461, - "grad_norm": 0.5554413795471191, - "learning_rate": 1.0872784194271692e-05, - "loss": 0.0664, - "step": 53935 - }, - { - "epoch": 1.3692092905191013, - "grad_norm": 0.9021826386451721, - "learning_rate": 1.087193806320599e-05, - "loss": 0.0648, - "step": 53940 - }, - { - "epoch": 1.3693362101789568, - "grad_norm": 0.4414669871330261, - "learning_rate": 1.0871091932140289e-05, - "loss": 0.0379, - "step": 53945 - }, - { - "epoch": 1.369463129838812, - "grad_norm": 0.42942067980766296, - "learning_rate": 1.0870245801074587e-05, - "loss": 0.0462, - "step": 53950 - }, - { - "epoch": 1.3695900494986675, - "grad_norm": 1.0412225723266602, - "learning_rate": 1.0869399670008884e-05, - "loss": 0.0542, - "step": 53955 - }, - { - "epoch": 1.3697169691585227, - "grad_norm": 0.871788740158081, - "learning_rate": 1.0868553538943182e-05, - "loss": 0.0485, - "step": 53960 - }, - { - "epoch": 1.369843888818378, - "grad_norm": 0.35316580533981323, - "learning_rate": 1.086770740787748e-05, - "loss": 0.0433, - "step": 53965 - }, - { - "epoch": 1.3699708084782332, - "grad_norm": 0.29210469126701355, - "learning_rate": 1.086686127681178e-05, - "loss": 0.0346, - "step": 53970 - }, - { - "epoch": 1.3700977281380886, - "grad_norm": 0.6066737174987793, - "learning_rate": 1.0866015145746076e-05, - "loss": 0.0474, - "step": 53975 - }, - { - "epoch": 1.3702246477979438, - "grad_norm": 0.2971045970916748, - "learning_rate": 1.0865169014680374e-05, - "loss": 0.0284, - "step": 53980 - }, - { - "epoch": 1.3703515674577993, - "grad_norm": 0.3586972653865814, - "learning_rate": 1.0864322883614673e-05, - "loss": 0.0453, - "step": 53985 - }, - { - "epoch": 1.3704784871176545, - "grad_norm": 0.23866139352321625, - "learning_rate": 1.0863476752548971e-05, - "loss": 0.0449, - "step": 53990 - }, - { - "epoch": 1.3706054067775097, - "grad_norm": 0.6590098738670349, - "learning_rate": 1.0862630621483268e-05, - "loss": 0.0582, - "step": 53995 - }, - { - "epoch": 1.3707323264373652, - "grad_norm": 0.9517698287963867, - "learning_rate": 1.0861784490417566e-05, - "loss": 0.045, - "step": 54000 - }, - { - "epoch": 1.3708592460972204, - "grad_norm": 0.6557309031486511, - "learning_rate": 1.0860938359351865e-05, - "loss": 0.0514, - "step": 54005 - }, - { - "epoch": 1.3709861657570759, - "grad_norm": 0.32175102829933167, - "learning_rate": 1.0860092228286163e-05, - "loss": 0.0508, - "step": 54010 - }, - { - "epoch": 1.371113085416931, - "grad_norm": 0.21987183392047882, - "learning_rate": 1.085924609722046e-05, - "loss": 0.0491, - "step": 54015 - }, - { - "epoch": 1.3712400050767863, - "grad_norm": 0.41425859928131104, - "learning_rate": 1.0858399966154758e-05, - "loss": 0.0452, - "step": 54020 - }, - { - "epoch": 1.3713669247366416, - "grad_norm": 0.44020992517471313, - "learning_rate": 1.0857553835089057e-05, - "loss": 0.0546, - "step": 54025 - }, - { - "epoch": 1.371493844396497, - "grad_norm": 0.5105854272842407, - "learning_rate": 1.0856707704023355e-05, - "loss": 0.04, - "step": 54030 - }, - { - "epoch": 1.3716207640563522, - "grad_norm": 0.41120150685310364, - "learning_rate": 1.0855861572957652e-05, - "loss": 0.0389, - "step": 54035 - }, - { - "epoch": 1.3717476837162077, - "grad_norm": 0.7352084517478943, - "learning_rate": 1.085501544189195e-05, - "loss": 0.0364, - "step": 54040 - }, - { - "epoch": 1.371874603376063, - "grad_norm": 0.33406955003738403, - "learning_rate": 1.0854169310826248e-05, - "loss": 0.0502, - "step": 54045 - }, - { - "epoch": 1.3720015230359182, - "grad_norm": 0.5362266302108765, - "learning_rate": 1.0853323179760547e-05, - "loss": 0.0488, - "step": 54050 - }, - { - "epoch": 1.3721284426957736, - "grad_norm": 0.7410942316055298, - "learning_rate": 1.0852477048694842e-05, - "loss": 0.0464, - "step": 54055 - }, - { - "epoch": 1.3722553623556288, - "grad_norm": 0.3626421093940735, - "learning_rate": 1.0851630917629142e-05, - "loss": 0.052, - "step": 54060 - }, - { - "epoch": 1.3723822820154843, - "grad_norm": 0.45620235800743103, - "learning_rate": 1.085078478656344e-05, - "loss": 0.0494, - "step": 54065 - }, - { - "epoch": 1.3725092016753395, - "grad_norm": 0.4356229305267334, - "learning_rate": 1.0849938655497739e-05, - "loss": 0.0616, - "step": 54070 - }, - { - "epoch": 1.3726361213351947, - "grad_norm": 0.3046474754810333, - "learning_rate": 1.0849092524432034e-05, - "loss": 0.0441, - "step": 54075 - }, - { - "epoch": 1.3727630409950502, - "grad_norm": 0.3116309344768524, - "learning_rate": 1.0848246393366332e-05, - "loss": 0.0497, - "step": 54080 - }, - { - "epoch": 1.3728899606549054, - "grad_norm": 0.511840283870697, - "learning_rate": 1.084740026230063e-05, - "loss": 0.0631, - "step": 54085 - }, - { - "epoch": 1.3730168803147609, - "grad_norm": 0.6636495590209961, - "learning_rate": 1.0846554131234929e-05, - "loss": 0.0368, - "step": 54090 - }, - { - "epoch": 1.373143799974616, - "grad_norm": 0.5632157921791077, - "learning_rate": 1.0845708000169226e-05, - "loss": 0.0432, - "step": 54095 - }, - { - "epoch": 1.3732707196344713, - "grad_norm": 0.5253337621688843, - "learning_rate": 1.0844861869103524e-05, - "loss": 0.0403, - "step": 54100 - }, - { - "epoch": 1.3733976392943266, - "grad_norm": 0.400758296251297, - "learning_rate": 1.0844015738037823e-05, - "loss": 0.0561, - "step": 54105 - }, - { - "epoch": 1.373524558954182, - "grad_norm": 0.6472968459129333, - "learning_rate": 1.0843169606972121e-05, - "loss": 0.0409, - "step": 54110 - }, - { - "epoch": 1.3736514786140372, - "grad_norm": 0.4690782427787781, - "learning_rate": 1.0842323475906418e-05, - "loss": 0.0368, - "step": 54115 - }, - { - "epoch": 1.3737783982738927, - "grad_norm": 0.46753156185150146, - "learning_rate": 1.0841477344840716e-05, - "loss": 0.0567, - "step": 54120 - }, - { - "epoch": 1.373905317933748, - "grad_norm": 0.6458771824836731, - "learning_rate": 1.0840631213775014e-05, - "loss": 0.0469, - "step": 54125 - }, - { - "epoch": 1.3740322375936032, - "grad_norm": 0.559503972530365, - "learning_rate": 1.0839785082709313e-05, - "loss": 0.0476, - "step": 54130 - }, - { - "epoch": 1.3741591572534586, - "grad_norm": 0.38445210456848145, - "learning_rate": 1.083893895164361e-05, - "loss": 0.0521, - "step": 54135 - }, - { - "epoch": 1.3742860769133138, - "grad_norm": 1.032560110092163, - "learning_rate": 1.0838092820577908e-05, - "loss": 0.0386, - "step": 54140 - }, - { - "epoch": 1.3744129965731693, - "grad_norm": 0.5340622067451477, - "learning_rate": 1.0837246689512206e-05, - "loss": 0.0494, - "step": 54145 - }, - { - "epoch": 1.3745399162330245, - "grad_norm": 0.5406903028488159, - "learning_rate": 1.0836400558446505e-05, - "loss": 0.048, - "step": 54150 - }, - { - "epoch": 1.3746668358928797, - "grad_norm": 0.6397920846939087, - "learning_rate": 1.0835554427380801e-05, - "loss": 0.0496, - "step": 54155 - }, - { - "epoch": 1.3747937555527352, - "grad_norm": 0.41770139336586, - "learning_rate": 1.08347082963151e-05, - "loss": 0.0315, - "step": 54160 - }, - { - "epoch": 1.3749206752125904, - "grad_norm": 0.7275354266166687, - "learning_rate": 1.0833862165249398e-05, - "loss": 0.0467, - "step": 54165 - }, - { - "epoch": 1.3750475948724459, - "grad_norm": 0.36085283756256104, - "learning_rate": 1.0833016034183697e-05, - "loss": 0.0626, - "step": 54170 - }, - { - "epoch": 1.375174514532301, - "grad_norm": 0.2587214708328247, - "learning_rate": 1.0832169903117993e-05, - "loss": 0.0338, - "step": 54175 - }, - { - "epoch": 1.3753014341921563, - "grad_norm": 0.5923842191696167, - "learning_rate": 1.0831323772052292e-05, - "loss": 0.0459, - "step": 54180 - }, - { - "epoch": 1.3754283538520116, - "grad_norm": 0.3349825441837311, - "learning_rate": 1.083047764098659e-05, - "loss": 0.0506, - "step": 54185 - }, - { - "epoch": 1.375555273511867, - "grad_norm": 0.4663384258747101, - "learning_rate": 1.0829631509920889e-05, - "loss": 0.0453, - "step": 54190 - }, - { - "epoch": 1.3756821931717222, - "grad_norm": 0.9183983206748962, - "learning_rate": 1.0828785378855187e-05, - "loss": 0.0501, - "step": 54195 - }, - { - "epoch": 1.3758091128315777, - "grad_norm": 0.47049933671951294, - "learning_rate": 1.0827939247789484e-05, - "loss": 0.053, - "step": 54200 - }, - { - "epoch": 1.375936032491433, - "grad_norm": 0.5218068361282349, - "learning_rate": 1.0827093116723782e-05, - "loss": 0.0585, - "step": 54205 - }, - { - "epoch": 1.3760629521512882, - "grad_norm": 1.3670343160629272, - "learning_rate": 1.082624698565808e-05, - "loss": 0.035, - "step": 54210 - }, - { - "epoch": 1.3761898718111436, - "grad_norm": 0.5049926042556763, - "learning_rate": 1.0825400854592379e-05, - "loss": 0.0458, - "step": 54215 - }, - { - "epoch": 1.3763167914709988, - "grad_norm": 0.39925047755241394, - "learning_rate": 1.0824554723526674e-05, - "loss": 0.0269, - "step": 54220 - }, - { - "epoch": 1.3764437111308543, - "grad_norm": 0.49657413363456726, - "learning_rate": 1.0823708592460972e-05, - "loss": 0.0428, - "step": 54225 - }, - { - "epoch": 1.3765706307907095, - "grad_norm": 0.3923914432525635, - "learning_rate": 1.082286246139527e-05, - "loss": 0.0405, - "step": 54230 - }, - { - "epoch": 1.3766975504505647, - "grad_norm": 0.4696086347103119, - "learning_rate": 1.082201633032957e-05, - "loss": 0.0502, - "step": 54235 - }, - { - "epoch": 1.37682447011042, - "grad_norm": 0.5391739010810852, - "learning_rate": 1.0821170199263866e-05, - "loss": 0.0517, - "step": 54240 - }, - { - "epoch": 1.3769513897702754, - "grad_norm": 0.33368757367134094, - "learning_rate": 1.0820324068198164e-05, - "loss": 0.0398, - "step": 54245 - }, - { - "epoch": 1.3770783094301307, - "grad_norm": 0.5333112478256226, - "learning_rate": 1.0819477937132463e-05, - "loss": 0.0577, - "step": 54250 - }, - { - "epoch": 1.377205229089986, - "grad_norm": 0.9154989719390869, - "learning_rate": 1.0818631806066761e-05, - "loss": 0.0685, - "step": 54255 - }, - { - "epoch": 1.3773321487498413, - "grad_norm": 0.2277202308177948, - "learning_rate": 1.0817785675001058e-05, - "loss": 0.0407, - "step": 54260 - }, - { - "epoch": 1.3774590684096966, - "grad_norm": 0.3607838451862335, - "learning_rate": 1.0816939543935356e-05, - "loss": 0.0286, - "step": 54265 - }, - { - "epoch": 1.377585988069552, - "grad_norm": 0.43415164947509766, - "learning_rate": 1.0816093412869655e-05, - "loss": 0.0566, - "step": 54270 - }, - { - "epoch": 1.3777129077294072, - "grad_norm": 0.24350884556770325, - "learning_rate": 1.0815247281803953e-05, - "loss": 0.0298, - "step": 54275 - }, - { - "epoch": 1.3778398273892627, - "grad_norm": 0.33684489130973816, - "learning_rate": 1.081440115073825e-05, - "loss": 0.0483, - "step": 54280 - }, - { - "epoch": 1.377966747049118, - "grad_norm": 0.6302357316017151, - "learning_rate": 1.0813555019672548e-05, - "loss": 0.0633, - "step": 54285 - }, - { - "epoch": 1.3780936667089732, - "grad_norm": 0.5776455402374268, - "learning_rate": 1.0812708888606846e-05, - "loss": 0.058, - "step": 54290 - }, - { - "epoch": 1.3782205863688286, - "grad_norm": 0.3499896228313446, - "learning_rate": 1.0811862757541145e-05, - "loss": 0.0394, - "step": 54295 - }, - { - "epoch": 1.3783475060286838, - "grad_norm": 0.35086849331855774, - "learning_rate": 1.0811016626475442e-05, - "loss": 0.0384, - "step": 54300 - }, - { - "epoch": 1.3784744256885393, - "grad_norm": 0.41196900606155396, - "learning_rate": 1.081017049540974e-05, - "loss": 0.0535, - "step": 54305 - }, - { - "epoch": 1.3786013453483945, - "grad_norm": 0.6416341066360474, - "learning_rate": 1.0809324364344038e-05, - "loss": 0.0409, - "step": 54310 - }, - { - "epoch": 1.3787282650082497, - "grad_norm": 0.32309141755104065, - "learning_rate": 1.0808478233278337e-05, - "loss": 0.0486, - "step": 54315 - }, - { - "epoch": 1.378855184668105, - "grad_norm": 0.39212751388549805, - "learning_rate": 1.0807632102212633e-05, - "loss": 0.0407, - "step": 54320 - }, - { - "epoch": 1.3789821043279604, - "grad_norm": 0.5112642645835876, - "learning_rate": 1.0806785971146932e-05, - "loss": 0.0472, - "step": 54325 - }, - { - "epoch": 1.3791090239878157, - "grad_norm": 0.45517733693122864, - "learning_rate": 1.080593984008123e-05, - "loss": 0.0519, - "step": 54330 - }, - { - "epoch": 1.379235943647671, - "grad_norm": 0.4135662019252777, - "learning_rate": 1.0805093709015529e-05, - "loss": 0.0413, - "step": 54335 - }, - { - "epoch": 1.3793628633075263, - "grad_norm": 0.6555345058441162, - "learning_rate": 1.0804247577949825e-05, - "loss": 0.0391, - "step": 54340 - }, - { - "epoch": 1.3794897829673816, - "grad_norm": 0.5736551880836487, - "learning_rate": 1.0803401446884124e-05, - "loss": 0.0442, - "step": 54345 - }, - { - "epoch": 1.379616702627237, - "grad_norm": 0.339504599571228, - "learning_rate": 1.0802555315818422e-05, - "loss": 0.039, - "step": 54350 - }, - { - "epoch": 1.3797436222870922, - "grad_norm": 0.527916431427002, - "learning_rate": 1.080170918475272e-05, - "loss": 0.0503, - "step": 54355 - }, - { - "epoch": 1.3798705419469477, - "grad_norm": 1.7261453866958618, - "learning_rate": 1.0800863053687016e-05, - "loss": 0.0622, - "step": 54360 - }, - { - "epoch": 1.379997461606803, - "grad_norm": 0.42818284034729004, - "learning_rate": 1.0800016922621314e-05, - "loss": 0.0333, - "step": 54365 - }, - { - "epoch": 1.3801243812666582, - "grad_norm": 0.33570146560668945, - "learning_rate": 1.0799170791555612e-05, - "loss": 0.0388, - "step": 54370 - }, - { - "epoch": 1.3802513009265134, - "grad_norm": 0.26918500661849976, - "learning_rate": 1.079832466048991e-05, - "loss": 0.0489, - "step": 54375 - }, - { - "epoch": 1.3803782205863688, - "grad_norm": 0.5700088143348694, - "learning_rate": 1.0797478529424208e-05, - "loss": 0.0514, - "step": 54380 - }, - { - "epoch": 1.380505140246224, - "grad_norm": 0.33720293641090393, - "learning_rate": 1.0796632398358506e-05, - "loss": 0.0694, - "step": 54385 - }, - { - "epoch": 1.3806320599060795, - "grad_norm": 0.5421366095542908, - "learning_rate": 1.0795786267292804e-05, - "loss": 0.0721, - "step": 54390 - }, - { - "epoch": 1.3807589795659347, - "grad_norm": 0.43584224581718445, - "learning_rate": 1.0794940136227103e-05, - "loss": 0.048, - "step": 54395 - }, - { - "epoch": 1.38088589922579, - "grad_norm": 0.4870329201221466, - "learning_rate": 1.07940940051614e-05, - "loss": 0.0485, - "step": 54400 - }, - { - "epoch": 1.3810128188856454, - "grad_norm": 0.5662314295768738, - "learning_rate": 1.0793247874095698e-05, - "loss": 0.0402, - "step": 54405 - }, - { - "epoch": 1.3811397385455007, - "grad_norm": 0.4170321226119995, - "learning_rate": 1.0792401743029996e-05, - "loss": 0.0527, - "step": 54410 - }, - { - "epoch": 1.381266658205356, - "grad_norm": 0.35035550594329834, - "learning_rate": 1.0791555611964295e-05, - "loss": 0.0345, - "step": 54415 - }, - { - "epoch": 1.3813935778652113, - "grad_norm": 0.40216541290283203, - "learning_rate": 1.0790709480898591e-05, - "loss": 0.0568, - "step": 54420 - }, - { - "epoch": 1.3815204975250666, - "grad_norm": 0.38938525319099426, - "learning_rate": 1.078986334983289e-05, - "loss": 0.0358, - "step": 54425 - }, - { - "epoch": 1.381647417184922, - "grad_norm": 0.23780877888202667, - "learning_rate": 1.0789017218767188e-05, - "loss": 0.0372, - "step": 54430 - }, - { - "epoch": 1.3817743368447772, - "grad_norm": 0.3754282295703888, - "learning_rate": 1.0788171087701487e-05, - "loss": 0.0432, - "step": 54435 - }, - { - "epoch": 1.3819012565046327, - "grad_norm": 0.43819835782051086, - "learning_rate": 1.0787324956635783e-05, - "loss": 0.0545, - "step": 54440 - }, - { - "epoch": 1.382028176164488, - "grad_norm": 0.41555947065353394, - "learning_rate": 1.0786478825570082e-05, - "loss": 0.0437, - "step": 54445 - }, - { - "epoch": 1.3821550958243431, - "grad_norm": 0.5669215321540833, - "learning_rate": 1.078563269450438e-05, - "loss": 0.0407, - "step": 54450 - }, - { - "epoch": 1.3822820154841984, - "grad_norm": 0.5280998349189758, - "learning_rate": 1.0784786563438678e-05, - "loss": 0.0388, - "step": 54455 - }, - { - "epoch": 1.3824089351440538, - "grad_norm": 0.4742697477340698, - "learning_rate": 1.0783940432372975e-05, - "loss": 0.0538, - "step": 54460 - }, - { - "epoch": 1.382535854803909, - "grad_norm": 0.9755678176879883, - "learning_rate": 1.0783094301307274e-05, - "loss": 0.0525, - "step": 54465 - }, - { - "epoch": 1.3826627744637645, - "grad_norm": 0.705847978591919, - "learning_rate": 1.0782248170241572e-05, - "loss": 0.0357, - "step": 54470 - }, - { - "epoch": 1.3827896941236197, - "grad_norm": 0.557060956954956, - "learning_rate": 1.078140203917587e-05, - "loss": 0.0346, - "step": 54475 - }, - { - "epoch": 1.382916613783475, - "grad_norm": 0.19295154511928558, - "learning_rate": 1.0780555908110167e-05, - "loss": 0.0262, - "step": 54480 - }, - { - "epoch": 1.3830435334433304, - "grad_norm": 0.7033517956733704, - "learning_rate": 1.0779709777044466e-05, - "loss": 0.0552, - "step": 54485 - }, - { - "epoch": 1.3831704531031856, - "grad_norm": 0.4452029764652252, - "learning_rate": 1.0778863645978764e-05, - "loss": 0.0392, - "step": 54490 - }, - { - "epoch": 1.383297372763041, - "grad_norm": 0.5312698483467102, - "learning_rate": 1.0778017514913062e-05, - "loss": 0.0571, - "step": 54495 - }, - { - "epoch": 1.3834242924228963, - "grad_norm": 0.34210866689682007, - "learning_rate": 1.0777171383847357e-05, - "loss": 0.0442, - "step": 54500 - }, - { - "epoch": 1.3835512120827516, - "grad_norm": 0.44086429476737976, - "learning_rate": 1.0776325252781656e-05, - "loss": 0.0415, - "step": 54505 - }, - { - "epoch": 1.383678131742607, - "grad_norm": 0.5575777292251587, - "learning_rate": 1.0775479121715954e-05, - "loss": 0.0411, - "step": 54510 - }, - { - "epoch": 1.3838050514024622, - "grad_norm": 0.4079473912715912, - "learning_rate": 1.0774632990650253e-05, - "loss": 0.0539, - "step": 54515 - }, - { - "epoch": 1.3839319710623177, - "grad_norm": 4.647985935211182, - "learning_rate": 1.077378685958455e-05, - "loss": 0.0362, - "step": 54520 - }, - { - "epoch": 1.384058890722173, - "grad_norm": 0.5157573223114014, - "learning_rate": 1.0772940728518848e-05, - "loss": 0.0312, - "step": 54525 - }, - { - "epoch": 1.3841858103820281, - "grad_norm": 0.24273477494716644, - "learning_rate": 1.0772094597453146e-05, - "loss": 0.0535, - "step": 54530 - }, - { - "epoch": 1.3843127300418834, - "grad_norm": 0.5104020833969116, - "learning_rate": 1.0771248466387444e-05, - "loss": 0.0371, - "step": 54535 - }, - { - "epoch": 1.3844396497017388, - "grad_norm": 0.7145867347717285, - "learning_rate": 1.0770402335321741e-05, - "loss": 0.0687, - "step": 54540 - }, - { - "epoch": 1.384566569361594, - "grad_norm": 1.0761445760726929, - "learning_rate": 1.076955620425604e-05, - "loss": 0.0529, - "step": 54545 - }, - { - "epoch": 1.3846934890214495, - "grad_norm": 0.5145509839057922, - "learning_rate": 1.0768710073190338e-05, - "loss": 0.0346, - "step": 54550 - }, - { - "epoch": 1.3848204086813047, - "grad_norm": 0.4839056432247162, - "learning_rate": 1.0767863942124636e-05, - "loss": 0.0427, - "step": 54555 - }, - { - "epoch": 1.38494732834116, - "grad_norm": 0.36353176832199097, - "learning_rate": 1.0767017811058933e-05, - "loss": 0.0606, - "step": 54560 - }, - { - "epoch": 1.3850742480010154, - "grad_norm": 0.6963041424751282, - "learning_rate": 1.0766171679993231e-05, - "loss": 0.0438, - "step": 54565 - }, - { - "epoch": 1.3852011676608706, - "grad_norm": 0.6354207396507263, - "learning_rate": 1.076532554892753e-05, - "loss": 0.0425, - "step": 54570 - }, - { - "epoch": 1.385328087320726, - "grad_norm": 0.49890726804733276, - "learning_rate": 1.0764479417861828e-05, - "loss": 0.0466, - "step": 54575 - }, - { - "epoch": 1.3854550069805813, - "grad_norm": 0.5168671011924744, - "learning_rate": 1.0763633286796125e-05, - "loss": 0.0588, - "step": 54580 - }, - { - "epoch": 1.3855819266404366, - "grad_norm": 0.36286643147468567, - "learning_rate": 1.0762787155730423e-05, - "loss": 0.0294, - "step": 54585 - }, - { - "epoch": 1.3857088463002918, - "grad_norm": 0.4356544017791748, - "learning_rate": 1.0761941024664722e-05, - "loss": 0.0437, - "step": 54590 - }, - { - "epoch": 1.3858357659601472, - "grad_norm": 0.4573720097541809, - "learning_rate": 1.076109489359902e-05, - "loss": 0.0523, - "step": 54595 - }, - { - "epoch": 1.3859626856200025, - "grad_norm": 0.8799592852592468, - "learning_rate": 1.0760248762533317e-05, - "loss": 0.038, - "step": 54600 - }, - { - "epoch": 1.386089605279858, - "grad_norm": 0.48785990476608276, - "learning_rate": 1.0759402631467615e-05, - "loss": 0.0507, - "step": 54605 - }, - { - "epoch": 1.3862165249397131, - "grad_norm": 0.42177894711494446, - "learning_rate": 1.0758556500401914e-05, - "loss": 0.0529, - "step": 54610 - }, - { - "epoch": 1.3863434445995684, - "grad_norm": 0.4410828649997711, - "learning_rate": 1.0757710369336212e-05, - "loss": 0.0451, - "step": 54615 - }, - { - "epoch": 1.3864703642594238, - "grad_norm": 0.5414563417434692, - "learning_rate": 1.0756864238270507e-05, - "loss": 0.0496, - "step": 54620 - }, - { - "epoch": 1.386597283919279, - "grad_norm": 0.4063747227191925, - "learning_rate": 1.0756018107204807e-05, - "loss": 0.0511, - "step": 54625 - }, - { - "epoch": 1.3867242035791345, - "grad_norm": 0.726323127746582, - "learning_rate": 1.0755171976139106e-05, - "loss": 0.0582, - "step": 54630 - }, - { - "epoch": 1.3868511232389897, - "grad_norm": 0.6582604050636292, - "learning_rate": 1.0754325845073404e-05, - "loss": 0.045, - "step": 54635 - }, - { - "epoch": 1.386978042898845, - "grad_norm": 0.47922927141189575, - "learning_rate": 1.0753479714007699e-05, - "loss": 0.0436, - "step": 54640 - }, - { - "epoch": 1.3871049625587004, - "grad_norm": 0.3833918571472168, - "learning_rate": 1.0752633582941997e-05, - "loss": 0.036, - "step": 54645 - }, - { - "epoch": 1.3872318822185556, - "grad_norm": 0.4149291217327118, - "learning_rate": 1.0751787451876296e-05, - "loss": 0.058, - "step": 54650 - }, - { - "epoch": 1.387358801878411, - "grad_norm": 0.8011088371276855, - "learning_rate": 1.0750941320810594e-05, - "loss": 0.0383, - "step": 54655 - }, - { - "epoch": 1.3874857215382663, - "grad_norm": 0.5592812895774841, - "learning_rate": 1.0750095189744891e-05, - "loss": 0.0345, - "step": 54660 - }, - { - "epoch": 1.3876126411981216, - "grad_norm": 0.38928651809692383, - "learning_rate": 1.074924905867919e-05, - "loss": 0.0363, - "step": 54665 - }, - { - "epoch": 1.3877395608579768, - "grad_norm": 0.5074005722999573, - "learning_rate": 1.0748402927613488e-05, - "loss": 0.0415, - "step": 54670 - }, - { - "epoch": 1.3878664805178322, - "grad_norm": 0.6130107641220093, - "learning_rate": 1.0747556796547786e-05, - "loss": 0.0415, - "step": 54675 - }, - { - "epoch": 1.3879934001776875, - "grad_norm": 1.8375334739685059, - "learning_rate": 1.0746710665482083e-05, - "loss": 0.0418, - "step": 54680 - }, - { - "epoch": 1.388120319837543, - "grad_norm": 0.5313819050788879, - "learning_rate": 1.0745864534416381e-05, - "loss": 0.0459, - "step": 54685 - }, - { - "epoch": 1.3882472394973981, - "grad_norm": 0.3451443016529083, - "learning_rate": 1.074501840335068e-05, - "loss": 0.0376, - "step": 54690 - }, - { - "epoch": 1.3883741591572534, - "grad_norm": 0.42382606863975525, - "learning_rate": 1.0744172272284978e-05, - "loss": 0.0503, - "step": 54695 - }, - { - "epoch": 1.3885010788171088, - "grad_norm": 0.36700865626335144, - "learning_rate": 1.0743326141219275e-05, - "loss": 0.0431, - "step": 54700 - }, - { - "epoch": 1.388627998476964, - "grad_norm": 0.5045138001441956, - "learning_rate": 1.0742480010153573e-05, - "loss": 0.0313, - "step": 54705 - }, - { - "epoch": 1.3887549181368195, - "grad_norm": 0.45205408334732056, - "learning_rate": 1.0741633879087872e-05, - "loss": 0.0499, - "step": 54710 - }, - { - "epoch": 1.3888818377966747, - "grad_norm": 0.5772424936294556, - "learning_rate": 1.074078774802217e-05, - "loss": 0.041, - "step": 54715 - }, - { - "epoch": 1.38900875745653, - "grad_norm": 0.4622069299221039, - "learning_rate": 1.0739941616956468e-05, - "loss": 0.0427, - "step": 54720 - }, - { - "epoch": 1.3891356771163852, - "grad_norm": 0.4806802272796631, - "learning_rate": 1.0739095485890765e-05, - "loss": 0.0422, - "step": 54725 - }, - { - "epoch": 1.3892625967762406, - "grad_norm": 0.49485209584236145, - "learning_rate": 1.0738249354825063e-05, - "loss": 0.0428, - "step": 54730 - }, - { - "epoch": 1.3893895164360959, - "grad_norm": 0.40126705169677734, - "learning_rate": 1.0737403223759362e-05, - "loss": 0.0455, - "step": 54735 - }, - { - "epoch": 1.3895164360959513, - "grad_norm": 0.3087025284767151, - "learning_rate": 1.073655709269366e-05, - "loss": 0.0335, - "step": 54740 - }, - { - "epoch": 1.3896433557558066, - "grad_norm": 0.6291918158531189, - "learning_rate": 1.0735710961627957e-05, - "loss": 0.0422, - "step": 54745 - }, - { - "epoch": 1.3897702754156618, - "grad_norm": 0.20026078820228577, - "learning_rate": 1.0734864830562255e-05, - "loss": 0.0333, - "step": 54750 - }, - { - "epoch": 1.3898971950755172, - "grad_norm": 0.954300582408905, - "learning_rate": 1.0734018699496554e-05, - "loss": 0.0599, - "step": 54755 - }, - { - "epoch": 1.3900241147353725, - "grad_norm": 0.5012336373329163, - "learning_rate": 1.0733172568430852e-05, - "loss": 0.0516, - "step": 54760 - }, - { - "epoch": 1.390151034395228, - "grad_norm": 0.758690357208252, - "learning_rate": 1.0732326437365149e-05, - "loss": 0.0456, - "step": 54765 - }, - { - "epoch": 1.3902779540550831, - "grad_norm": 0.4343341588973999, - "learning_rate": 1.0731480306299447e-05, - "loss": 0.0356, - "step": 54770 - }, - { - "epoch": 1.3904048737149384, - "grad_norm": 0.24366921186447144, - "learning_rate": 1.0730634175233746e-05, - "loss": 0.0377, - "step": 54775 - }, - { - "epoch": 1.3905317933747938, - "grad_norm": 0.5069406628608704, - "learning_rate": 1.0729788044168044e-05, - "loss": 0.0523, - "step": 54780 - }, - { - "epoch": 1.390658713034649, - "grad_norm": 0.6215029954910278, - "learning_rate": 1.0728941913102339e-05, - "loss": 0.0344, - "step": 54785 - }, - { - "epoch": 1.3907856326945045, - "grad_norm": 0.4564222991466522, - "learning_rate": 1.0728095782036638e-05, - "loss": 0.0475, - "step": 54790 - }, - { - "epoch": 1.3909125523543597, - "grad_norm": 0.5443860292434692, - "learning_rate": 1.0727249650970936e-05, - "loss": 0.0418, - "step": 54795 - }, - { - "epoch": 1.391039472014215, - "grad_norm": 0.44519180059432983, - "learning_rate": 1.0726403519905236e-05, - "loss": 0.0288, - "step": 54800 - }, - { - "epoch": 1.3911663916740702, - "grad_norm": 1.5478479862213135, - "learning_rate": 1.0725557388839531e-05, - "loss": 0.0623, - "step": 54805 - }, - { - "epoch": 1.3912933113339256, - "grad_norm": 0.4394479990005493, - "learning_rate": 1.072471125777383e-05, - "loss": 0.0601, - "step": 54810 - }, - { - "epoch": 1.3914202309937809, - "grad_norm": 0.8031744360923767, - "learning_rate": 1.0723865126708128e-05, - "loss": 0.0597, - "step": 54815 - }, - { - "epoch": 1.3915471506536363, - "grad_norm": 0.45300471782684326, - "learning_rate": 1.0723018995642426e-05, - "loss": 0.0559, - "step": 54820 - }, - { - "epoch": 1.3916740703134916, - "grad_norm": 0.5895292162895203, - "learning_rate": 1.0722172864576723e-05, - "loss": 0.0398, - "step": 54825 - }, - { - "epoch": 1.3918009899733468, - "grad_norm": 0.4709932804107666, - "learning_rate": 1.0721326733511021e-05, - "loss": 0.0536, - "step": 54830 - }, - { - "epoch": 1.3919279096332022, - "grad_norm": 0.6527802348136902, - "learning_rate": 1.072048060244532e-05, - "loss": 0.048, - "step": 54835 - }, - { - "epoch": 1.3920548292930575, - "grad_norm": 0.5338982939720154, - "learning_rate": 1.0719634471379618e-05, - "loss": 0.0494, - "step": 54840 - }, - { - "epoch": 1.392181748952913, - "grad_norm": 0.3266105055809021, - "learning_rate": 1.0718788340313915e-05, - "loss": 0.0322, - "step": 54845 - }, - { - "epoch": 1.3923086686127681, - "grad_norm": 0.6051393151283264, - "learning_rate": 1.0717942209248213e-05, - "loss": 0.0563, - "step": 54850 - }, - { - "epoch": 1.3924355882726234, - "grad_norm": 0.4770505428314209, - "learning_rate": 1.0717096078182512e-05, - "loss": 0.0561, - "step": 54855 - }, - { - "epoch": 1.3925625079324788, - "grad_norm": 0.4646206796169281, - "learning_rate": 1.071624994711681e-05, - "loss": 0.0471, - "step": 54860 - }, - { - "epoch": 1.392689427592334, - "grad_norm": 0.5704447627067566, - "learning_rate": 1.0715403816051107e-05, - "loss": 0.0341, - "step": 54865 - }, - { - "epoch": 1.3928163472521895, - "grad_norm": 0.840937614440918, - "learning_rate": 1.0714557684985405e-05, - "loss": 0.0483, - "step": 54870 - }, - { - "epoch": 1.3929432669120447, - "grad_norm": 0.5263403058052063, - "learning_rate": 1.0713711553919704e-05, - "loss": 0.0522, - "step": 54875 - }, - { - "epoch": 1.3930701865719, - "grad_norm": 0.456423819065094, - "learning_rate": 1.0712865422854002e-05, - "loss": 0.0346, - "step": 54880 - }, - { - "epoch": 1.3931971062317552, - "grad_norm": 0.4313506484031677, - "learning_rate": 1.0712019291788299e-05, - "loss": 0.0439, - "step": 54885 - }, - { - "epoch": 1.3933240258916106, - "grad_norm": 0.3219373822212219, - "learning_rate": 1.0711173160722597e-05, - "loss": 0.0347, - "step": 54890 - }, - { - "epoch": 1.3934509455514659, - "grad_norm": 0.37717369198799133, - "learning_rate": 1.0710327029656896e-05, - "loss": 0.0514, - "step": 54895 - }, - { - "epoch": 1.3935778652113213, - "grad_norm": 0.4645342528820038, - "learning_rate": 1.0709480898591194e-05, - "loss": 0.0518, - "step": 54900 - }, - { - "epoch": 1.3937047848711765, - "grad_norm": 0.6083818078041077, - "learning_rate": 1.070863476752549e-05, - "loss": 0.0385, - "step": 54905 - }, - { - "epoch": 1.3938317045310318, - "grad_norm": 0.445985347032547, - "learning_rate": 1.0707788636459789e-05, - "loss": 0.0486, - "step": 54910 - }, - { - "epoch": 1.3939586241908872, - "grad_norm": 0.28502947092056274, - "learning_rate": 1.0706942505394087e-05, - "loss": 0.0343, - "step": 54915 - }, - { - "epoch": 1.3940855438507425, - "grad_norm": 0.48621538281440735, - "learning_rate": 1.0706096374328386e-05, - "loss": 0.0341, - "step": 54920 - }, - { - "epoch": 1.394212463510598, - "grad_norm": 0.44121313095092773, - "learning_rate": 1.0705250243262681e-05, - "loss": 0.0473, - "step": 54925 - }, - { - "epoch": 1.3943393831704531, - "grad_norm": 0.4535137414932251, - "learning_rate": 1.070440411219698e-05, - "loss": 0.037, - "step": 54930 - }, - { - "epoch": 1.3944663028303084, - "grad_norm": 0.39137372374534607, - "learning_rate": 1.0703557981131278e-05, - "loss": 0.0534, - "step": 54935 - }, - { - "epoch": 1.3945932224901636, - "grad_norm": 0.26158714294433594, - "learning_rate": 1.0702711850065576e-05, - "loss": 0.0442, - "step": 54940 - }, - { - "epoch": 1.394720142150019, - "grad_norm": 0.48892682790756226, - "learning_rate": 1.0701865718999873e-05, - "loss": 0.0343, - "step": 54945 - }, - { - "epoch": 1.3948470618098743, - "grad_norm": 0.35977232456207275, - "learning_rate": 1.0701019587934171e-05, - "loss": 0.0425, - "step": 54950 - }, - { - "epoch": 1.3949739814697297, - "grad_norm": 0.5531177520751953, - "learning_rate": 1.070017345686847e-05, - "loss": 0.0483, - "step": 54955 - }, - { - "epoch": 1.395100901129585, - "grad_norm": 0.5615354776382446, - "learning_rate": 1.0699327325802768e-05, - "loss": 0.0289, - "step": 54960 - }, - { - "epoch": 1.3952278207894402, - "grad_norm": 0.5400334596633911, - "learning_rate": 1.0698481194737065e-05, - "loss": 0.0429, - "step": 54965 - }, - { - "epoch": 1.3953547404492956, - "grad_norm": 0.2968641221523285, - "learning_rate": 1.0697635063671363e-05, - "loss": 0.032, - "step": 54970 - }, - { - "epoch": 1.3954816601091509, - "grad_norm": 0.39486563205718994, - "learning_rate": 1.0696788932605661e-05, - "loss": 0.0459, - "step": 54975 - }, - { - "epoch": 1.3956085797690063, - "grad_norm": 0.595596969127655, - "learning_rate": 1.069594280153996e-05, - "loss": 0.0572, - "step": 54980 - }, - { - "epoch": 1.3957354994288615, - "grad_norm": 0.366106778383255, - "learning_rate": 1.0695096670474257e-05, - "loss": 0.0561, - "step": 54985 - }, - { - "epoch": 1.3958624190887168, - "grad_norm": 0.6310754418373108, - "learning_rate": 1.0694250539408555e-05, - "loss": 0.0589, - "step": 54990 - }, - { - "epoch": 1.3959893387485722, - "grad_norm": 0.4698925316333771, - "learning_rate": 1.0693404408342853e-05, - "loss": 0.0365, - "step": 54995 - }, - { - "epoch": 1.3961162584084275, - "grad_norm": 0.4090391993522644, - "learning_rate": 1.0692558277277152e-05, - "loss": 0.0358, - "step": 55000 - }, - { - "epoch": 1.396243178068283, - "grad_norm": 0.4518356919288635, - "learning_rate": 1.0691712146211449e-05, - "loss": 0.038, - "step": 55005 - }, - { - "epoch": 1.3963700977281381, - "grad_norm": 0.4485310912132263, - "learning_rate": 1.0690866015145747e-05, - "loss": 0.0406, - "step": 55010 - }, - { - "epoch": 1.3964970173879934, - "grad_norm": 0.6583123803138733, - "learning_rate": 1.0690019884080045e-05, - "loss": 0.0659, - "step": 55015 - }, - { - "epoch": 1.3966239370478486, - "grad_norm": 0.799342930316925, - "learning_rate": 1.0689173753014344e-05, - "loss": 0.0352, - "step": 55020 - }, - { - "epoch": 1.396750856707704, - "grad_norm": 0.5848604440689087, - "learning_rate": 1.068832762194864e-05, - "loss": 0.047, - "step": 55025 - }, - { - "epoch": 1.3968777763675593, - "grad_norm": 0.5096336603164673, - "learning_rate": 1.0687481490882939e-05, - "loss": 0.0487, - "step": 55030 - }, - { - "epoch": 1.3970046960274147, - "grad_norm": 0.4147201478481293, - "learning_rate": 1.0686635359817237e-05, - "loss": 0.0359, - "step": 55035 - }, - { - "epoch": 1.39713161568727, - "grad_norm": 0.3461834192276001, - "learning_rate": 1.0685789228751536e-05, - "loss": 0.0354, - "step": 55040 - }, - { - "epoch": 1.3972585353471252, - "grad_norm": 0.4471239149570465, - "learning_rate": 1.0684943097685832e-05, - "loss": 0.0317, - "step": 55045 - }, - { - "epoch": 1.3973854550069806, - "grad_norm": 0.4565145671367645, - "learning_rate": 1.068409696662013e-05, - "loss": 0.0406, - "step": 55050 - }, - { - "epoch": 1.3975123746668359, - "grad_norm": 0.29571691155433655, - "learning_rate": 1.0683250835554429e-05, - "loss": 0.0443, - "step": 55055 - }, - { - "epoch": 1.3976392943266913, - "grad_norm": 0.3088287115097046, - "learning_rate": 1.0682404704488728e-05, - "loss": 0.0486, - "step": 55060 - }, - { - "epoch": 1.3977662139865465, - "grad_norm": 0.5220115184783936, - "learning_rate": 1.0681558573423023e-05, - "loss": 0.0442, - "step": 55065 - }, - { - "epoch": 1.3978931336464018, - "grad_norm": 0.5304118990898132, - "learning_rate": 1.0680712442357321e-05, - "loss": 0.0493, - "step": 55070 - }, - { - "epoch": 1.398020053306257, - "grad_norm": 0.4639875292778015, - "learning_rate": 1.067986631129162e-05, - "loss": 0.0388, - "step": 55075 - }, - { - "epoch": 1.3981469729661125, - "grad_norm": 0.7325186729431152, - "learning_rate": 1.0679020180225918e-05, - "loss": 0.0354, - "step": 55080 - }, - { - "epoch": 1.3982738926259677, - "grad_norm": 0.43298661708831787, - "learning_rate": 1.0678174049160214e-05, - "loss": 0.0725, - "step": 55085 - }, - { - "epoch": 1.3984008122858231, - "grad_norm": 3.023315906524658, - "learning_rate": 1.0677327918094513e-05, - "loss": 0.0633, - "step": 55090 - }, - { - "epoch": 1.3985277319456784, - "grad_norm": 0.28572410345077515, - "learning_rate": 1.0676481787028811e-05, - "loss": 0.0379, - "step": 55095 - }, - { - "epoch": 1.3986546516055336, - "grad_norm": 1.3292096853256226, - "learning_rate": 1.067563565596311e-05, - "loss": 0.0402, - "step": 55100 - }, - { - "epoch": 1.398781571265389, - "grad_norm": 0.530971348285675, - "learning_rate": 1.0674789524897406e-05, - "loss": 0.0378, - "step": 55105 - }, - { - "epoch": 1.3989084909252443, - "grad_norm": 0.4313793182373047, - "learning_rate": 1.0673943393831705e-05, - "loss": 0.0418, - "step": 55110 - }, - { - "epoch": 1.3990354105850997, - "grad_norm": 0.30757564306259155, - "learning_rate": 1.0673097262766003e-05, - "loss": 0.0503, - "step": 55115 - }, - { - "epoch": 1.399162330244955, - "grad_norm": 0.34457507729530334, - "learning_rate": 1.0672251131700302e-05, - "loss": 0.0334, - "step": 55120 - }, - { - "epoch": 1.3992892499048102, - "grad_norm": 0.3911019563674927, - "learning_rate": 1.0671405000634598e-05, - "loss": 0.0469, - "step": 55125 - }, - { - "epoch": 1.3994161695646656, - "grad_norm": 0.46548399329185486, - "learning_rate": 1.0670558869568897e-05, - "loss": 0.0312, - "step": 55130 - }, - { - "epoch": 1.3995430892245209, - "grad_norm": 0.6377719640731812, - "learning_rate": 1.0669712738503195e-05, - "loss": 0.043, - "step": 55135 - }, - { - "epoch": 1.3996700088843763, - "grad_norm": 1.8095251321792603, - "learning_rate": 1.0668866607437494e-05, - "loss": 0.045, - "step": 55140 - }, - { - "epoch": 1.3997969285442315, - "grad_norm": 0.45798879861831665, - "learning_rate": 1.066802047637179e-05, - "loss": 0.0424, - "step": 55145 - }, - { - "epoch": 1.3999238482040868, - "grad_norm": 0.45520082116127014, - "learning_rate": 1.0667174345306089e-05, - "loss": 0.042, - "step": 55150 - }, - { - "epoch": 1.400050767863942, - "grad_norm": 0.406439870595932, - "learning_rate": 1.0666328214240387e-05, - "loss": 0.0459, - "step": 55155 - }, - { - "epoch": 1.4001776875237975, - "grad_norm": 0.579373300075531, - "learning_rate": 1.0665482083174685e-05, - "loss": 0.039, - "step": 55160 - }, - { - "epoch": 1.4003046071836527, - "grad_norm": 0.4781292974948883, - "learning_rate": 1.0664635952108982e-05, - "loss": 0.0379, - "step": 55165 - }, - { - "epoch": 1.4004315268435081, - "grad_norm": 0.8100433945655823, - "learning_rate": 1.066378982104328e-05, - "loss": 0.0486, - "step": 55170 - }, - { - "epoch": 1.4005584465033634, - "grad_norm": 0.4993525743484497, - "learning_rate": 1.0662943689977579e-05, - "loss": 0.0522, - "step": 55175 - }, - { - "epoch": 1.4006853661632186, - "grad_norm": 0.33322083950042725, - "learning_rate": 1.0662097558911877e-05, - "loss": 0.0289, - "step": 55180 - }, - { - "epoch": 1.400812285823074, - "grad_norm": 0.357845276594162, - "learning_rate": 1.0661251427846172e-05, - "loss": 0.043, - "step": 55185 - }, - { - "epoch": 1.4009392054829293, - "grad_norm": 0.6835535764694214, - "learning_rate": 1.0660405296780472e-05, - "loss": 0.0319, - "step": 55190 - }, - { - "epoch": 1.4010661251427847, - "grad_norm": 0.41906827688217163, - "learning_rate": 1.065955916571477e-05, - "loss": 0.0478, - "step": 55195 - }, - { - "epoch": 1.40119304480264, - "grad_norm": 0.37586238980293274, - "learning_rate": 1.065871303464907e-05, - "loss": 0.0369, - "step": 55200 - }, - { - "epoch": 1.4013199644624952, - "grad_norm": 0.3650151491165161, - "learning_rate": 1.0657866903583364e-05, - "loss": 0.0296, - "step": 55205 - }, - { - "epoch": 1.4014468841223506, - "grad_norm": 0.502247154712677, - "learning_rate": 1.0657020772517663e-05, - "loss": 0.047, - "step": 55210 - }, - { - "epoch": 1.4015738037822059, - "grad_norm": 0.3173270523548126, - "learning_rate": 1.0656174641451961e-05, - "loss": 0.0446, - "step": 55215 - }, - { - "epoch": 1.4017007234420613, - "grad_norm": 0.45315831899642944, - "learning_rate": 1.065532851038626e-05, - "loss": 0.062, - "step": 55220 - }, - { - "epoch": 1.4018276431019165, - "grad_norm": 0.24827320873737335, - "learning_rate": 1.065448237932056e-05, - "loss": 0.0513, - "step": 55225 - }, - { - "epoch": 1.4019545627617718, - "grad_norm": 0.5030393600463867, - "learning_rate": 1.0653636248254855e-05, - "loss": 0.0348, - "step": 55230 - }, - { - "epoch": 1.402081482421627, - "grad_norm": 0.49238550662994385, - "learning_rate": 1.0652790117189153e-05, - "loss": 0.0384, - "step": 55235 - }, - { - "epoch": 1.4022084020814825, - "grad_norm": 0.3332418203353882, - "learning_rate": 1.0651943986123451e-05, - "loss": 0.0248, - "step": 55240 - }, - { - "epoch": 1.4023353217413377, - "grad_norm": 0.6689284443855286, - "learning_rate": 1.065109785505775e-05, - "loss": 0.0287, - "step": 55245 - }, - { - "epoch": 1.4024622414011931, - "grad_norm": 0.5293350219726562, - "learning_rate": 1.0650251723992046e-05, - "loss": 0.0445, - "step": 55250 - }, - { - "epoch": 1.4025891610610484, - "grad_norm": 0.5664911270141602, - "learning_rate": 1.0649405592926345e-05, - "loss": 0.0542, - "step": 55255 - }, - { - "epoch": 1.4027160807209036, - "grad_norm": 0.2907988131046295, - "learning_rate": 1.0648559461860643e-05, - "loss": 0.0327, - "step": 55260 - }, - { - "epoch": 1.402843000380759, - "grad_norm": 0.4460514485836029, - "learning_rate": 1.0647713330794942e-05, - "loss": 0.0434, - "step": 55265 - }, - { - "epoch": 1.4029699200406143, - "grad_norm": 0.5234701633453369, - "learning_rate": 1.0646867199729238e-05, - "loss": 0.0286, - "step": 55270 - }, - { - "epoch": 1.4030968397004697, - "grad_norm": 0.510266125202179, - "learning_rate": 1.0646021068663537e-05, - "loss": 0.0646, - "step": 55275 - }, - { - "epoch": 1.403223759360325, - "grad_norm": 0.3846030831336975, - "learning_rate": 1.0645174937597835e-05, - "loss": 0.0433, - "step": 55280 - }, - { - "epoch": 1.4033506790201802, - "grad_norm": 0.4012121260166168, - "learning_rate": 1.0644328806532134e-05, - "loss": 0.0421, - "step": 55285 - }, - { - "epoch": 1.4034775986800354, - "grad_norm": 0.36582985520362854, - "learning_rate": 1.064348267546643e-05, - "loss": 0.0456, - "step": 55290 - }, - { - "epoch": 1.4036045183398909, - "grad_norm": 0.5988675951957703, - "learning_rate": 1.0642636544400729e-05, - "loss": 0.0517, - "step": 55295 - }, - { - "epoch": 1.403731437999746, - "grad_norm": 0.5367146730422974, - "learning_rate": 1.0641790413335027e-05, - "loss": 0.0551, - "step": 55300 - }, - { - "epoch": 1.4038583576596015, - "grad_norm": 0.5274094343185425, - "learning_rate": 1.0640944282269326e-05, - "loss": 0.0537, - "step": 55305 - }, - { - "epoch": 1.4039852773194568, - "grad_norm": 0.34550976753234863, - "learning_rate": 1.0640098151203622e-05, - "loss": 0.0455, - "step": 55310 - }, - { - "epoch": 1.404112196979312, - "grad_norm": 0.40197065472602844, - "learning_rate": 1.063925202013792e-05, - "loss": 0.0498, - "step": 55315 - }, - { - "epoch": 1.4042391166391675, - "grad_norm": 0.3327765166759491, - "learning_rate": 1.0638405889072219e-05, - "loss": 0.0631, - "step": 55320 - }, - { - "epoch": 1.4043660362990227, - "grad_norm": 0.31068676710128784, - "learning_rate": 1.0637559758006517e-05, - "loss": 0.0333, - "step": 55325 - }, - { - "epoch": 1.4044929559588781, - "grad_norm": 0.44175633788108826, - "learning_rate": 1.0636713626940814e-05, - "loss": 0.0499, - "step": 55330 - }, - { - "epoch": 1.4046198756187334, - "grad_norm": 0.4191090762615204, - "learning_rate": 1.0635867495875113e-05, - "loss": 0.031, - "step": 55335 - }, - { - "epoch": 1.4047467952785886, - "grad_norm": 0.44463419914245605, - "learning_rate": 1.0635021364809411e-05, - "loss": 0.0605, - "step": 55340 - }, - { - "epoch": 1.404873714938444, - "grad_norm": 0.36490827798843384, - "learning_rate": 1.063417523374371e-05, - "loss": 0.0484, - "step": 55345 - }, - { - "epoch": 1.4050006345982993, - "grad_norm": 0.47686219215393066, - "learning_rate": 1.0633329102678004e-05, - "loss": 0.0405, - "step": 55350 - }, - { - "epoch": 1.4051275542581547, - "grad_norm": 0.29133355617523193, - "learning_rate": 1.0632482971612303e-05, - "loss": 0.0272, - "step": 55355 - }, - { - "epoch": 1.40525447391801, - "grad_norm": 0.5232179760932922, - "learning_rate": 1.0631636840546601e-05, - "loss": 0.0288, - "step": 55360 - }, - { - "epoch": 1.4053813935778652, - "grad_norm": 0.46086814999580383, - "learning_rate": 1.0630790709480901e-05, - "loss": 0.0328, - "step": 55365 - }, - { - "epoch": 1.4055083132377204, - "grad_norm": 0.7383076548576355, - "learning_rate": 1.0629944578415196e-05, - "loss": 0.0373, - "step": 55370 - }, - { - "epoch": 1.4056352328975759, - "grad_norm": 0.4345282018184662, - "learning_rate": 1.0629098447349495e-05, - "loss": 0.0402, - "step": 55375 - }, - { - "epoch": 1.405762152557431, - "grad_norm": 0.7833594679832458, - "learning_rate": 1.0628252316283793e-05, - "loss": 0.0497, - "step": 55380 - }, - { - "epoch": 1.4058890722172865, - "grad_norm": 0.4731292128562927, - "learning_rate": 1.0627406185218091e-05, - "loss": 0.0318, - "step": 55385 - }, - { - "epoch": 1.4060159918771418, - "grad_norm": 0.3182207942008972, - "learning_rate": 1.0626560054152388e-05, - "loss": 0.0213, - "step": 55390 - }, - { - "epoch": 1.406142911536997, - "grad_norm": 0.7674030661582947, - "learning_rate": 1.0625713923086687e-05, - "loss": 0.0367, - "step": 55395 - }, - { - "epoch": 1.4062698311968524, - "grad_norm": 0.3759962022304535, - "learning_rate": 1.0624867792020985e-05, - "loss": 0.0423, - "step": 55400 - }, - { - "epoch": 1.4063967508567077, - "grad_norm": 0.3207372725009918, - "learning_rate": 1.0624021660955283e-05, - "loss": 0.0428, - "step": 55405 - }, - { - "epoch": 1.4065236705165631, - "grad_norm": 0.43908801674842834, - "learning_rate": 1.062317552988958e-05, - "loss": 0.0442, - "step": 55410 - }, - { - "epoch": 1.4066505901764184, - "grad_norm": 0.6346955299377441, - "learning_rate": 1.0622329398823879e-05, - "loss": 0.0486, - "step": 55415 - }, - { - "epoch": 1.4067775098362736, - "grad_norm": 0.36437177658081055, - "learning_rate": 1.0621483267758177e-05, - "loss": 0.0505, - "step": 55420 - }, - { - "epoch": 1.4069044294961288, - "grad_norm": 0.1915905475616455, - "learning_rate": 1.0620637136692475e-05, - "loss": 0.0464, - "step": 55425 - }, - { - "epoch": 1.4070313491559843, - "grad_norm": 0.4024757146835327, - "learning_rate": 1.0619791005626772e-05, - "loss": 0.0406, - "step": 55430 - }, - { - "epoch": 1.4071582688158395, - "grad_norm": 1.1408355236053467, - "learning_rate": 1.061894487456107e-05, - "loss": 0.0384, - "step": 55435 - }, - { - "epoch": 1.407285188475695, - "grad_norm": 0.494718074798584, - "learning_rate": 1.0618098743495369e-05, - "loss": 0.0392, - "step": 55440 - }, - { - "epoch": 1.4074121081355502, - "grad_norm": 0.470222532749176, - "learning_rate": 1.0617252612429667e-05, - "loss": 0.0425, - "step": 55445 - }, - { - "epoch": 1.4075390277954054, - "grad_norm": 0.4357333481311798, - "learning_rate": 1.0616406481363964e-05, - "loss": 0.03, - "step": 55450 - }, - { - "epoch": 1.4076659474552609, - "grad_norm": 1.078187108039856, - "learning_rate": 1.0615560350298262e-05, - "loss": 0.055, - "step": 55455 - }, - { - "epoch": 1.407792867115116, - "grad_norm": 0.3887692391872406, - "learning_rate": 1.061471421923256e-05, - "loss": 0.0452, - "step": 55460 - }, - { - "epoch": 1.4079197867749715, - "grad_norm": 0.6248031854629517, - "learning_rate": 1.0613868088166859e-05, - "loss": 0.0547, - "step": 55465 - }, - { - "epoch": 1.4080467064348268, - "grad_norm": 0.6368305087089539, - "learning_rate": 1.0613021957101156e-05, - "loss": 0.0391, - "step": 55470 - }, - { - "epoch": 1.408173626094682, - "grad_norm": 0.7316281795501709, - "learning_rate": 1.0612175826035454e-05, - "loss": 0.0351, - "step": 55475 - }, - { - "epoch": 1.4083005457545374, - "grad_norm": 1.7395769357681274, - "learning_rate": 1.0611329694969753e-05, - "loss": 0.031, - "step": 55480 - }, - { - "epoch": 1.4084274654143927, - "grad_norm": 1.5869712829589844, - "learning_rate": 1.0610483563904051e-05, - "loss": 0.0618, - "step": 55485 - }, - { - "epoch": 1.4085543850742481, - "grad_norm": 0.6263651251792908, - "learning_rate": 1.0609637432838346e-05, - "loss": 0.0539, - "step": 55490 - }, - { - "epoch": 1.4086813047341034, - "grad_norm": 0.3739353120326996, - "learning_rate": 1.0608791301772644e-05, - "loss": 0.0614, - "step": 55495 - }, - { - "epoch": 1.4088082243939586, - "grad_norm": 1.6368530988693237, - "learning_rate": 1.0607945170706943e-05, - "loss": 0.0557, - "step": 55500 - }, - { - "epoch": 1.4089351440538138, - "grad_norm": 0.70174241065979, - "learning_rate": 1.0607099039641241e-05, - "loss": 0.0366, - "step": 55505 - }, - { - "epoch": 1.4090620637136693, - "grad_norm": 0.5474753975868225, - "learning_rate": 1.0606252908575538e-05, - "loss": 0.0481, - "step": 55510 - }, - { - "epoch": 1.4091889833735245, - "grad_norm": 0.5980707406997681, - "learning_rate": 1.0605406777509836e-05, - "loss": 0.0464, - "step": 55515 - }, - { - "epoch": 1.40931590303338, - "grad_norm": 0.47189223766326904, - "learning_rate": 1.0604560646444135e-05, - "loss": 0.038, - "step": 55520 - }, - { - "epoch": 1.4094428226932352, - "grad_norm": 0.5363193154335022, - "learning_rate": 1.0603714515378433e-05, - "loss": 0.0438, - "step": 55525 - }, - { - "epoch": 1.4095697423530904, - "grad_norm": 0.5871829390525818, - "learning_rate": 1.060286838431273e-05, - "loss": 0.055, - "step": 55530 - }, - { - "epoch": 1.4096966620129459, - "grad_norm": 0.36411571502685547, - "learning_rate": 1.0602022253247028e-05, - "loss": 0.0341, - "step": 55535 - }, - { - "epoch": 1.409823581672801, - "grad_norm": 0.5314422249794006, - "learning_rate": 1.0601176122181327e-05, - "loss": 0.0515, - "step": 55540 - }, - { - "epoch": 1.4099505013326565, - "grad_norm": 0.3663199245929718, - "learning_rate": 1.0600329991115625e-05, - "loss": 0.0366, - "step": 55545 - }, - { - "epoch": 1.4100774209925118, - "grad_norm": 0.4242531657218933, - "learning_rate": 1.0599483860049922e-05, - "loss": 0.0434, - "step": 55550 - }, - { - "epoch": 1.410204340652367, - "grad_norm": 0.497484415769577, - "learning_rate": 1.059863772898422e-05, - "loss": 0.0315, - "step": 55555 - }, - { - "epoch": 1.4103312603122224, - "grad_norm": 0.3888702988624573, - "learning_rate": 1.0597791597918519e-05, - "loss": 0.0475, - "step": 55560 - }, - { - "epoch": 1.4104581799720777, - "grad_norm": 0.5225132703781128, - "learning_rate": 1.0596945466852817e-05, - "loss": 0.0473, - "step": 55565 - }, - { - "epoch": 1.410585099631933, - "grad_norm": 0.5145633816719055, - "learning_rate": 1.0596099335787114e-05, - "loss": 0.0657, - "step": 55570 - }, - { - "epoch": 1.4107120192917884, - "grad_norm": 0.4441062808036804, - "learning_rate": 1.0595253204721412e-05, - "loss": 0.0389, - "step": 55575 - }, - { - "epoch": 1.4108389389516436, - "grad_norm": 0.5639595985412598, - "learning_rate": 1.059440707365571e-05, - "loss": 0.0309, - "step": 55580 - }, - { - "epoch": 1.4109658586114988, - "grad_norm": 0.42478370666503906, - "learning_rate": 1.0593560942590009e-05, - "loss": 0.0295, - "step": 55585 - }, - { - "epoch": 1.4110927782713543, - "grad_norm": 0.35218045115470886, - "learning_rate": 1.0592714811524306e-05, - "loss": 0.0289, - "step": 55590 - }, - { - "epoch": 1.4112196979312095, - "grad_norm": 0.8284274339675903, - "learning_rate": 1.0591868680458604e-05, - "loss": 0.0513, - "step": 55595 - }, - { - "epoch": 1.411346617591065, - "grad_norm": 0.40589776635169983, - "learning_rate": 1.0591022549392902e-05, - "loss": 0.0569, - "step": 55600 - }, - { - "epoch": 1.4114735372509202, - "grad_norm": 0.592866063117981, - "learning_rate": 1.05901764183272e-05, - "loss": 0.0423, - "step": 55605 - }, - { - "epoch": 1.4116004569107754, - "grad_norm": 0.8443232178688049, - "learning_rate": 1.0589330287261496e-05, - "loss": 0.0398, - "step": 55610 - }, - { - "epoch": 1.4117273765706309, - "grad_norm": 0.47554880380630493, - "learning_rate": 1.0588484156195796e-05, - "loss": 0.0425, - "step": 55615 - }, - { - "epoch": 1.411854296230486, - "grad_norm": 0.2487233281135559, - "learning_rate": 1.0587638025130094e-05, - "loss": 0.0344, - "step": 55620 - }, - { - "epoch": 1.4119812158903415, - "grad_norm": 0.32445961236953735, - "learning_rate": 1.0586791894064393e-05, - "loss": 0.0489, - "step": 55625 - }, - { - "epoch": 1.4121081355501968, - "grad_norm": 0.5747373700141907, - "learning_rate": 1.0585945762998688e-05, - "loss": 0.0357, - "step": 55630 - }, - { - "epoch": 1.412235055210052, - "grad_norm": 0.5054619312286377, - "learning_rate": 1.0585099631932986e-05, - "loss": 0.0533, - "step": 55635 - }, - { - "epoch": 1.4123619748699072, - "grad_norm": 0.3967200517654419, - "learning_rate": 1.0584253500867285e-05, - "loss": 0.0291, - "step": 55640 - }, - { - "epoch": 1.4124888945297627, - "grad_norm": 0.23160120844841003, - "learning_rate": 1.0583407369801583e-05, - "loss": 0.0559, - "step": 55645 - }, - { - "epoch": 1.412615814189618, - "grad_norm": 0.3936816155910492, - "learning_rate": 1.058256123873588e-05, - "loss": 0.0429, - "step": 55650 - }, - { - "epoch": 1.4127427338494734, - "grad_norm": 0.5428242683410645, - "learning_rate": 1.0581715107670178e-05, - "loss": 0.0504, - "step": 55655 - }, - { - "epoch": 1.4128696535093286, - "grad_norm": 0.29157182574272156, - "learning_rate": 1.0580868976604476e-05, - "loss": 0.0277, - "step": 55660 - }, - { - "epoch": 1.4129965731691838, - "grad_norm": 0.3884866535663605, - "learning_rate": 1.0580022845538775e-05, - "loss": 0.0463, - "step": 55665 - }, - { - "epoch": 1.4131234928290393, - "grad_norm": 0.5760250687599182, - "learning_rate": 1.0579176714473072e-05, - "loss": 0.0404, - "step": 55670 - }, - { - "epoch": 1.4132504124888945, - "grad_norm": 0.4170297086238861, - "learning_rate": 1.057833058340737e-05, - "loss": 0.029, - "step": 55675 - }, - { - "epoch": 1.41337733214875, - "grad_norm": 0.49008139967918396, - "learning_rate": 1.0577484452341668e-05, - "loss": 0.0443, - "step": 55680 - }, - { - "epoch": 1.4135042518086052, - "grad_norm": 0.49738550186157227, - "learning_rate": 1.0576638321275967e-05, - "loss": 0.0623, - "step": 55685 - }, - { - "epoch": 1.4136311714684604, - "grad_norm": 0.4535476565361023, - "learning_rate": 1.0575792190210264e-05, - "loss": 0.0346, - "step": 55690 - }, - { - "epoch": 1.4137580911283159, - "grad_norm": 0.3513830006122589, - "learning_rate": 1.0574946059144562e-05, - "loss": 0.0294, - "step": 55695 - }, - { - "epoch": 1.413885010788171, - "grad_norm": 0.4812723994255066, - "learning_rate": 1.057409992807886e-05, - "loss": 0.0317, - "step": 55700 - }, - { - "epoch": 1.4140119304480265, - "grad_norm": 0.6118952631950378, - "learning_rate": 1.0573253797013159e-05, - "loss": 0.0533, - "step": 55705 - }, - { - "epoch": 1.4141388501078818, - "grad_norm": 0.40380290150642395, - "learning_rate": 1.0572407665947455e-05, - "loss": 0.0514, - "step": 55710 - }, - { - "epoch": 1.414265769767737, - "grad_norm": 0.5271044373512268, - "learning_rate": 1.0571561534881754e-05, - "loss": 0.0392, - "step": 55715 - }, - { - "epoch": 1.4143926894275922, - "grad_norm": 0.6026173830032349, - "learning_rate": 1.0570715403816052e-05, - "loss": 0.0495, - "step": 55720 - }, - { - "epoch": 1.4145196090874477, - "grad_norm": 0.46896085143089294, - "learning_rate": 1.056986927275035e-05, - "loss": 0.0459, - "step": 55725 - }, - { - "epoch": 1.414646528747303, - "grad_norm": 0.2892122268676758, - "learning_rate": 1.0569023141684647e-05, - "loss": 0.0366, - "step": 55730 - }, - { - "epoch": 1.4147734484071584, - "grad_norm": 0.4904959797859192, - "learning_rate": 1.0568177010618946e-05, - "loss": 0.0415, - "step": 55735 - }, - { - "epoch": 1.4149003680670136, - "grad_norm": 0.36178117990493774, - "learning_rate": 1.0567330879553244e-05, - "loss": 0.0454, - "step": 55740 - }, - { - "epoch": 1.4150272877268688, - "grad_norm": 0.33807557821273804, - "learning_rate": 1.0566484748487543e-05, - "loss": 0.0369, - "step": 55745 - }, - { - "epoch": 1.4151542073867243, - "grad_norm": 0.3211967647075653, - "learning_rate": 1.0565638617421841e-05, - "loss": 0.0291, - "step": 55750 - }, - { - "epoch": 1.4152811270465795, - "grad_norm": 0.5579877495765686, - "learning_rate": 1.0564792486356138e-05, - "loss": 0.0452, - "step": 55755 - }, - { - "epoch": 1.415408046706435, - "grad_norm": 0.6059853434562683, - "learning_rate": 1.0563946355290436e-05, - "loss": 0.0438, - "step": 55760 - }, - { - "epoch": 1.4155349663662902, - "grad_norm": 0.4436766505241394, - "learning_rate": 1.0563100224224734e-05, - "loss": 0.0477, - "step": 55765 - }, - { - "epoch": 1.4156618860261454, - "grad_norm": 0.26150986552238464, - "learning_rate": 1.0562254093159033e-05, - "loss": 0.038, - "step": 55770 - }, - { - "epoch": 1.4157888056860006, - "grad_norm": 0.6085768342018127, - "learning_rate": 1.0561407962093328e-05, - "loss": 0.0476, - "step": 55775 - }, - { - "epoch": 1.415915725345856, - "grad_norm": 1.4162802696228027, - "learning_rate": 1.0560561831027626e-05, - "loss": 0.0404, - "step": 55780 - }, - { - "epoch": 1.4160426450057113, - "grad_norm": 0.43222731351852417, - "learning_rate": 1.0559715699961925e-05, - "loss": 0.0397, - "step": 55785 - }, - { - "epoch": 1.4161695646655668, - "grad_norm": 0.25348007678985596, - "learning_rate": 1.0558869568896225e-05, - "loss": 0.0444, - "step": 55790 - }, - { - "epoch": 1.416296484325422, - "grad_norm": 0.46034055948257446, - "learning_rate": 1.055802343783052e-05, - "loss": 0.0363, - "step": 55795 - }, - { - "epoch": 1.4164234039852772, - "grad_norm": 0.5457726120948792, - "learning_rate": 1.0557177306764818e-05, - "loss": 0.0682, - "step": 55800 - }, - { - "epoch": 1.4165503236451327, - "grad_norm": 0.42890918254852295, - "learning_rate": 1.0556331175699117e-05, - "loss": 0.0466, - "step": 55805 - }, - { - "epoch": 1.416677243304988, - "grad_norm": 0.3383115530014038, - "learning_rate": 1.0555485044633415e-05, - "loss": 0.0492, - "step": 55810 - }, - { - "epoch": 1.4168041629648433, - "grad_norm": 0.4270532429218292, - "learning_rate": 1.0554638913567712e-05, - "loss": 0.0503, - "step": 55815 - }, - { - "epoch": 1.4169310826246986, - "grad_norm": 0.3480062782764435, - "learning_rate": 1.055379278250201e-05, - "loss": 0.0363, - "step": 55820 - }, - { - "epoch": 1.4170580022845538, - "grad_norm": 0.4408123195171356, - "learning_rate": 1.0552946651436309e-05, - "loss": 0.0477, - "step": 55825 - }, - { - "epoch": 1.4171849219444093, - "grad_norm": 0.39010873436927795, - "learning_rate": 1.0552100520370607e-05, - "loss": 0.0478, - "step": 55830 - }, - { - "epoch": 1.4173118416042645, - "grad_norm": 0.2808758318424225, - "learning_rate": 1.0551254389304904e-05, - "loss": 0.0434, - "step": 55835 - }, - { - "epoch": 1.41743876126412, - "grad_norm": 0.4004443287849426, - "learning_rate": 1.0550408258239202e-05, - "loss": 0.0434, - "step": 55840 - }, - { - "epoch": 1.4175656809239752, - "grad_norm": 0.34843572974205017, - "learning_rate": 1.05495621271735e-05, - "loss": 0.0412, - "step": 55845 - }, - { - "epoch": 1.4176926005838304, - "grad_norm": 0.25377488136291504, - "learning_rate": 1.0548715996107799e-05, - "loss": 0.0329, - "step": 55850 - }, - { - "epoch": 1.4178195202436856, - "grad_norm": 1.0707764625549316, - "learning_rate": 1.0547869865042096e-05, - "loss": 0.0283, - "step": 55855 - }, - { - "epoch": 1.417946439903541, - "grad_norm": 0.6438915729522705, - "learning_rate": 1.0547023733976394e-05, - "loss": 0.0434, - "step": 55860 - }, - { - "epoch": 1.4180733595633963, - "grad_norm": 0.5307868123054504, - "learning_rate": 1.0546177602910692e-05, - "loss": 0.059, - "step": 55865 - }, - { - "epoch": 1.4182002792232518, - "grad_norm": 0.5650908946990967, - "learning_rate": 1.054533147184499e-05, - "loss": 0.0448, - "step": 55870 - }, - { - "epoch": 1.418327198883107, - "grad_norm": 0.45198458433151245, - "learning_rate": 1.0544485340779287e-05, - "loss": 0.0626, - "step": 55875 - }, - { - "epoch": 1.4184541185429622, - "grad_norm": 0.42163893580436707, - "learning_rate": 1.0543639209713586e-05, - "loss": 0.055, - "step": 55880 - }, - { - "epoch": 1.4185810382028177, - "grad_norm": 0.3834121823310852, - "learning_rate": 1.0542793078647884e-05, - "loss": 0.055, - "step": 55885 - }, - { - "epoch": 1.418707957862673, - "grad_norm": 0.6058289408683777, - "learning_rate": 1.0541946947582183e-05, - "loss": 0.0467, - "step": 55890 - }, - { - "epoch": 1.4188348775225283, - "grad_norm": 0.36429280042648315, - "learning_rate": 1.054110081651648e-05, - "loss": 0.041, - "step": 55895 - }, - { - "epoch": 1.4189617971823836, - "grad_norm": 0.25410765409469604, - "learning_rate": 1.0540254685450778e-05, - "loss": 0.0268, - "step": 55900 - }, - { - "epoch": 1.4190887168422388, - "grad_norm": 0.43290361762046814, - "learning_rate": 1.0539408554385076e-05, - "loss": 0.0383, - "step": 55905 - }, - { - "epoch": 1.419215636502094, - "grad_norm": 0.42802196741104126, - "learning_rate": 1.0538562423319375e-05, - "loss": 0.0451, - "step": 55910 - }, - { - "epoch": 1.4193425561619495, - "grad_norm": 0.4703369736671448, - "learning_rate": 1.053771629225367e-05, - "loss": 0.0305, - "step": 55915 - }, - { - "epoch": 1.4194694758218047, - "grad_norm": 1.1461683511734009, - "learning_rate": 1.0536870161187968e-05, - "loss": 0.0428, - "step": 55920 - }, - { - "epoch": 1.4195963954816602, - "grad_norm": 0.32500118017196655, - "learning_rate": 1.0536024030122266e-05, - "loss": 0.0598, - "step": 55925 - }, - { - "epoch": 1.4197233151415154, - "grad_norm": 0.2805018723011017, - "learning_rate": 1.0535177899056565e-05, - "loss": 0.0513, - "step": 55930 - }, - { - "epoch": 1.4198502348013706, - "grad_norm": 0.44849419593811035, - "learning_rate": 1.0534331767990861e-05, - "loss": 0.0371, - "step": 55935 - }, - { - "epoch": 1.419977154461226, - "grad_norm": 0.4139915406703949, - "learning_rate": 1.053348563692516e-05, - "loss": 0.0446, - "step": 55940 - }, - { - "epoch": 1.4201040741210813, - "grad_norm": 0.33174875378608704, - "learning_rate": 1.0532639505859458e-05, - "loss": 0.032, - "step": 55945 - }, - { - "epoch": 1.4202309937809368, - "grad_norm": 0.23415711522102356, - "learning_rate": 1.0531793374793757e-05, - "loss": 0.0411, - "step": 55950 - }, - { - "epoch": 1.420357913440792, - "grad_norm": 0.5807295441627502, - "learning_rate": 1.0530947243728053e-05, - "loss": 0.0516, - "step": 55955 - }, - { - "epoch": 1.4204848331006472, - "grad_norm": 0.5422258377075195, - "learning_rate": 1.0530101112662352e-05, - "loss": 0.047, - "step": 55960 - }, - { - "epoch": 1.4206117527605027, - "grad_norm": 0.394619882106781, - "learning_rate": 1.052925498159665e-05, - "loss": 0.0499, - "step": 55965 - }, - { - "epoch": 1.420738672420358, - "grad_norm": 0.7829577326774597, - "learning_rate": 1.0528408850530949e-05, - "loss": 0.0549, - "step": 55970 - }, - { - "epoch": 1.4208655920802133, - "grad_norm": 0.41978713870048523, - "learning_rate": 1.0527562719465245e-05, - "loss": 0.0374, - "step": 55975 - }, - { - "epoch": 1.4209925117400686, - "grad_norm": 0.44662827253341675, - "learning_rate": 1.0526716588399544e-05, - "loss": 0.0369, - "step": 55980 - }, - { - "epoch": 1.4211194313999238, - "grad_norm": 0.7699069976806641, - "learning_rate": 1.0525870457333842e-05, - "loss": 0.0435, - "step": 55985 - }, - { - "epoch": 1.421246351059779, - "grad_norm": 0.5677546262741089, - "learning_rate": 1.052502432626814e-05, - "loss": 0.0334, - "step": 55990 - }, - { - "epoch": 1.4213732707196345, - "grad_norm": 0.753502368927002, - "learning_rate": 1.0524178195202437e-05, - "loss": 0.0472, - "step": 55995 - }, - { - "epoch": 1.4215001903794897, - "grad_norm": 1.648773431777954, - "learning_rate": 1.0523332064136736e-05, - "loss": 0.038, - "step": 56000 - }, - { - "epoch": 1.4216271100393452, - "grad_norm": 0.46680688858032227, - "learning_rate": 1.0522485933071034e-05, - "loss": 0.0359, - "step": 56005 - }, - { - "epoch": 1.4217540296992004, - "grad_norm": 0.41498851776123047, - "learning_rate": 1.0521639802005332e-05, - "loss": 0.0476, - "step": 56010 - }, - { - "epoch": 1.4218809493590556, - "grad_norm": 0.6210684180259705, - "learning_rate": 1.0520793670939629e-05, - "loss": 0.0417, - "step": 56015 - }, - { - "epoch": 1.422007869018911, - "grad_norm": 0.45283523201942444, - "learning_rate": 1.0519947539873928e-05, - "loss": 0.0362, - "step": 56020 - }, - { - "epoch": 1.4221347886787663, - "grad_norm": 0.5901384949684143, - "learning_rate": 1.0519101408808226e-05, - "loss": 0.0539, - "step": 56025 - }, - { - "epoch": 1.4222617083386218, - "grad_norm": 0.44071197509765625, - "learning_rate": 1.0518255277742524e-05, - "loss": 0.0372, - "step": 56030 - }, - { - "epoch": 1.422388627998477, - "grad_norm": 0.5540007948875427, - "learning_rate": 1.0517409146676821e-05, - "loss": 0.048, - "step": 56035 - }, - { - "epoch": 1.4225155476583322, - "grad_norm": 0.5218774676322937, - "learning_rate": 1.051656301561112e-05, - "loss": 0.0463, - "step": 56040 - }, - { - "epoch": 1.4226424673181877, - "grad_norm": 0.3972030282020569, - "learning_rate": 1.0515716884545418e-05, - "loss": 0.042, - "step": 56045 - }, - { - "epoch": 1.422769386978043, - "grad_norm": 0.5022811889648438, - "learning_rate": 1.0514870753479716e-05, - "loss": 0.0483, - "step": 56050 - }, - { - "epoch": 1.4228963066378983, - "grad_norm": 0.6518881916999817, - "learning_rate": 1.0514024622414011e-05, - "loss": 0.047, - "step": 56055 - }, - { - "epoch": 1.4230232262977536, - "grad_norm": 0.505180835723877, - "learning_rate": 1.051317849134831e-05, - "loss": 0.0563, - "step": 56060 - }, - { - "epoch": 1.4231501459576088, - "grad_norm": 0.5062366724014282, - "learning_rate": 1.0512332360282608e-05, - "loss": 0.0508, - "step": 56065 - }, - { - "epoch": 1.423277065617464, - "grad_norm": 0.29828789830207825, - "learning_rate": 1.0511486229216906e-05, - "loss": 0.0327, - "step": 56070 - }, - { - "epoch": 1.4234039852773195, - "grad_norm": 0.41667917370796204, - "learning_rate": 1.0510640098151203e-05, - "loss": 0.045, - "step": 56075 - }, - { - "epoch": 1.4235309049371747, - "grad_norm": 1.4039312601089478, - "learning_rate": 1.0509793967085502e-05, - "loss": 0.0557, - "step": 56080 - }, - { - "epoch": 1.4236578245970302, - "grad_norm": 0.2463080734014511, - "learning_rate": 1.05089478360198e-05, - "loss": 0.0389, - "step": 56085 - }, - { - "epoch": 1.4237847442568854, - "grad_norm": 0.6725683212280273, - "learning_rate": 1.0508101704954098e-05, - "loss": 0.0476, - "step": 56090 - }, - { - "epoch": 1.4239116639167406, - "grad_norm": 0.4788493514060974, - "learning_rate": 1.0507255573888395e-05, - "loss": 0.032, - "step": 56095 - }, - { - "epoch": 1.424038583576596, - "grad_norm": 0.4323277175426483, - "learning_rate": 1.0506409442822694e-05, - "loss": 0.0428, - "step": 56100 - }, - { - "epoch": 1.4241655032364513, - "grad_norm": 0.6241247653961182, - "learning_rate": 1.0505563311756992e-05, - "loss": 0.0309, - "step": 56105 - }, - { - "epoch": 1.4242924228963068, - "grad_norm": 0.45828646421432495, - "learning_rate": 1.050471718069129e-05, - "loss": 0.0462, - "step": 56110 - }, - { - "epoch": 1.424419342556162, - "grad_norm": 0.47613900899887085, - "learning_rate": 1.0503871049625587e-05, - "loss": 0.0469, - "step": 56115 - }, - { - "epoch": 1.4245462622160172, - "grad_norm": 0.48187056183815, - "learning_rate": 1.0503024918559885e-05, - "loss": 0.0477, - "step": 56120 - }, - { - "epoch": 1.4246731818758724, - "grad_norm": 0.32941439747810364, - "learning_rate": 1.0502178787494184e-05, - "loss": 0.0377, - "step": 56125 - }, - { - "epoch": 1.424800101535728, - "grad_norm": 0.50816810131073, - "learning_rate": 1.0501332656428482e-05, - "loss": 0.0462, - "step": 56130 - }, - { - "epoch": 1.4249270211955831, - "grad_norm": 0.278118371963501, - "learning_rate": 1.0500486525362779e-05, - "loss": 0.0349, - "step": 56135 - }, - { - "epoch": 1.4250539408554386, - "grad_norm": 0.25808730721473694, - "learning_rate": 1.0499640394297077e-05, - "loss": 0.0443, - "step": 56140 - }, - { - "epoch": 1.4251808605152938, - "grad_norm": 0.5962511897087097, - "learning_rate": 1.0498794263231376e-05, - "loss": 0.0497, - "step": 56145 - }, - { - "epoch": 1.425307780175149, - "grad_norm": 0.24137161672115326, - "learning_rate": 1.0497948132165674e-05, - "loss": 0.0392, - "step": 56150 - }, - { - "epoch": 1.4254346998350045, - "grad_norm": 0.3219912648200989, - "learning_rate": 1.0497102001099971e-05, - "loss": 0.05, - "step": 56155 - }, - { - "epoch": 1.4255616194948597, - "grad_norm": 0.2455541044473648, - "learning_rate": 1.049625587003427e-05, - "loss": 0.0336, - "step": 56160 - }, - { - "epoch": 1.4256885391547152, - "grad_norm": 0.5258591771125793, - "learning_rate": 1.0495409738968568e-05, - "loss": 0.0354, - "step": 56165 - }, - { - "epoch": 1.4258154588145704, - "grad_norm": 0.6123616099357605, - "learning_rate": 1.0494563607902866e-05, - "loss": 0.0385, - "step": 56170 - }, - { - "epoch": 1.4259423784744256, - "grad_norm": 0.5571764707565308, - "learning_rate": 1.0493717476837161e-05, - "loss": 0.0398, - "step": 56175 - }, - { - "epoch": 1.426069298134281, - "grad_norm": 0.5264490842819214, - "learning_rate": 1.0492871345771461e-05, - "loss": 0.0434, - "step": 56180 - }, - { - "epoch": 1.4261962177941363, - "grad_norm": 1.135246992111206, - "learning_rate": 1.049202521470576e-05, - "loss": 0.0394, - "step": 56185 - }, - { - "epoch": 1.4263231374539918, - "grad_norm": 0.36922016739845276, - "learning_rate": 1.0491179083640058e-05, - "loss": 0.0473, - "step": 56190 - }, - { - "epoch": 1.426450057113847, - "grad_norm": 0.5613768100738525, - "learning_rate": 1.0490332952574353e-05, - "loss": 0.0418, - "step": 56195 - }, - { - "epoch": 1.4265769767737022, - "grad_norm": 0.46509379148483276, - "learning_rate": 1.0489486821508651e-05, - "loss": 0.0371, - "step": 56200 - }, - { - "epoch": 1.4267038964335574, - "grad_norm": 0.6931379437446594, - "learning_rate": 1.048864069044295e-05, - "loss": 0.0283, - "step": 56205 - }, - { - "epoch": 1.426830816093413, - "grad_norm": 0.640570878982544, - "learning_rate": 1.0487794559377248e-05, - "loss": 0.0513, - "step": 56210 - }, - { - "epoch": 1.4269577357532681, - "grad_norm": 0.5549613237380981, - "learning_rate": 1.0486948428311545e-05, - "loss": 0.0434, - "step": 56215 - }, - { - "epoch": 1.4270846554131236, - "grad_norm": 0.7627966403961182, - "learning_rate": 1.0486102297245843e-05, - "loss": 0.046, - "step": 56220 - }, - { - "epoch": 1.4272115750729788, - "grad_norm": 0.3493845462799072, - "learning_rate": 1.0485256166180142e-05, - "loss": 0.046, - "step": 56225 - }, - { - "epoch": 1.427338494732834, - "grad_norm": 0.5369846224784851, - "learning_rate": 1.048441003511444e-05, - "loss": 0.0584, - "step": 56230 - }, - { - "epoch": 1.4274654143926895, - "grad_norm": 0.3687084913253784, - "learning_rate": 1.0483563904048737e-05, - "loss": 0.0574, - "step": 56235 - }, - { - "epoch": 1.4275923340525447, - "grad_norm": 0.27972766757011414, - "learning_rate": 1.0482717772983035e-05, - "loss": 0.0503, - "step": 56240 - }, - { - "epoch": 1.4277192537124002, - "grad_norm": 0.5022234916687012, - "learning_rate": 1.0481871641917334e-05, - "loss": 0.0544, - "step": 56245 - }, - { - "epoch": 1.4278461733722554, - "grad_norm": 0.2624525725841522, - "learning_rate": 1.0481025510851632e-05, - "loss": 0.0342, - "step": 56250 - }, - { - "epoch": 1.4279730930321106, - "grad_norm": 0.542948842048645, - "learning_rate": 1.048017937978593e-05, - "loss": 0.0326, - "step": 56255 - }, - { - "epoch": 1.4281000126919658, - "grad_norm": 0.48014357686042786, - "learning_rate": 1.0479333248720227e-05, - "loss": 0.0499, - "step": 56260 - }, - { - "epoch": 1.4282269323518213, - "grad_norm": 0.7082223892211914, - "learning_rate": 1.0478487117654526e-05, - "loss": 0.046, - "step": 56265 - }, - { - "epoch": 1.4283538520116765, - "grad_norm": 0.41832369565963745, - "learning_rate": 1.0477640986588824e-05, - "loss": 0.0499, - "step": 56270 - }, - { - "epoch": 1.428480771671532, - "grad_norm": 0.4084480404853821, - "learning_rate": 1.0476794855523122e-05, - "loss": 0.0614, - "step": 56275 - }, - { - "epoch": 1.4286076913313872, - "grad_norm": 0.6191757917404175, - "learning_rate": 1.0475948724457419e-05, - "loss": 0.0389, - "step": 56280 - }, - { - "epoch": 1.4287346109912424, - "grad_norm": 0.4276295304298401, - "learning_rate": 1.0475102593391717e-05, - "loss": 0.0468, - "step": 56285 - }, - { - "epoch": 1.4288615306510979, - "grad_norm": 0.5390713214874268, - "learning_rate": 1.0474256462326016e-05, - "loss": 0.0521, - "step": 56290 - }, - { - "epoch": 1.4289884503109531, - "grad_norm": 0.43254175782203674, - "learning_rate": 1.0473410331260314e-05, - "loss": 0.0653, - "step": 56295 - }, - { - "epoch": 1.4291153699708086, - "grad_norm": 0.3674091398715973, - "learning_rate": 1.0472564200194611e-05, - "loss": 0.0356, - "step": 56300 - }, - { - "epoch": 1.4292422896306638, - "grad_norm": 0.6071692705154419, - "learning_rate": 1.047171806912891e-05, - "loss": 0.0332, - "step": 56305 - }, - { - "epoch": 1.429369209290519, - "grad_norm": 0.6864773035049438, - "learning_rate": 1.0470871938063208e-05, - "loss": 0.0299, - "step": 56310 - }, - { - "epoch": 1.4294961289503745, - "grad_norm": 0.6048799753189087, - "learning_rate": 1.0470025806997506e-05, - "loss": 0.0558, - "step": 56315 - }, - { - "epoch": 1.4296230486102297, - "grad_norm": 0.3720712661743164, - "learning_rate": 1.0469179675931803e-05, - "loss": 0.0449, - "step": 56320 - }, - { - "epoch": 1.4297499682700852, - "grad_norm": 0.6224943995475769, - "learning_rate": 1.0468333544866101e-05, - "loss": 0.0455, - "step": 56325 - }, - { - "epoch": 1.4298768879299404, - "grad_norm": 0.5117452144622803, - "learning_rate": 1.04674874138004e-05, - "loss": 0.0414, - "step": 56330 - }, - { - "epoch": 1.4300038075897956, - "grad_norm": 0.39757490158081055, - "learning_rate": 1.0466641282734698e-05, - "loss": 0.043, - "step": 56335 - }, - { - "epoch": 1.4301307272496508, - "grad_norm": 0.7961227893829346, - "learning_rate": 1.0465795151668993e-05, - "loss": 0.0534, - "step": 56340 - }, - { - "epoch": 1.4302576469095063, - "grad_norm": 0.49353229999542236, - "learning_rate": 1.0464949020603291e-05, - "loss": 0.0452, - "step": 56345 - }, - { - "epoch": 1.4303845665693615, - "grad_norm": 0.2890416085720062, - "learning_rate": 1.046410288953759e-05, - "loss": 0.0258, - "step": 56350 - }, - { - "epoch": 1.430511486229217, - "grad_norm": 0.2891247272491455, - "learning_rate": 1.046325675847189e-05, - "loss": 0.0418, - "step": 56355 - }, - { - "epoch": 1.4306384058890722, - "grad_norm": 0.37365803122520447, - "learning_rate": 1.0462410627406185e-05, - "loss": 0.0331, - "step": 56360 - }, - { - "epoch": 1.4307653255489274, - "grad_norm": 0.3194270431995392, - "learning_rate": 1.0461564496340483e-05, - "loss": 0.0421, - "step": 56365 - }, - { - "epoch": 1.4308922452087829, - "grad_norm": 0.5701406002044678, - "learning_rate": 1.0460718365274782e-05, - "loss": 0.0525, - "step": 56370 - }, - { - "epoch": 1.4310191648686381, - "grad_norm": 0.5451099872589111, - "learning_rate": 1.045987223420908e-05, - "loss": 0.0446, - "step": 56375 - }, - { - "epoch": 1.4311460845284936, - "grad_norm": 0.3452075719833374, - "learning_rate": 1.0459026103143377e-05, - "loss": 0.0356, - "step": 56380 - }, - { - "epoch": 1.4312730041883488, - "grad_norm": 0.6912210583686829, - "learning_rate": 1.0458179972077675e-05, - "loss": 0.0544, - "step": 56385 - }, - { - "epoch": 1.431399923848204, - "grad_norm": 0.5242384672164917, - "learning_rate": 1.0457333841011974e-05, - "loss": 0.0441, - "step": 56390 - }, - { - "epoch": 1.4315268435080595, - "grad_norm": 0.4747695028781891, - "learning_rate": 1.0456487709946272e-05, - "loss": 0.052, - "step": 56395 - }, - { - "epoch": 1.4316537631679147, - "grad_norm": 0.3000935912132263, - "learning_rate": 1.0455641578880569e-05, - "loss": 0.0361, - "step": 56400 - }, - { - "epoch": 1.4317806828277702, - "grad_norm": 0.43237507343292236, - "learning_rate": 1.0454795447814867e-05, - "loss": 0.037, - "step": 56405 - }, - { - "epoch": 1.4319076024876254, - "grad_norm": 0.32925939559936523, - "learning_rate": 1.0453949316749166e-05, - "loss": 0.0302, - "step": 56410 - }, - { - "epoch": 1.4320345221474806, - "grad_norm": 0.5479841828346252, - "learning_rate": 1.0453103185683464e-05, - "loss": 0.0573, - "step": 56415 - }, - { - "epoch": 1.4321614418073358, - "grad_norm": 0.4501815736293793, - "learning_rate": 1.045225705461776e-05, - "loss": 0.0404, - "step": 56420 - }, - { - "epoch": 1.4322883614671913, - "grad_norm": 0.4759184420108795, - "learning_rate": 1.0451410923552059e-05, - "loss": 0.0276, - "step": 56425 - }, - { - "epoch": 1.4324152811270465, - "grad_norm": 0.6201631426811218, - "learning_rate": 1.0450564792486358e-05, - "loss": 0.0431, - "step": 56430 - }, - { - "epoch": 1.432542200786902, - "grad_norm": 0.7175920009613037, - "learning_rate": 1.0449718661420656e-05, - "loss": 0.0394, - "step": 56435 - }, - { - "epoch": 1.4326691204467572, - "grad_norm": 0.37973541021347046, - "learning_rate": 1.0448872530354953e-05, - "loss": 0.0394, - "step": 56440 - }, - { - "epoch": 1.4327960401066124, - "grad_norm": 0.3155314326286316, - "learning_rate": 1.0448026399289251e-05, - "loss": 0.0434, - "step": 56445 - }, - { - "epoch": 1.4329229597664679, - "grad_norm": 0.2940407991409302, - "learning_rate": 1.044718026822355e-05, - "loss": 0.0392, - "step": 56450 - }, - { - "epoch": 1.4330498794263231, - "grad_norm": 0.4739591181278229, - "learning_rate": 1.0446334137157848e-05, - "loss": 0.0371, - "step": 56455 - }, - { - "epoch": 1.4331767990861786, - "grad_norm": 0.4033241868019104, - "learning_rate": 1.0445488006092145e-05, - "loss": 0.0647, - "step": 56460 - }, - { - "epoch": 1.4333037187460338, - "grad_norm": 0.4157336354255676, - "learning_rate": 1.0444641875026443e-05, - "loss": 0.0369, - "step": 56465 - }, - { - "epoch": 1.433430638405889, - "grad_norm": 0.9031152129173279, - "learning_rate": 1.0443795743960741e-05, - "loss": 0.0757, - "step": 56470 - }, - { - "epoch": 1.4335575580657443, - "grad_norm": 0.472131609916687, - "learning_rate": 1.044294961289504e-05, - "loss": 0.0393, - "step": 56475 - }, - { - "epoch": 1.4336844777255997, - "grad_norm": 0.5151037573814392, - "learning_rate": 1.0442103481829335e-05, - "loss": 0.0632, - "step": 56480 - }, - { - "epoch": 1.433811397385455, - "grad_norm": 0.5767431855201721, - "learning_rate": 1.0441257350763633e-05, - "loss": 0.0379, - "step": 56485 - }, - { - "epoch": 1.4339383170453104, - "grad_norm": 0.5226774215698242, - "learning_rate": 1.0440411219697932e-05, - "loss": 0.0609, - "step": 56490 - }, - { - "epoch": 1.4340652367051656, - "grad_norm": 0.39026981592178345, - "learning_rate": 1.043956508863223e-05, - "loss": 0.0319, - "step": 56495 - }, - { - "epoch": 1.4341921563650208, - "grad_norm": 0.4213190972805023, - "learning_rate": 1.0438718957566527e-05, - "loss": 0.0376, - "step": 56500 - }, - { - "epoch": 1.4343190760248763, - "grad_norm": 0.4671189785003662, - "learning_rate": 1.0437872826500825e-05, - "loss": 0.0296, - "step": 56505 - }, - { - "epoch": 1.4344459956847315, - "grad_norm": 0.5778351426124573, - "learning_rate": 1.0437026695435124e-05, - "loss": 0.0436, - "step": 56510 - }, - { - "epoch": 1.434572915344587, - "grad_norm": 0.464216947555542, - "learning_rate": 1.0436180564369422e-05, - "loss": 0.068, - "step": 56515 - }, - { - "epoch": 1.4346998350044422, - "grad_norm": 0.4239196181297302, - "learning_rate": 1.0435334433303719e-05, - "loss": 0.0363, - "step": 56520 - }, - { - "epoch": 1.4348267546642974, - "grad_norm": 0.6177068948745728, - "learning_rate": 1.0434488302238017e-05, - "loss": 0.0588, - "step": 56525 - }, - { - "epoch": 1.4349536743241529, - "grad_norm": 0.3698032796382904, - "learning_rate": 1.0433642171172315e-05, - "loss": 0.0331, - "step": 56530 - }, - { - "epoch": 1.4350805939840081, - "grad_norm": 0.703195333480835, - "learning_rate": 1.0432796040106614e-05, - "loss": 0.046, - "step": 56535 - }, - { - "epoch": 1.4352075136438636, - "grad_norm": 1.052847146987915, - "learning_rate": 1.043194990904091e-05, - "loss": 0.0491, - "step": 56540 - }, - { - "epoch": 1.4353344333037188, - "grad_norm": 0.3767940104007721, - "learning_rate": 1.0431103777975209e-05, - "loss": 0.0614, - "step": 56545 - }, - { - "epoch": 1.435461352963574, - "grad_norm": 0.3578842878341675, - "learning_rate": 1.0430257646909507e-05, - "loss": 0.0349, - "step": 56550 - }, - { - "epoch": 1.4355882726234293, - "grad_norm": 0.5305662155151367, - "learning_rate": 1.0429411515843806e-05, - "loss": 0.0643, - "step": 56555 - }, - { - "epoch": 1.4357151922832847, - "grad_norm": 0.4874299168586731, - "learning_rate": 1.0428565384778102e-05, - "loss": 0.0303, - "step": 56560 - }, - { - "epoch": 1.43584211194314, - "grad_norm": 0.5401081442832947, - "learning_rate": 1.0427719253712401e-05, - "loss": 0.0441, - "step": 56565 - }, - { - "epoch": 1.4359690316029954, - "grad_norm": 0.467827707529068, - "learning_rate": 1.04268731226467e-05, - "loss": 0.0473, - "step": 56570 - }, - { - "epoch": 1.4360959512628506, - "grad_norm": 0.3067062199115753, - "learning_rate": 1.0426026991580998e-05, - "loss": 0.046, - "step": 56575 - }, - { - "epoch": 1.4362228709227058, - "grad_norm": 0.4936257302761078, - "learning_rate": 1.0425180860515294e-05, - "loss": 0.0456, - "step": 56580 - }, - { - "epoch": 1.4363497905825613, - "grad_norm": 1.3016178607940674, - "learning_rate": 1.0424334729449593e-05, - "loss": 0.0384, - "step": 56585 - }, - { - "epoch": 1.4364767102424165, - "grad_norm": 0.533986508846283, - "learning_rate": 1.0423488598383891e-05, - "loss": 0.0557, - "step": 56590 - }, - { - "epoch": 1.436603629902272, - "grad_norm": 0.3964393138885498, - "learning_rate": 1.042264246731819e-05, - "loss": 0.0407, - "step": 56595 - }, - { - "epoch": 1.4367305495621272, - "grad_norm": 0.4379774332046509, - "learning_rate": 1.0421796336252486e-05, - "loss": 0.0363, - "step": 56600 - }, - { - "epoch": 1.4368574692219824, - "grad_norm": 0.5176167488098145, - "learning_rate": 1.0420950205186785e-05, - "loss": 0.0321, - "step": 56605 - }, - { - "epoch": 1.4369843888818377, - "grad_norm": 0.6916541457176208, - "learning_rate": 1.0420104074121083e-05, - "loss": 0.0376, - "step": 56610 - }, - { - "epoch": 1.437111308541693, - "grad_norm": 0.37009748816490173, - "learning_rate": 1.0419257943055381e-05, - "loss": 0.0532, - "step": 56615 - }, - { - "epoch": 1.4372382282015483, - "grad_norm": 0.5459516048431396, - "learning_rate": 1.0418411811989676e-05, - "loss": 0.0446, - "step": 56620 - }, - { - "epoch": 1.4373651478614038, - "grad_norm": 0.3956669569015503, - "learning_rate": 1.0417565680923975e-05, - "loss": 0.0465, - "step": 56625 - }, - { - "epoch": 1.437492067521259, - "grad_norm": 0.5231447219848633, - "learning_rate": 1.0416719549858273e-05, - "loss": 0.0448, - "step": 56630 - }, - { - "epoch": 1.4376189871811142, - "grad_norm": 0.5098571181297302, - "learning_rate": 1.0415873418792572e-05, - "loss": 0.0558, - "step": 56635 - }, - { - "epoch": 1.4377459068409697, - "grad_norm": 0.5637958645820618, - "learning_rate": 1.0415027287726868e-05, - "loss": 0.041, - "step": 56640 - }, - { - "epoch": 1.437872826500825, - "grad_norm": 0.5552061200141907, - "learning_rate": 1.0414181156661167e-05, - "loss": 0.0552, - "step": 56645 - }, - { - "epoch": 1.4379997461606804, - "grad_norm": 0.5176917314529419, - "learning_rate": 1.0413335025595465e-05, - "loss": 0.0484, - "step": 56650 - }, - { - "epoch": 1.4381266658205356, - "grad_norm": 0.4102959632873535, - "learning_rate": 1.0412488894529764e-05, - "loss": 0.0845, - "step": 56655 - }, - { - "epoch": 1.4382535854803908, - "grad_norm": 0.4668254554271698, - "learning_rate": 1.041164276346406e-05, - "loss": 0.0483, - "step": 56660 - }, - { - "epoch": 1.4383805051402463, - "grad_norm": 0.4242311418056488, - "learning_rate": 1.0410796632398359e-05, - "loss": 0.0613, - "step": 56665 - }, - { - "epoch": 1.4385074248001015, - "grad_norm": 0.3885183036327362, - "learning_rate": 1.0409950501332657e-05, - "loss": 0.035, - "step": 56670 - }, - { - "epoch": 1.438634344459957, - "grad_norm": 0.5463365912437439, - "learning_rate": 1.0409104370266956e-05, - "loss": 0.0443, - "step": 56675 - }, - { - "epoch": 1.4387612641198122, - "grad_norm": 0.8686562180519104, - "learning_rate": 1.0408258239201252e-05, - "loss": 0.0519, - "step": 56680 - }, - { - "epoch": 1.4388881837796674, - "grad_norm": 0.3174028694629669, - "learning_rate": 1.040741210813555e-05, - "loss": 0.0404, - "step": 56685 - }, - { - "epoch": 1.4390151034395227, - "grad_norm": 0.3950863778591156, - "learning_rate": 1.0406565977069849e-05, - "loss": 0.0322, - "step": 56690 - }, - { - "epoch": 1.439142023099378, - "grad_norm": 0.7863554358482361, - "learning_rate": 1.0405719846004147e-05, - "loss": 0.0409, - "step": 56695 - }, - { - "epoch": 1.4392689427592333, - "grad_norm": 0.47867321968078613, - "learning_rate": 1.0404873714938444e-05, - "loss": 0.0385, - "step": 56700 - }, - { - "epoch": 1.4393958624190888, - "grad_norm": 0.667880117893219, - "learning_rate": 1.0404027583872743e-05, - "loss": 0.0519, - "step": 56705 - }, - { - "epoch": 1.439522782078944, - "grad_norm": 0.3568713366985321, - "learning_rate": 1.0403181452807041e-05, - "loss": 0.0233, - "step": 56710 - }, - { - "epoch": 1.4396497017387992, - "grad_norm": 0.6260867118835449, - "learning_rate": 1.040233532174134e-05, - "loss": 0.056, - "step": 56715 - }, - { - "epoch": 1.4397766213986547, - "grad_norm": 0.5823176503181458, - "learning_rate": 1.0401489190675636e-05, - "loss": 0.0547, - "step": 56720 - }, - { - "epoch": 1.43990354105851, - "grad_norm": 0.3226340413093567, - "learning_rate": 1.0400643059609934e-05, - "loss": 0.0306, - "step": 56725 - }, - { - "epoch": 1.4400304607183654, - "grad_norm": 0.5385991930961609, - "learning_rate": 1.0399796928544233e-05, - "loss": 0.0381, - "step": 56730 - }, - { - "epoch": 1.4401573803782206, - "grad_norm": 0.478528767824173, - "learning_rate": 1.0398950797478531e-05, - "loss": 0.0436, - "step": 56735 - }, - { - "epoch": 1.4402843000380758, - "grad_norm": 0.6391499638557434, - "learning_rate": 1.0398104666412826e-05, - "loss": 0.0382, - "step": 56740 - }, - { - "epoch": 1.4404112196979313, - "grad_norm": 0.5149902701377869, - "learning_rate": 1.0397258535347126e-05, - "loss": 0.0594, - "step": 56745 - }, - { - "epoch": 1.4405381393577865, - "grad_norm": 0.508085310459137, - "learning_rate": 1.0396412404281425e-05, - "loss": 0.0431, - "step": 56750 - }, - { - "epoch": 1.440665059017642, - "grad_norm": 0.45087966322898865, - "learning_rate": 1.0395566273215723e-05, - "loss": 0.0426, - "step": 56755 - }, - { - "epoch": 1.4407919786774972, - "grad_norm": 0.5161864161491394, - "learning_rate": 1.0394720142150018e-05, - "loss": 0.0449, - "step": 56760 - }, - { - "epoch": 1.4409188983373524, - "grad_norm": 0.35292619466781616, - "learning_rate": 1.0393874011084317e-05, - "loss": 0.0371, - "step": 56765 - }, - { - "epoch": 1.4410458179972077, - "grad_norm": 0.455156147480011, - "learning_rate": 1.0393027880018615e-05, - "loss": 0.0261, - "step": 56770 - }, - { - "epoch": 1.441172737657063, - "grad_norm": 0.8181995749473572, - "learning_rate": 1.0392181748952913e-05, - "loss": 0.0538, - "step": 56775 - }, - { - "epoch": 1.4412996573169183, - "grad_norm": 0.4558003544807434, - "learning_rate": 1.0391335617887214e-05, - "loss": 0.0383, - "step": 56780 - }, - { - "epoch": 1.4414265769767738, - "grad_norm": 0.6299349665641785, - "learning_rate": 1.0390489486821509e-05, - "loss": 0.0395, - "step": 56785 - }, - { - "epoch": 1.441553496636629, - "grad_norm": 0.4081857204437256, - "learning_rate": 1.0389643355755807e-05, - "loss": 0.0511, - "step": 56790 - }, - { - "epoch": 1.4416804162964842, - "grad_norm": 1.145560383796692, - "learning_rate": 1.0388797224690105e-05, - "loss": 0.0577, - "step": 56795 - }, - { - "epoch": 1.4418073359563397, - "grad_norm": 0.45289182662963867, - "learning_rate": 1.0387951093624404e-05, - "loss": 0.0633, - "step": 56800 - }, - { - "epoch": 1.441934255616195, - "grad_norm": 0.6870124936103821, - "learning_rate": 1.03871049625587e-05, - "loss": 0.0301, - "step": 56805 - }, - { - "epoch": 1.4420611752760504, - "grad_norm": 0.5851796269416809, - "learning_rate": 1.0386258831492999e-05, - "loss": 0.0445, - "step": 56810 - }, - { - "epoch": 1.4421880949359056, - "grad_norm": 0.46035054326057434, - "learning_rate": 1.0385412700427297e-05, - "loss": 0.0427, - "step": 56815 - }, - { - "epoch": 1.4423150145957608, - "grad_norm": 0.44107723236083984, - "learning_rate": 1.0384566569361596e-05, - "loss": 0.0338, - "step": 56820 - }, - { - "epoch": 1.442441934255616, - "grad_norm": 0.4551447331905365, - "learning_rate": 1.0383720438295892e-05, - "loss": 0.0425, - "step": 56825 - }, - { - "epoch": 1.4425688539154715, - "grad_norm": 0.46262869238853455, - "learning_rate": 1.038287430723019e-05, - "loss": 0.0543, - "step": 56830 - }, - { - "epoch": 1.4426957735753267, - "grad_norm": 0.7882266640663147, - "learning_rate": 1.0382028176164489e-05, - "loss": 0.0419, - "step": 56835 - }, - { - "epoch": 1.4428226932351822, - "grad_norm": 0.44978567957878113, - "learning_rate": 1.0381182045098788e-05, - "loss": 0.0582, - "step": 56840 - }, - { - "epoch": 1.4429496128950374, - "grad_norm": 0.380971223115921, - "learning_rate": 1.0380335914033084e-05, - "loss": 0.0428, - "step": 56845 - }, - { - "epoch": 1.4430765325548927, - "grad_norm": 0.6167284846305847, - "learning_rate": 1.0379489782967383e-05, - "loss": 0.0602, - "step": 56850 - }, - { - "epoch": 1.443203452214748, - "grad_norm": 0.3831866979598999, - "learning_rate": 1.0378643651901681e-05, - "loss": 0.0388, - "step": 56855 - }, - { - "epoch": 1.4433303718746033, - "grad_norm": 0.8935263156890869, - "learning_rate": 1.037779752083598e-05, - "loss": 0.0299, - "step": 56860 - }, - { - "epoch": 1.4434572915344588, - "grad_norm": 0.4437727928161621, - "learning_rate": 1.0376951389770276e-05, - "loss": 0.044, - "step": 56865 - }, - { - "epoch": 1.443584211194314, - "grad_norm": 0.627811849117279, - "learning_rate": 1.0376105258704575e-05, - "loss": 0.0427, - "step": 56870 - }, - { - "epoch": 1.4437111308541692, - "grad_norm": 0.3445684313774109, - "learning_rate": 1.0375259127638873e-05, - "loss": 0.0396, - "step": 56875 - }, - { - "epoch": 1.4438380505140247, - "grad_norm": 0.6496514081954956, - "learning_rate": 1.0374412996573171e-05, - "loss": 0.0523, - "step": 56880 - }, - { - "epoch": 1.44396497017388, - "grad_norm": 0.704859733581543, - "learning_rate": 1.0373566865507468e-05, - "loss": 0.0646, - "step": 56885 - }, - { - "epoch": 1.4440918898337354, - "grad_norm": 0.6473986506462097, - "learning_rate": 1.0372720734441766e-05, - "loss": 0.0589, - "step": 56890 - }, - { - "epoch": 1.4442188094935906, - "grad_norm": 0.3220409154891968, - "learning_rate": 1.0371874603376065e-05, - "loss": 0.0584, - "step": 56895 - }, - { - "epoch": 1.4443457291534458, - "grad_norm": 0.43365463614463806, - "learning_rate": 1.0371028472310363e-05, - "loss": 0.054, - "step": 56900 - }, - { - "epoch": 1.444472648813301, - "grad_norm": 0.25587987899780273, - "learning_rate": 1.0370182341244658e-05, - "loss": 0.0337, - "step": 56905 - }, - { - "epoch": 1.4445995684731565, - "grad_norm": 0.4876289963722229, - "learning_rate": 1.0369336210178957e-05, - "loss": 0.0452, - "step": 56910 - }, - { - "epoch": 1.4447264881330117, - "grad_norm": 0.29082006216049194, - "learning_rate": 1.0368490079113255e-05, - "loss": 0.0357, - "step": 56915 - }, - { - "epoch": 1.4448534077928672, - "grad_norm": 0.3872690200805664, - "learning_rate": 1.0367643948047555e-05, - "loss": 0.0549, - "step": 56920 - }, - { - "epoch": 1.4449803274527224, - "grad_norm": 0.5197175741195679, - "learning_rate": 1.036679781698185e-05, - "loss": 0.0282, - "step": 56925 - }, - { - "epoch": 1.4451072471125777, - "grad_norm": 0.45169326663017273, - "learning_rate": 1.0365951685916149e-05, - "loss": 0.0514, - "step": 56930 - }, - { - "epoch": 1.445234166772433, - "grad_norm": 0.5550673007965088, - "learning_rate": 1.0365105554850447e-05, - "loss": 0.0432, - "step": 56935 - }, - { - "epoch": 1.4453610864322883, - "grad_norm": 0.7522106170654297, - "learning_rate": 1.0364259423784745e-05, - "loss": 0.0393, - "step": 56940 - }, - { - "epoch": 1.4454880060921438, - "grad_norm": 0.544536292552948, - "learning_rate": 1.0363413292719042e-05, - "loss": 0.0357, - "step": 56945 - }, - { - "epoch": 1.445614925751999, - "grad_norm": 0.4298868179321289, - "learning_rate": 1.036256716165334e-05, - "loss": 0.0384, - "step": 56950 - }, - { - "epoch": 1.4457418454118542, - "grad_norm": 1.117456316947937, - "learning_rate": 1.0361721030587639e-05, - "loss": 0.0388, - "step": 56955 - }, - { - "epoch": 1.4458687650717095, - "grad_norm": 0.48018544912338257, - "learning_rate": 1.0360874899521937e-05, - "loss": 0.0412, - "step": 56960 - }, - { - "epoch": 1.445995684731565, - "grad_norm": 0.4062504768371582, - "learning_rate": 1.0360028768456234e-05, - "loss": 0.0665, - "step": 56965 - }, - { - "epoch": 1.4461226043914202, - "grad_norm": 0.520821750164032, - "learning_rate": 1.0359182637390532e-05, - "loss": 0.0468, - "step": 56970 - }, - { - "epoch": 1.4462495240512756, - "grad_norm": 0.3135218024253845, - "learning_rate": 1.0358336506324831e-05, - "loss": 0.0409, - "step": 56975 - }, - { - "epoch": 1.4463764437111308, - "grad_norm": 0.42561131715774536, - "learning_rate": 1.035749037525913e-05, - "loss": 0.0335, - "step": 56980 - }, - { - "epoch": 1.446503363370986, - "grad_norm": 0.3131294846534729, - "learning_rate": 1.0356644244193426e-05, - "loss": 0.0386, - "step": 56985 - }, - { - "epoch": 1.4466302830308415, - "grad_norm": 1.4235014915466309, - "learning_rate": 1.0355798113127724e-05, - "loss": 0.0286, - "step": 56990 - }, - { - "epoch": 1.4467572026906967, - "grad_norm": 0.8350403308868408, - "learning_rate": 1.0354951982062023e-05, - "loss": 0.0442, - "step": 56995 - }, - { - "epoch": 1.4468841223505522, - "grad_norm": 0.5636041164398193, - "learning_rate": 1.0354105850996321e-05, - "loss": 0.0437, - "step": 57000 - }, - { - "epoch": 1.4470110420104074, - "grad_norm": 0.6736129522323608, - "learning_rate": 1.0353259719930618e-05, - "loss": 0.0453, - "step": 57005 - }, - { - "epoch": 1.4471379616702627, - "grad_norm": 0.4962770640850067, - "learning_rate": 1.0352413588864916e-05, - "loss": 0.0425, - "step": 57010 - }, - { - "epoch": 1.447264881330118, - "grad_norm": 0.3230833113193512, - "learning_rate": 1.0351567457799215e-05, - "loss": 0.0425, - "step": 57015 - }, - { - "epoch": 1.4473918009899733, - "grad_norm": 0.33488771319389343, - "learning_rate": 1.0350721326733513e-05, - "loss": 0.0434, - "step": 57020 - }, - { - "epoch": 1.4475187206498288, - "grad_norm": 0.3443496823310852, - "learning_rate": 1.034987519566781e-05, - "loss": 0.028, - "step": 57025 - }, - { - "epoch": 1.447645640309684, - "grad_norm": 0.22076307237148285, - "learning_rate": 1.0349029064602108e-05, - "loss": 0.0247, - "step": 57030 - }, - { - "epoch": 1.4477725599695392, - "grad_norm": 0.661797821521759, - "learning_rate": 1.0348182933536407e-05, - "loss": 0.0448, - "step": 57035 - }, - { - "epoch": 1.4478994796293945, - "grad_norm": 0.5108788013458252, - "learning_rate": 1.0347336802470705e-05, - "loss": 0.0313, - "step": 57040 - }, - { - "epoch": 1.44802639928925, - "grad_norm": 0.4051973521709442, - "learning_rate": 1.0346490671405e-05, - "loss": 0.0276, - "step": 57045 - }, - { - "epoch": 1.4481533189491052, - "grad_norm": 0.522165834903717, - "learning_rate": 1.0345644540339298e-05, - "loss": 0.0377, - "step": 57050 - }, - { - "epoch": 1.4482802386089606, - "grad_norm": 0.2483712136745453, - "learning_rate": 1.0344798409273597e-05, - "loss": 0.0318, - "step": 57055 - }, - { - "epoch": 1.4484071582688158, - "grad_norm": 0.610000729560852, - "learning_rate": 1.0343952278207895e-05, - "loss": 0.0483, - "step": 57060 - }, - { - "epoch": 1.448534077928671, - "grad_norm": 0.38960063457489014, - "learning_rate": 1.0343106147142192e-05, - "loss": 0.0477, - "step": 57065 - }, - { - "epoch": 1.4486609975885265, - "grad_norm": 0.45118919014930725, - "learning_rate": 1.034226001607649e-05, - "loss": 0.0459, - "step": 57070 - }, - { - "epoch": 1.4487879172483817, - "grad_norm": 0.3822205364704132, - "learning_rate": 1.0341413885010789e-05, - "loss": 0.0326, - "step": 57075 - }, - { - "epoch": 1.4489148369082372, - "grad_norm": 0.8314823508262634, - "learning_rate": 1.0340567753945087e-05, - "loss": 0.0631, - "step": 57080 - }, - { - "epoch": 1.4490417565680924, - "grad_norm": 0.6305332779884338, - "learning_rate": 1.0339721622879384e-05, - "loss": 0.0478, - "step": 57085 - }, - { - "epoch": 1.4491686762279476, - "grad_norm": 0.5884213447570801, - "learning_rate": 1.0338875491813682e-05, - "loss": 0.0618, - "step": 57090 - }, - { - "epoch": 1.449295595887803, - "grad_norm": 0.4123144745826721, - "learning_rate": 1.033802936074798e-05, - "loss": 0.0436, - "step": 57095 - }, - { - "epoch": 1.4494225155476583, - "grad_norm": 0.5194267630577087, - "learning_rate": 1.0337183229682279e-05, - "loss": 0.0629, - "step": 57100 - }, - { - "epoch": 1.4495494352075138, - "grad_norm": 0.6130552291870117, - "learning_rate": 1.0336337098616576e-05, - "loss": 0.0451, - "step": 57105 - }, - { - "epoch": 1.449676354867369, - "grad_norm": 1.0259630680084229, - "learning_rate": 1.0335490967550874e-05, - "loss": 0.0563, - "step": 57110 - }, - { - "epoch": 1.4498032745272242, - "grad_norm": 0.427579402923584, - "learning_rate": 1.0334644836485173e-05, - "loss": 0.0356, - "step": 57115 - }, - { - "epoch": 1.4499301941870795, - "grad_norm": 0.45111820101737976, - "learning_rate": 1.0333798705419471e-05, - "loss": 0.0366, - "step": 57120 - }, - { - "epoch": 1.450057113846935, - "grad_norm": 0.4044736325740814, - "learning_rate": 1.0332952574353768e-05, - "loss": 0.0488, - "step": 57125 - }, - { - "epoch": 1.4501840335067901, - "grad_norm": 0.5672404766082764, - "learning_rate": 1.0332106443288066e-05, - "loss": 0.0422, - "step": 57130 - }, - { - "epoch": 1.4503109531666456, - "grad_norm": 0.6287928819656372, - "learning_rate": 1.0331260312222364e-05, - "loss": 0.0404, - "step": 57135 - }, - { - "epoch": 1.4504378728265008, - "grad_norm": 0.31877440214157104, - "learning_rate": 1.0330414181156663e-05, - "loss": 0.0431, - "step": 57140 - }, - { - "epoch": 1.450564792486356, - "grad_norm": 0.5866785645484924, - "learning_rate": 1.032956805009096e-05, - "loss": 0.0356, - "step": 57145 - }, - { - "epoch": 1.4506917121462115, - "grad_norm": 0.68967604637146, - "learning_rate": 1.0328721919025258e-05, - "loss": 0.0646, - "step": 57150 - }, - { - "epoch": 1.4508186318060667, - "grad_norm": 0.4169256091117859, - "learning_rate": 1.0327875787959556e-05, - "loss": 0.0304, - "step": 57155 - }, - { - "epoch": 1.4509455514659222, - "grad_norm": 0.5200921893119812, - "learning_rate": 1.0327029656893855e-05, - "loss": 0.0588, - "step": 57160 - }, - { - "epoch": 1.4510724711257774, - "grad_norm": 0.38351869583129883, - "learning_rate": 1.032618352582815e-05, - "loss": 0.046, - "step": 57165 - }, - { - "epoch": 1.4511993907856326, - "grad_norm": 0.3236944079399109, - "learning_rate": 1.032533739476245e-05, - "loss": 0.0231, - "step": 57170 - }, - { - "epoch": 1.4513263104454879, - "grad_norm": 1.2690759897232056, - "learning_rate": 1.0324491263696748e-05, - "loss": 0.0746, - "step": 57175 - }, - { - "epoch": 1.4514532301053433, - "grad_norm": 0.5594444274902344, - "learning_rate": 1.0323645132631047e-05, - "loss": 0.0342, - "step": 57180 - }, - { - "epoch": 1.4515801497651986, - "grad_norm": 0.3246179223060608, - "learning_rate": 1.0322799001565342e-05, - "loss": 0.0448, - "step": 57185 - }, - { - "epoch": 1.451707069425054, - "grad_norm": 0.6027479767799377, - "learning_rate": 1.032195287049964e-05, - "loss": 0.0318, - "step": 57190 - }, - { - "epoch": 1.4518339890849092, - "grad_norm": 0.3169104754924774, - "learning_rate": 1.0321106739433939e-05, - "loss": 0.0367, - "step": 57195 - }, - { - "epoch": 1.4519609087447645, - "grad_norm": 0.6685197949409485, - "learning_rate": 1.0320260608368237e-05, - "loss": 0.0472, - "step": 57200 - }, - { - "epoch": 1.45208782840462, - "grad_norm": 0.5153339505195618, - "learning_rate": 1.0319414477302534e-05, - "loss": 0.0362, - "step": 57205 - }, - { - "epoch": 1.4522147480644751, - "grad_norm": 1.099181056022644, - "learning_rate": 1.0318568346236832e-05, - "loss": 0.0574, - "step": 57210 - }, - { - "epoch": 1.4523416677243306, - "grad_norm": 0.37276867032051086, - "learning_rate": 1.031772221517113e-05, - "loss": 0.0489, - "step": 57215 - }, - { - "epoch": 1.4524685873841858, - "grad_norm": 0.6114072203636169, - "learning_rate": 1.0316876084105429e-05, - "loss": 0.0712, - "step": 57220 - }, - { - "epoch": 1.452595507044041, - "grad_norm": 0.33970552682876587, - "learning_rate": 1.0316029953039726e-05, - "loss": 0.0444, - "step": 57225 - }, - { - "epoch": 1.4527224267038965, - "grad_norm": 0.5126498341560364, - "learning_rate": 1.0315183821974024e-05, - "loss": 0.0423, - "step": 57230 - }, - { - "epoch": 1.4528493463637517, - "grad_norm": 0.6387706398963928, - "learning_rate": 1.0314337690908322e-05, - "loss": 0.0326, - "step": 57235 - }, - { - "epoch": 1.4529762660236072, - "grad_norm": 0.3660387396812439, - "learning_rate": 1.031349155984262e-05, - "loss": 0.0356, - "step": 57240 - }, - { - "epoch": 1.4531031856834624, - "grad_norm": 0.5318552851676941, - "learning_rate": 1.0312645428776917e-05, - "loss": 0.0398, - "step": 57245 - }, - { - "epoch": 1.4532301053433176, - "grad_norm": 0.6341652870178223, - "learning_rate": 1.0311799297711216e-05, - "loss": 0.0436, - "step": 57250 - }, - { - "epoch": 1.4533570250031729, - "grad_norm": 0.4610164761543274, - "learning_rate": 1.0310953166645514e-05, - "loss": 0.0387, - "step": 57255 - }, - { - "epoch": 1.4534839446630283, - "grad_norm": 0.3284331262111664, - "learning_rate": 1.0310107035579813e-05, - "loss": 0.0302, - "step": 57260 - }, - { - "epoch": 1.4536108643228836, - "grad_norm": 0.5031713247299194, - "learning_rate": 1.030926090451411e-05, - "loss": 0.046, - "step": 57265 - }, - { - "epoch": 1.453737783982739, - "grad_norm": 0.5418296456336975, - "learning_rate": 1.0308414773448408e-05, - "loss": 0.0418, - "step": 57270 - }, - { - "epoch": 1.4538647036425942, - "grad_norm": 0.5297380685806274, - "learning_rate": 1.0307568642382706e-05, - "loss": 0.04, - "step": 57275 - }, - { - "epoch": 1.4539916233024495, - "grad_norm": 0.4345568120479584, - "learning_rate": 1.0306722511317005e-05, - "loss": 0.0404, - "step": 57280 - }, - { - "epoch": 1.454118542962305, - "grad_norm": 0.2731430232524872, - "learning_rate": 1.0305876380251301e-05, - "loss": 0.0422, - "step": 57285 - }, - { - "epoch": 1.4542454626221601, - "grad_norm": 0.3361470401287079, - "learning_rate": 1.03050302491856e-05, - "loss": 0.0339, - "step": 57290 - }, - { - "epoch": 1.4543723822820156, - "grad_norm": 0.4498646557331085, - "learning_rate": 1.0304184118119898e-05, - "loss": 0.0284, - "step": 57295 - }, - { - "epoch": 1.4544993019418708, - "grad_norm": 0.7768688201904297, - "learning_rate": 1.0303337987054196e-05, - "loss": 0.0347, - "step": 57300 - }, - { - "epoch": 1.454626221601726, - "grad_norm": 0.5056100487709045, - "learning_rate": 1.0302491855988495e-05, - "loss": 0.0371, - "step": 57305 - }, - { - "epoch": 1.4547531412615813, - "grad_norm": 1.0766395330429077, - "learning_rate": 1.0301645724922792e-05, - "loss": 0.0533, - "step": 57310 - }, - { - "epoch": 1.4548800609214367, - "grad_norm": 0.9903053641319275, - "learning_rate": 1.030079959385709e-05, - "loss": 0.0747, - "step": 57315 - }, - { - "epoch": 1.455006980581292, - "grad_norm": 0.39538806676864624, - "learning_rate": 1.0299953462791388e-05, - "loss": 0.0289, - "step": 57320 - }, - { - "epoch": 1.4551339002411474, - "grad_norm": 0.8291842341423035, - "learning_rate": 1.0299107331725687e-05, - "loss": 0.0554, - "step": 57325 - }, - { - "epoch": 1.4552608199010026, - "grad_norm": 0.42039552330970764, - "learning_rate": 1.0298261200659982e-05, - "loss": 0.0373, - "step": 57330 - }, - { - "epoch": 1.4553877395608579, - "grad_norm": 0.2848394811153412, - "learning_rate": 1.029741506959428e-05, - "loss": 0.0478, - "step": 57335 - }, - { - "epoch": 1.4555146592207133, - "grad_norm": 0.5117058753967285, - "learning_rate": 1.0296568938528579e-05, - "loss": 0.0457, - "step": 57340 - }, - { - "epoch": 1.4556415788805686, - "grad_norm": 0.4320240914821625, - "learning_rate": 1.0295722807462879e-05, - "loss": 0.045, - "step": 57345 - }, - { - "epoch": 1.455768498540424, - "grad_norm": 0.42955076694488525, - "learning_rate": 1.0294876676397174e-05, - "loss": 0.0364, - "step": 57350 - }, - { - "epoch": 1.4558954182002792, - "grad_norm": 0.39868640899658203, - "learning_rate": 1.0294030545331472e-05, - "loss": 0.0449, - "step": 57355 - }, - { - "epoch": 1.4560223378601345, - "grad_norm": 0.36587485671043396, - "learning_rate": 1.029318441426577e-05, - "loss": 0.0375, - "step": 57360 - }, - { - "epoch": 1.45614925751999, - "grad_norm": 1.365244746208191, - "learning_rate": 1.0292338283200069e-05, - "loss": 0.0513, - "step": 57365 - }, - { - "epoch": 1.4562761771798451, - "grad_norm": 2.1787970066070557, - "learning_rate": 1.0291492152134366e-05, - "loss": 0.0374, - "step": 57370 - }, - { - "epoch": 1.4564030968397006, - "grad_norm": 0.7315258383750916, - "learning_rate": 1.0290646021068664e-05, - "loss": 0.0382, - "step": 57375 - }, - { - "epoch": 1.4565300164995558, - "grad_norm": 0.5089686512947083, - "learning_rate": 1.0289799890002962e-05, - "loss": 0.0625, - "step": 57380 - }, - { - "epoch": 1.456656936159411, - "grad_norm": 0.3290874660015106, - "learning_rate": 1.0288953758937261e-05, - "loss": 0.0314, - "step": 57385 - }, - { - "epoch": 1.4567838558192663, - "grad_norm": 0.4308525621891022, - "learning_rate": 1.0288107627871558e-05, - "loss": 0.0315, - "step": 57390 - }, - { - "epoch": 1.4569107754791217, - "grad_norm": 0.43553951382637024, - "learning_rate": 1.0287261496805856e-05, - "loss": 0.0564, - "step": 57395 - }, - { - "epoch": 1.457037695138977, - "grad_norm": 0.6226843595504761, - "learning_rate": 1.0286415365740154e-05, - "loss": 0.0597, - "step": 57400 - }, - { - "epoch": 1.4571646147988324, - "grad_norm": 0.3355701267719269, - "learning_rate": 1.0285569234674453e-05, - "loss": 0.0409, - "step": 57405 - }, - { - "epoch": 1.4572915344586876, - "grad_norm": 0.5790958404541016, - "learning_rate": 1.028472310360875e-05, - "loss": 0.0754, - "step": 57410 - }, - { - "epoch": 1.4574184541185429, - "grad_norm": 0.3916302025318146, - "learning_rate": 1.0283876972543048e-05, - "loss": 0.0473, - "step": 57415 - }, - { - "epoch": 1.4575453737783983, - "grad_norm": 0.41153058409690857, - "learning_rate": 1.0283030841477346e-05, - "loss": 0.0411, - "step": 57420 - }, - { - "epoch": 1.4576722934382536, - "grad_norm": 0.4776480197906494, - "learning_rate": 1.0282184710411645e-05, - "loss": 0.0368, - "step": 57425 - }, - { - "epoch": 1.457799213098109, - "grad_norm": 0.41523855924606323, - "learning_rate": 1.0281338579345941e-05, - "loss": 0.0324, - "step": 57430 - }, - { - "epoch": 1.4579261327579642, - "grad_norm": 0.559894859790802, - "learning_rate": 1.028049244828024e-05, - "loss": 0.0364, - "step": 57435 - }, - { - "epoch": 1.4580530524178195, - "grad_norm": 0.4542734920978546, - "learning_rate": 1.0279646317214538e-05, - "loss": 0.0339, - "step": 57440 - }, - { - "epoch": 1.458179972077675, - "grad_norm": 0.4136545956134796, - "learning_rate": 1.0278800186148837e-05, - "loss": 0.0369, - "step": 57445 - }, - { - "epoch": 1.4583068917375301, - "grad_norm": 0.3381107449531555, - "learning_rate": 1.0277954055083133e-05, - "loss": 0.0446, - "step": 57450 - }, - { - "epoch": 1.4584338113973856, - "grad_norm": 0.42705821990966797, - "learning_rate": 1.0277107924017432e-05, - "loss": 0.05, - "step": 57455 - }, - { - "epoch": 1.4585607310572408, - "grad_norm": 1.0259970426559448, - "learning_rate": 1.027626179295173e-05, - "loss": 0.0514, - "step": 57460 - }, - { - "epoch": 1.458687650717096, - "grad_norm": 0.36902210116386414, - "learning_rate": 1.0275415661886029e-05, - "loss": 0.0331, - "step": 57465 - }, - { - "epoch": 1.4588145703769513, - "grad_norm": 1.0255235433578491, - "learning_rate": 1.0274569530820324e-05, - "loss": 0.0401, - "step": 57470 - }, - { - "epoch": 1.4589414900368067, - "grad_norm": 0.8408224582672119, - "learning_rate": 1.0273723399754622e-05, - "loss": 0.0498, - "step": 57475 - }, - { - "epoch": 1.459068409696662, - "grad_norm": 0.3827611804008484, - "learning_rate": 1.027287726868892e-05, - "loss": 0.0382, - "step": 57480 - }, - { - "epoch": 1.4591953293565174, - "grad_norm": 0.5358274579048157, - "learning_rate": 1.0272031137623219e-05, - "loss": 0.0551, - "step": 57485 - }, - { - "epoch": 1.4593222490163726, - "grad_norm": 0.5554309487342834, - "learning_rate": 1.0271185006557515e-05, - "loss": 0.0558, - "step": 57490 - }, - { - "epoch": 1.4594491686762279, - "grad_norm": 1.2155404090881348, - "learning_rate": 1.0270338875491814e-05, - "loss": 0.0354, - "step": 57495 - }, - { - "epoch": 1.4595760883360833, - "grad_norm": 0.5891308188438416, - "learning_rate": 1.0269492744426112e-05, - "loss": 0.0547, - "step": 57500 - }, - { - "epoch": 1.4597030079959386, - "grad_norm": 0.684951901435852, - "learning_rate": 1.026864661336041e-05, - "loss": 0.0482, - "step": 57505 - }, - { - "epoch": 1.459829927655794, - "grad_norm": 0.4775146245956421, - "learning_rate": 1.0267800482294707e-05, - "loss": 0.0578, - "step": 57510 - }, - { - "epoch": 1.4599568473156492, - "grad_norm": 0.48113852739334106, - "learning_rate": 1.0266954351229006e-05, - "loss": 0.0485, - "step": 57515 - }, - { - "epoch": 1.4600837669755045, - "grad_norm": 0.4378429353237152, - "learning_rate": 1.0266108220163304e-05, - "loss": 0.0681, - "step": 57520 - }, - { - "epoch": 1.4602106866353597, - "grad_norm": 0.38392573595046997, - "learning_rate": 1.0265262089097603e-05, - "loss": 0.0636, - "step": 57525 - }, - { - "epoch": 1.4603376062952151, - "grad_norm": 0.38056156039237976, - "learning_rate": 1.02644159580319e-05, - "loss": 0.0364, - "step": 57530 - }, - { - "epoch": 1.4604645259550704, - "grad_norm": 0.4733940660953522, - "learning_rate": 1.0263569826966198e-05, - "loss": 0.0433, - "step": 57535 - }, - { - "epoch": 1.4605914456149258, - "grad_norm": 0.3903731107711792, - "learning_rate": 1.0262723695900496e-05, - "loss": 0.0377, - "step": 57540 - }, - { - "epoch": 1.460718365274781, - "grad_norm": 0.5273787379264832, - "learning_rate": 1.0261877564834794e-05, - "loss": 0.038, - "step": 57545 - }, - { - "epoch": 1.4608452849346363, - "grad_norm": 0.4312836229801178, - "learning_rate": 1.0261031433769091e-05, - "loss": 0.0454, - "step": 57550 - }, - { - "epoch": 1.4609722045944917, - "grad_norm": 0.24978633224964142, - "learning_rate": 1.026018530270339e-05, - "loss": 0.0279, - "step": 57555 - }, - { - "epoch": 1.461099124254347, - "grad_norm": 0.3535216748714447, - "learning_rate": 1.0259339171637688e-05, - "loss": 0.0425, - "step": 57560 - }, - { - "epoch": 1.4612260439142024, - "grad_norm": 0.6044338345527649, - "learning_rate": 1.0258493040571986e-05, - "loss": 0.0345, - "step": 57565 - }, - { - "epoch": 1.4613529635740576, - "grad_norm": 0.521202802658081, - "learning_rate": 1.0257646909506283e-05, - "loss": 0.0349, - "step": 57570 - }, - { - "epoch": 1.4614798832339129, - "grad_norm": 0.5704819560050964, - "learning_rate": 1.0256800778440581e-05, - "loss": 0.0472, - "step": 57575 - }, - { - "epoch": 1.4616068028937683, - "grad_norm": 0.4575478732585907, - "learning_rate": 1.025595464737488e-05, - "loss": 0.042, - "step": 57580 - }, - { - "epoch": 1.4617337225536235, - "grad_norm": 0.6168156266212463, - "learning_rate": 1.0255108516309178e-05, - "loss": 0.0373, - "step": 57585 - }, - { - "epoch": 1.461860642213479, - "grad_norm": 0.8368439674377441, - "learning_rate": 1.0254262385243475e-05, - "loss": 0.0326, - "step": 57590 - }, - { - "epoch": 1.4619875618733342, - "grad_norm": 0.40890803933143616, - "learning_rate": 1.0253416254177773e-05, - "loss": 0.0505, - "step": 57595 - }, - { - "epoch": 1.4621144815331895, - "grad_norm": 0.8517943024635315, - "learning_rate": 1.0252570123112072e-05, - "loss": 0.0627, - "step": 57600 - }, - { - "epoch": 1.4622414011930447, - "grad_norm": 0.37841561436653137, - "learning_rate": 1.025172399204637e-05, - "loss": 0.042, - "step": 57605 - }, - { - "epoch": 1.4623683208529001, - "grad_norm": 0.26331594586372375, - "learning_rate": 1.0250877860980665e-05, - "loss": 0.0292, - "step": 57610 - }, - { - "epoch": 1.4624952405127554, - "grad_norm": 0.4700849950313568, - "learning_rate": 1.0250031729914964e-05, - "loss": 0.0473, - "step": 57615 - }, - { - "epoch": 1.4626221601726108, - "grad_norm": 0.43841803073883057, - "learning_rate": 1.0249185598849262e-05, - "loss": 0.029, - "step": 57620 - }, - { - "epoch": 1.462749079832466, - "grad_norm": 0.49703383445739746, - "learning_rate": 1.024833946778356e-05, - "loss": 0.0506, - "step": 57625 - }, - { - "epoch": 1.4628759994923213, - "grad_norm": 0.3576927185058594, - "learning_rate": 1.0247493336717857e-05, - "loss": 0.0426, - "step": 57630 - }, - { - "epoch": 1.4630029191521767, - "grad_norm": 0.3102799355983734, - "learning_rate": 1.0246647205652156e-05, - "loss": 0.0519, - "step": 57635 - }, - { - "epoch": 1.463129838812032, - "grad_norm": 0.6356692314147949, - "learning_rate": 1.0245801074586454e-05, - "loss": 0.0383, - "step": 57640 - }, - { - "epoch": 1.4632567584718874, - "grad_norm": 0.689870297908783, - "learning_rate": 1.0244954943520752e-05, - "loss": 0.0539, - "step": 57645 - }, - { - "epoch": 1.4633836781317426, - "grad_norm": 0.7616189122200012, - "learning_rate": 1.0244108812455049e-05, - "loss": 0.0491, - "step": 57650 - }, - { - "epoch": 1.4635105977915979, - "grad_norm": 0.4450399577617645, - "learning_rate": 1.0243262681389347e-05, - "loss": 0.0433, - "step": 57655 - }, - { - "epoch": 1.463637517451453, - "grad_norm": 0.3895208537578583, - "learning_rate": 1.0242416550323646e-05, - "loss": 0.0528, - "step": 57660 - }, - { - "epoch": 1.4637644371113085, - "grad_norm": 0.5437615513801575, - "learning_rate": 1.0241570419257944e-05, - "loss": 0.0334, - "step": 57665 - }, - { - "epoch": 1.4638913567711638, - "grad_norm": 0.39885449409484863, - "learning_rate": 1.0240724288192241e-05, - "loss": 0.0498, - "step": 57670 - }, - { - "epoch": 1.4640182764310192, - "grad_norm": 0.4108491837978363, - "learning_rate": 1.023987815712654e-05, - "loss": 0.0449, - "step": 57675 - }, - { - "epoch": 1.4641451960908745, - "grad_norm": 0.5442909598350525, - "learning_rate": 1.0239032026060838e-05, - "loss": 0.0511, - "step": 57680 - }, - { - "epoch": 1.4642721157507297, - "grad_norm": 0.552434504032135, - "learning_rate": 1.0238185894995136e-05, - "loss": 0.0717, - "step": 57685 - }, - { - "epoch": 1.4643990354105851, - "grad_norm": 0.3619535267353058, - "learning_rate": 1.0237339763929433e-05, - "loss": 0.0499, - "step": 57690 - }, - { - "epoch": 1.4645259550704404, - "grad_norm": 0.6972812414169312, - "learning_rate": 1.0236493632863731e-05, - "loss": 0.0425, - "step": 57695 - }, - { - "epoch": 1.4646528747302958, - "grad_norm": 0.9594479203224182, - "learning_rate": 1.023564750179803e-05, - "loss": 0.0514, - "step": 57700 - }, - { - "epoch": 1.464779794390151, - "grad_norm": 0.31601080298423767, - "learning_rate": 1.0234801370732328e-05, - "loss": 0.0431, - "step": 57705 - }, - { - "epoch": 1.4649067140500063, - "grad_norm": 0.47993144392967224, - "learning_rate": 1.0233955239666625e-05, - "loss": 0.0423, - "step": 57710 - }, - { - "epoch": 1.4650336337098617, - "grad_norm": 0.3237684667110443, - "learning_rate": 1.0233109108600923e-05, - "loss": 0.0289, - "step": 57715 - }, - { - "epoch": 1.465160553369717, - "grad_norm": 0.3614519536495209, - "learning_rate": 1.0232262977535222e-05, - "loss": 0.051, - "step": 57720 - }, - { - "epoch": 1.4652874730295724, - "grad_norm": 0.8414545655250549, - "learning_rate": 1.023141684646952e-05, - "loss": 0.0628, - "step": 57725 - }, - { - "epoch": 1.4654143926894276, - "grad_norm": 0.3505496680736542, - "learning_rate": 1.0230570715403815e-05, - "loss": 0.0603, - "step": 57730 - }, - { - "epoch": 1.4655413123492829, - "grad_norm": 0.41334521770477295, - "learning_rate": 1.0229724584338115e-05, - "loss": 0.0478, - "step": 57735 - }, - { - "epoch": 1.465668232009138, - "grad_norm": 0.528475284576416, - "learning_rate": 1.0228878453272414e-05, - "loss": 0.0342, - "step": 57740 - }, - { - "epoch": 1.4657951516689935, - "grad_norm": 0.369789183139801, - "learning_rate": 1.0228032322206712e-05, - "loss": 0.0278, - "step": 57745 - }, - { - "epoch": 1.4659220713288488, - "grad_norm": 0.33558008074760437, - "learning_rate": 1.0227186191141007e-05, - "loss": 0.0397, - "step": 57750 - }, - { - "epoch": 1.4660489909887042, - "grad_norm": 0.4689003527164459, - "learning_rate": 1.0226340060075305e-05, - "loss": 0.0378, - "step": 57755 - }, - { - "epoch": 1.4661759106485595, - "grad_norm": 0.5659369826316833, - "learning_rate": 1.0225493929009604e-05, - "loss": 0.0375, - "step": 57760 - }, - { - "epoch": 1.4663028303084147, - "grad_norm": 0.41193217039108276, - "learning_rate": 1.0224647797943902e-05, - "loss": 0.0413, - "step": 57765 - }, - { - "epoch": 1.4664297499682701, - "grad_norm": 0.6478328704833984, - "learning_rate": 1.0223801666878199e-05, - "loss": 0.0483, - "step": 57770 - }, - { - "epoch": 1.4665566696281254, - "grad_norm": 0.40455037355422974, - "learning_rate": 1.0222955535812497e-05, - "loss": 0.0488, - "step": 57775 - }, - { - "epoch": 1.4666835892879808, - "grad_norm": 0.4060910940170288, - "learning_rate": 1.0222109404746796e-05, - "loss": 0.0482, - "step": 57780 - }, - { - "epoch": 1.466810508947836, - "grad_norm": 0.4529455900192261, - "learning_rate": 1.0221263273681094e-05, - "loss": 0.0428, - "step": 57785 - }, - { - "epoch": 1.4669374286076913, - "grad_norm": 0.38442814350128174, - "learning_rate": 1.022041714261539e-05, - "loss": 0.023, - "step": 57790 - }, - { - "epoch": 1.4670643482675467, - "grad_norm": 0.6095982193946838, - "learning_rate": 1.021957101154969e-05, - "loss": 0.0666, - "step": 57795 - }, - { - "epoch": 1.467191267927402, - "grad_norm": 0.22877709567546844, - "learning_rate": 1.0218724880483988e-05, - "loss": 0.0312, - "step": 57800 - }, - { - "epoch": 1.4673181875872572, - "grad_norm": 0.4660886228084564, - "learning_rate": 1.0217878749418286e-05, - "loss": 0.042, - "step": 57805 - }, - { - "epoch": 1.4674451072471126, - "grad_norm": 0.47132885456085205, - "learning_rate": 1.0217032618352584e-05, - "loss": 0.0471, - "step": 57810 - }, - { - "epoch": 1.4675720269069679, - "grad_norm": 0.5380565524101257, - "learning_rate": 1.0216186487286881e-05, - "loss": 0.0337, - "step": 57815 - }, - { - "epoch": 1.467698946566823, - "grad_norm": 0.31263408064842224, - "learning_rate": 1.021534035622118e-05, - "loss": 0.0406, - "step": 57820 - }, - { - "epoch": 1.4678258662266785, - "grad_norm": 0.5643606185913086, - "learning_rate": 1.0214494225155478e-05, - "loss": 0.0398, - "step": 57825 - }, - { - "epoch": 1.4679527858865338, - "grad_norm": 0.42535218596458435, - "learning_rate": 1.0213648094089776e-05, - "loss": 0.04, - "step": 57830 - }, - { - "epoch": 1.4680797055463892, - "grad_norm": 0.4540955722332001, - "learning_rate": 1.0212801963024073e-05, - "loss": 0.0447, - "step": 57835 - }, - { - "epoch": 1.4682066252062445, - "grad_norm": 0.6584701538085938, - "learning_rate": 1.0211955831958371e-05, - "loss": 0.0417, - "step": 57840 - }, - { - "epoch": 1.4683335448660997, - "grad_norm": 0.44968870282173157, - "learning_rate": 1.021110970089267e-05, - "loss": 0.0429, - "step": 57845 - }, - { - "epoch": 1.4684604645259551, - "grad_norm": 0.45650896430015564, - "learning_rate": 1.0210263569826968e-05, - "loss": 0.0492, - "step": 57850 - }, - { - "epoch": 1.4685873841858104, - "grad_norm": 0.5612104535102844, - "learning_rate": 1.0209417438761265e-05, - "loss": 0.0534, - "step": 57855 - }, - { - "epoch": 1.4687143038456658, - "grad_norm": 0.3990301191806793, - "learning_rate": 1.0208571307695563e-05, - "loss": 0.0513, - "step": 57860 - }, - { - "epoch": 1.468841223505521, - "grad_norm": 0.4732712507247925, - "learning_rate": 1.0207725176629862e-05, - "loss": 0.0509, - "step": 57865 - }, - { - "epoch": 1.4689681431653763, - "grad_norm": 0.806731104850769, - "learning_rate": 1.020687904556416e-05, - "loss": 0.0387, - "step": 57870 - }, - { - "epoch": 1.4690950628252315, - "grad_norm": 0.4137793481349945, - "learning_rate": 1.0206032914498457e-05, - "loss": 0.0367, - "step": 57875 - }, - { - "epoch": 1.469221982485087, - "grad_norm": 0.3255736231803894, - "learning_rate": 1.0205186783432755e-05, - "loss": 0.0384, - "step": 57880 - }, - { - "epoch": 1.4693489021449422, - "grad_norm": 0.6859708428382874, - "learning_rate": 1.0204340652367054e-05, - "loss": 0.0333, - "step": 57885 - }, - { - "epoch": 1.4694758218047976, - "grad_norm": 0.30808764696121216, - "learning_rate": 1.0203494521301352e-05, - "loss": 0.0339, - "step": 57890 - }, - { - "epoch": 1.4696027414646529, - "grad_norm": 0.449878066778183, - "learning_rate": 1.0202648390235647e-05, - "loss": 0.0478, - "step": 57895 - }, - { - "epoch": 1.469729661124508, - "grad_norm": 0.5542464852333069, - "learning_rate": 1.0201802259169945e-05, - "loss": 0.0514, - "step": 57900 - }, - { - "epoch": 1.4698565807843635, - "grad_norm": 0.33679139614105225, - "learning_rate": 1.0200956128104244e-05, - "loss": 0.0543, - "step": 57905 - }, - { - "epoch": 1.4699835004442188, - "grad_norm": 0.4054517149925232, - "learning_rate": 1.0200109997038544e-05, - "loss": 0.0492, - "step": 57910 - }, - { - "epoch": 1.4701104201040742, - "grad_norm": 0.4465276300907135, - "learning_rate": 1.0199263865972839e-05, - "loss": 0.0355, - "step": 57915 - }, - { - "epoch": 1.4702373397639295, - "grad_norm": 0.5317280292510986, - "learning_rate": 1.0198417734907137e-05, - "loss": 0.0424, - "step": 57920 - }, - { - "epoch": 1.4703642594237847, - "grad_norm": 0.4948006570339203, - "learning_rate": 1.0197571603841436e-05, - "loss": 0.0422, - "step": 57925 - }, - { - "epoch": 1.4704911790836401, - "grad_norm": 0.3282269239425659, - "learning_rate": 1.0196725472775734e-05, - "loss": 0.0524, - "step": 57930 - }, - { - "epoch": 1.4706180987434954, - "grad_norm": 0.6224183440208435, - "learning_rate": 1.0195879341710031e-05, - "loss": 0.0578, - "step": 57935 - }, - { - "epoch": 1.4707450184033508, - "grad_norm": 0.25189757347106934, - "learning_rate": 1.019503321064433e-05, - "loss": 0.0227, - "step": 57940 - }, - { - "epoch": 1.470871938063206, - "grad_norm": 0.6274273991584778, - "learning_rate": 1.0194187079578628e-05, - "loss": 0.0449, - "step": 57945 - }, - { - "epoch": 1.4709988577230613, - "grad_norm": 0.649787962436676, - "learning_rate": 1.0193340948512926e-05, - "loss": 0.045, - "step": 57950 - }, - { - "epoch": 1.4711257773829165, - "grad_norm": 0.6544120907783508, - "learning_rate": 1.0192494817447223e-05, - "loss": 0.0554, - "step": 57955 - }, - { - "epoch": 1.471252697042772, - "grad_norm": 0.5746735334396362, - "learning_rate": 1.0191648686381521e-05, - "loss": 0.0269, - "step": 57960 - }, - { - "epoch": 1.4713796167026272, - "grad_norm": 0.467900812625885, - "learning_rate": 1.019080255531582e-05, - "loss": 0.0281, - "step": 57965 - }, - { - "epoch": 1.4715065363624826, - "grad_norm": 0.5951516032218933, - "learning_rate": 1.0189956424250118e-05, - "loss": 0.0441, - "step": 57970 - }, - { - "epoch": 1.4716334560223379, - "grad_norm": 0.7087815999984741, - "learning_rate": 1.0189110293184415e-05, - "loss": 0.0466, - "step": 57975 - }, - { - "epoch": 1.471760375682193, - "grad_norm": 0.49828705191612244, - "learning_rate": 1.0188264162118713e-05, - "loss": 0.0477, - "step": 57980 - }, - { - "epoch": 1.4718872953420485, - "grad_norm": 0.3485831320285797, - "learning_rate": 1.0187418031053011e-05, - "loss": 0.0475, - "step": 57985 - }, - { - "epoch": 1.4720142150019038, - "grad_norm": 0.3933449387550354, - "learning_rate": 1.018657189998731e-05, - "loss": 0.0322, - "step": 57990 - }, - { - "epoch": 1.4721411346617592, - "grad_norm": 0.4734862744808197, - "learning_rate": 1.0185725768921607e-05, - "loss": 0.0447, - "step": 57995 - }, - { - "epoch": 1.4722680543216144, - "grad_norm": 0.6771801710128784, - "learning_rate": 1.0184879637855905e-05, - "loss": 0.0605, - "step": 58000 - }, - { - "epoch": 1.4723949739814697, - "grad_norm": 0.7105848789215088, - "learning_rate": 1.0184033506790203e-05, - "loss": 0.0483, - "step": 58005 - }, - { - "epoch": 1.472521893641325, - "grad_norm": 0.4018862545490265, - "learning_rate": 1.0183187375724502e-05, - "loss": 0.0501, - "step": 58010 - }, - { - "epoch": 1.4726488133011804, - "grad_norm": 0.4532265365123749, - "learning_rate": 1.0182341244658799e-05, - "loss": 0.0464, - "step": 58015 - }, - { - "epoch": 1.4727757329610356, - "grad_norm": 0.488071084022522, - "learning_rate": 1.0181495113593097e-05, - "loss": 0.0355, - "step": 58020 - }, - { - "epoch": 1.472902652620891, - "grad_norm": 0.2934676706790924, - "learning_rate": 1.0180648982527395e-05, - "loss": 0.0438, - "step": 58025 - }, - { - "epoch": 1.4730295722807463, - "grad_norm": 0.7286655902862549, - "learning_rate": 1.0179802851461694e-05, - "loss": 0.0528, - "step": 58030 - }, - { - "epoch": 1.4731564919406015, - "grad_norm": 0.3163600564002991, - "learning_rate": 1.0178956720395989e-05, - "loss": 0.0365, - "step": 58035 - }, - { - "epoch": 1.473283411600457, - "grad_norm": 1.321088194847107, - "learning_rate": 1.0178110589330287e-05, - "loss": 0.0464, - "step": 58040 - }, - { - "epoch": 1.4734103312603122, - "grad_norm": 0.3577856421470642, - "learning_rate": 1.0177264458264586e-05, - "loss": 0.0351, - "step": 58045 - }, - { - "epoch": 1.4735372509201676, - "grad_norm": 0.36442580819129944, - "learning_rate": 1.0176418327198884e-05, - "loss": 0.0515, - "step": 58050 - }, - { - "epoch": 1.4736641705800229, - "grad_norm": 0.36599963903427124, - "learning_rate": 1.017557219613318e-05, - "loss": 0.0295, - "step": 58055 - }, - { - "epoch": 1.473791090239878, - "grad_norm": 0.6003066301345825, - "learning_rate": 1.0174726065067479e-05, - "loss": 0.0409, - "step": 58060 - }, - { - "epoch": 1.4739180098997335, - "grad_norm": 0.633961021900177, - "learning_rate": 1.0173879934001777e-05, - "loss": 0.0385, - "step": 58065 - }, - { - "epoch": 1.4740449295595888, - "grad_norm": 0.4841915965080261, - "learning_rate": 1.0173033802936076e-05, - "loss": 0.0481, - "step": 58070 - }, - { - "epoch": 1.4741718492194442, - "grad_norm": 0.40963414311408997, - "learning_rate": 1.0172187671870373e-05, - "loss": 0.0466, - "step": 58075 - }, - { - "epoch": 1.4742987688792994, - "grad_norm": 0.3247918486595154, - "learning_rate": 1.0171341540804671e-05, - "loss": 0.0364, - "step": 58080 - }, - { - "epoch": 1.4744256885391547, - "grad_norm": 0.4445253312587738, - "learning_rate": 1.017049540973897e-05, - "loss": 0.0365, - "step": 58085 - }, - { - "epoch": 1.47455260819901, - "grad_norm": 0.5852125287055969, - "learning_rate": 1.0169649278673268e-05, - "loss": 0.0586, - "step": 58090 - }, - { - "epoch": 1.4746795278588654, - "grad_norm": 0.6410995721817017, - "learning_rate": 1.0168803147607564e-05, - "loss": 0.0389, - "step": 58095 - }, - { - "epoch": 1.4748064475187206, - "grad_norm": 0.30884650349617004, - "learning_rate": 1.0167957016541863e-05, - "loss": 0.0369, - "step": 58100 - }, - { - "epoch": 1.474933367178576, - "grad_norm": 0.5729794502258301, - "learning_rate": 1.0167110885476161e-05, - "loss": 0.0588, - "step": 58105 - }, - { - "epoch": 1.4750602868384313, - "grad_norm": 0.30492112040519714, - "learning_rate": 1.016626475441046e-05, - "loss": 0.0426, - "step": 58110 - }, - { - "epoch": 1.4751872064982865, - "grad_norm": 0.4143698513507843, - "learning_rate": 1.0165418623344756e-05, - "loss": 0.0446, - "step": 58115 - }, - { - "epoch": 1.475314126158142, - "grad_norm": 0.555191695690155, - "learning_rate": 1.0164572492279055e-05, - "loss": 0.046, - "step": 58120 - }, - { - "epoch": 1.4754410458179972, - "grad_norm": 1.4075287580490112, - "learning_rate": 1.0163726361213353e-05, - "loss": 0.0513, - "step": 58125 - }, - { - "epoch": 1.4755679654778526, - "grad_norm": 0.5967289805412292, - "learning_rate": 1.0162880230147652e-05, - "loss": 0.0459, - "step": 58130 - }, - { - "epoch": 1.4756948851377079, - "grad_norm": 0.5058615207672119, - "learning_rate": 1.0162034099081948e-05, - "loss": 0.0405, - "step": 58135 - }, - { - "epoch": 1.475821804797563, - "grad_norm": 0.4655885100364685, - "learning_rate": 1.0161187968016247e-05, - "loss": 0.0287, - "step": 58140 - }, - { - "epoch": 1.4759487244574185, - "grad_norm": 0.36019188165664673, - "learning_rate": 1.0160341836950545e-05, - "loss": 0.0371, - "step": 58145 - }, - { - "epoch": 1.4760756441172738, - "grad_norm": 0.3739977777004242, - "learning_rate": 1.0159495705884844e-05, - "loss": 0.0457, - "step": 58150 - }, - { - "epoch": 1.476202563777129, - "grad_norm": 0.48365920782089233, - "learning_rate": 1.015864957481914e-05, - "loss": 0.0288, - "step": 58155 - }, - { - "epoch": 1.4763294834369844, - "grad_norm": 0.4192814826965332, - "learning_rate": 1.0157803443753439e-05, - "loss": 0.0405, - "step": 58160 - }, - { - "epoch": 1.4764564030968397, - "grad_norm": 0.46122080087661743, - "learning_rate": 1.0156957312687737e-05, - "loss": 0.0524, - "step": 58165 - }, - { - "epoch": 1.476583322756695, - "grad_norm": 1.2885829210281372, - "learning_rate": 1.0156111181622035e-05, - "loss": 0.054, - "step": 58170 - }, - { - "epoch": 1.4767102424165504, - "grad_norm": 0.38660764694213867, - "learning_rate": 1.015526505055633e-05, - "loss": 0.0598, - "step": 58175 - }, - { - "epoch": 1.4768371620764056, - "grad_norm": 0.21945780515670776, - "learning_rate": 1.0154418919490629e-05, - "loss": 0.0407, - "step": 58180 - }, - { - "epoch": 1.476964081736261, - "grad_norm": 0.7248370051383972, - "learning_rate": 1.0153572788424927e-05, - "loss": 0.0517, - "step": 58185 - }, - { - "epoch": 1.4770910013961163, - "grad_norm": 0.5833450555801392, - "learning_rate": 1.0152726657359226e-05, - "loss": 0.0506, - "step": 58190 - }, - { - "epoch": 1.4772179210559715, - "grad_norm": 0.7209710478782654, - "learning_rate": 1.0151880526293522e-05, - "loss": 0.0383, - "step": 58195 - }, - { - "epoch": 1.477344840715827, - "grad_norm": 1.4113601446151733, - "learning_rate": 1.015103439522782e-05, - "loss": 0.0394, - "step": 58200 - }, - { - "epoch": 1.4774717603756822, - "grad_norm": 0.4803179204463959, - "learning_rate": 1.015018826416212e-05, - "loss": 0.0603, - "step": 58205 - }, - { - "epoch": 1.4775986800355376, - "grad_norm": 0.26115819811820984, - "learning_rate": 1.0149342133096418e-05, - "loss": 0.0551, - "step": 58210 - }, - { - "epoch": 1.4777255996953929, - "grad_norm": 0.2956438958644867, - "learning_rate": 1.0148496002030714e-05, - "loss": 0.0418, - "step": 58215 - }, - { - "epoch": 1.477852519355248, - "grad_norm": 0.35887444019317627, - "learning_rate": 1.0147649870965013e-05, - "loss": 0.0396, - "step": 58220 - }, - { - "epoch": 1.4779794390151033, - "grad_norm": 0.5845176577568054, - "learning_rate": 1.0146803739899311e-05, - "loss": 0.0363, - "step": 58225 - }, - { - "epoch": 1.4781063586749588, - "grad_norm": 0.43701109290122986, - "learning_rate": 1.014595760883361e-05, - "loss": 0.0619, - "step": 58230 - }, - { - "epoch": 1.478233278334814, - "grad_norm": 0.5491514801979065, - "learning_rate": 1.0145111477767906e-05, - "loss": 0.0377, - "step": 58235 - }, - { - "epoch": 1.4783601979946694, - "grad_norm": 0.6068772673606873, - "learning_rate": 1.0144265346702205e-05, - "loss": 0.0407, - "step": 58240 - }, - { - "epoch": 1.4784871176545247, - "grad_norm": 0.38548505306243896, - "learning_rate": 1.0143419215636503e-05, - "loss": 0.039, - "step": 58245 - }, - { - "epoch": 1.47861403731438, - "grad_norm": 1.132225513458252, - "learning_rate": 1.0142573084570801e-05, - "loss": 0.0397, - "step": 58250 - }, - { - "epoch": 1.4787409569742354, - "grad_norm": 0.5688320994377136, - "learning_rate": 1.0141726953505098e-05, - "loss": 0.0635, - "step": 58255 - }, - { - "epoch": 1.4788678766340906, - "grad_norm": 0.3541191816329956, - "learning_rate": 1.0140880822439397e-05, - "loss": 0.0643, - "step": 58260 - }, - { - "epoch": 1.478994796293946, - "grad_norm": 1.2701818943023682, - "learning_rate": 1.0140034691373695e-05, - "loss": 0.0651, - "step": 58265 - }, - { - "epoch": 1.4791217159538013, - "grad_norm": 0.5288259983062744, - "learning_rate": 1.0139188560307993e-05, - "loss": 0.038, - "step": 58270 - }, - { - "epoch": 1.4792486356136565, - "grad_norm": 0.732527494430542, - "learning_rate": 1.013834242924229e-05, - "loss": 0.0486, - "step": 58275 - }, - { - "epoch": 1.479375555273512, - "grad_norm": 0.7819499373435974, - "learning_rate": 1.0137496298176588e-05, - "loss": 0.0455, - "step": 58280 - }, - { - "epoch": 1.4795024749333672, - "grad_norm": 0.5869538187980652, - "learning_rate": 1.0136650167110887e-05, - "loss": 0.0352, - "step": 58285 - }, - { - "epoch": 1.4796293945932226, - "grad_norm": 0.32243481278419495, - "learning_rate": 1.0135804036045185e-05, - "loss": 0.0411, - "step": 58290 - }, - { - "epoch": 1.4797563142530779, - "grad_norm": 0.40007802844047546, - "learning_rate": 1.013495790497948e-05, - "loss": 0.0425, - "step": 58295 - }, - { - "epoch": 1.479883233912933, - "grad_norm": 0.5839366912841797, - "learning_rate": 1.013411177391378e-05, - "loss": 0.0495, - "step": 58300 - }, - { - "epoch": 1.4800101535727883, - "grad_norm": 0.38507503271102905, - "learning_rate": 1.0133265642848079e-05, - "loss": 0.032, - "step": 58305 - }, - { - "epoch": 1.4801370732326438, - "grad_norm": 0.31527358293533325, - "learning_rate": 1.0132419511782377e-05, - "loss": 0.0389, - "step": 58310 - }, - { - "epoch": 1.480263992892499, - "grad_norm": 1.4076955318450928, - "learning_rate": 1.0131573380716672e-05, - "loss": 0.0416, - "step": 58315 - }, - { - "epoch": 1.4803909125523544, - "grad_norm": 0.8155735731124878, - "learning_rate": 1.013072724965097e-05, - "loss": 0.0391, - "step": 58320 - }, - { - "epoch": 1.4805178322122097, - "grad_norm": 0.276388555765152, - "learning_rate": 1.0129881118585269e-05, - "loss": 0.0401, - "step": 58325 - }, - { - "epoch": 1.480644751872065, - "grad_norm": 0.5592435002326965, - "learning_rate": 1.0129034987519567e-05, - "loss": 0.0479, - "step": 58330 - }, - { - "epoch": 1.4807716715319204, - "grad_norm": 0.40977635979652405, - "learning_rate": 1.0128188856453867e-05, - "loss": 0.0467, - "step": 58335 - }, - { - "epoch": 1.4808985911917756, - "grad_norm": 0.5536388158798218, - "learning_rate": 1.0127342725388162e-05, - "loss": 0.038, - "step": 58340 - }, - { - "epoch": 1.481025510851631, - "grad_norm": 0.3730122148990631, - "learning_rate": 1.0126496594322461e-05, - "loss": 0.0397, - "step": 58345 - }, - { - "epoch": 1.4811524305114863, - "grad_norm": 0.3574788570404053, - "learning_rate": 1.012565046325676e-05, - "loss": 0.0334, - "step": 58350 - }, - { - "epoch": 1.4812793501713415, - "grad_norm": 0.3450295925140381, - "learning_rate": 1.0124804332191058e-05, - "loss": 0.0403, - "step": 58355 - }, - { - "epoch": 1.4814062698311967, - "grad_norm": 0.3753056526184082, - "learning_rate": 1.0123958201125354e-05, - "loss": 0.0483, - "step": 58360 - }, - { - "epoch": 1.4815331894910522, - "grad_norm": 0.4597775340080261, - "learning_rate": 1.0123112070059653e-05, - "loss": 0.0391, - "step": 58365 - }, - { - "epoch": 1.4816601091509074, - "grad_norm": 0.3686605393886566, - "learning_rate": 1.0122265938993951e-05, - "loss": 0.0413, - "step": 58370 - }, - { - "epoch": 1.4817870288107629, - "grad_norm": 0.7702686786651611, - "learning_rate": 1.012141980792825e-05, - "loss": 0.0413, - "step": 58375 - }, - { - "epoch": 1.481913948470618, - "grad_norm": 0.35454681515693665, - "learning_rate": 1.0120573676862546e-05, - "loss": 0.0361, - "step": 58380 - }, - { - "epoch": 1.4820408681304733, - "grad_norm": 0.4464316666126251, - "learning_rate": 1.0119727545796845e-05, - "loss": 0.0388, - "step": 58385 - }, - { - "epoch": 1.4821677877903288, - "grad_norm": 0.6468459367752075, - "learning_rate": 1.0118881414731143e-05, - "loss": 0.0613, - "step": 58390 - }, - { - "epoch": 1.482294707450184, - "grad_norm": 0.6153712272644043, - "learning_rate": 1.0118035283665442e-05, - "loss": 0.0512, - "step": 58395 - }, - { - "epoch": 1.4824216271100394, - "grad_norm": 0.42431336641311646, - "learning_rate": 1.0117189152599738e-05, - "loss": 0.0556, - "step": 58400 - }, - { - "epoch": 1.4825485467698947, - "grad_norm": 0.5479484796524048, - "learning_rate": 1.0116343021534037e-05, - "loss": 0.0315, - "step": 58405 - }, - { - "epoch": 1.48267546642975, - "grad_norm": 0.7721520662307739, - "learning_rate": 1.0115496890468335e-05, - "loss": 0.0448, - "step": 58410 - }, - { - "epoch": 1.4828023860896054, - "grad_norm": 0.44385796785354614, - "learning_rate": 1.0114650759402633e-05, - "loss": 0.0369, - "step": 58415 - }, - { - "epoch": 1.4829293057494606, - "grad_norm": 0.2814343571662903, - "learning_rate": 1.011380462833693e-05, - "loss": 0.0428, - "step": 58420 - }, - { - "epoch": 1.483056225409316, - "grad_norm": 0.5907595753669739, - "learning_rate": 1.0112958497271229e-05, - "loss": 0.0621, - "step": 58425 - }, - { - "epoch": 1.4831831450691713, - "grad_norm": 0.6351984143257141, - "learning_rate": 1.0112112366205527e-05, - "loss": 0.0517, - "step": 58430 - }, - { - "epoch": 1.4833100647290265, - "grad_norm": 0.7622340321540833, - "learning_rate": 1.0111266235139825e-05, - "loss": 0.0539, - "step": 58435 - }, - { - "epoch": 1.4834369843888817, - "grad_norm": 0.3248145282268524, - "learning_rate": 1.0110420104074122e-05, - "loss": 0.0421, - "step": 58440 - }, - { - "epoch": 1.4835639040487372, - "grad_norm": 0.31135907769203186, - "learning_rate": 1.010957397300842e-05, - "loss": 0.0347, - "step": 58445 - }, - { - "epoch": 1.4836908237085924, - "grad_norm": 0.35918664932250977, - "learning_rate": 1.0108727841942719e-05, - "loss": 0.0415, - "step": 58450 - }, - { - "epoch": 1.4838177433684479, - "grad_norm": 0.4089791774749756, - "learning_rate": 1.0107881710877017e-05, - "loss": 0.0632, - "step": 58455 - }, - { - "epoch": 1.483944663028303, - "grad_norm": 0.36179250478744507, - "learning_rate": 1.0107035579811312e-05, - "loss": 0.0354, - "step": 58460 - }, - { - "epoch": 1.4840715826881583, - "grad_norm": 0.7174697518348694, - "learning_rate": 1.010618944874561e-05, - "loss": 0.0556, - "step": 58465 - }, - { - "epoch": 1.4841985023480138, - "grad_norm": 0.5924029350280762, - "learning_rate": 1.0105343317679909e-05, - "loss": 0.0464, - "step": 58470 - }, - { - "epoch": 1.484325422007869, - "grad_norm": 0.49490633606910706, - "learning_rate": 1.0104497186614207e-05, - "loss": 0.0508, - "step": 58475 - }, - { - "epoch": 1.4844523416677244, - "grad_norm": 0.6546943187713623, - "learning_rate": 1.0103651055548504e-05, - "loss": 0.0446, - "step": 58480 - }, - { - "epoch": 1.4845792613275797, - "grad_norm": 1.4407660961151123, - "learning_rate": 1.0102804924482803e-05, - "loss": 0.0469, - "step": 58485 - }, - { - "epoch": 1.484706180987435, - "grad_norm": 0.4444836974143982, - "learning_rate": 1.0101958793417101e-05, - "loss": 0.036, - "step": 58490 - }, - { - "epoch": 1.4848331006472901, - "grad_norm": 0.6220675110816956, - "learning_rate": 1.01011126623514e-05, - "loss": 0.0504, - "step": 58495 - }, - { - "epoch": 1.4849600203071456, - "grad_norm": 0.3276729881763458, - "learning_rate": 1.0100266531285696e-05, - "loss": 0.0315, - "step": 58500 - }, - { - "epoch": 1.4850869399670008, - "grad_norm": 0.3523581624031067, - "learning_rate": 1.0099420400219994e-05, - "loss": 0.0319, - "step": 58505 - }, - { - "epoch": 1.4852138596268563, - "grad_norm": 0.4233196973800659, - "learning_rate": 1.0098574269154293e-05, - "loss": 0.0349, - "step": 58510 - }, - { - "epoch": 1.4853407792867115, - "grad_norm": 0.49715927243232727, - "learning_rate": 1.0097728138088591e-05, - "loss": 0.0498, - "step": 58515 - }, - { - "epoch": 1.4854676989465667, - "grad_norm": 0.4570404887199402, - "learning_rate": 1.0096882007022888e-05, - "loss": 0.0435, - "step": 58520 - }, - { - "epoch": 1.4855946186064222, - "grad_norm": 0.5473050475120544, - "learning_rate": 1.0096035875957186e-05, - "loss": 0.0377, - "step": 58525 - }, - { - "epoch": 1.4857215382662774, - "grad_norm": 0.5913664698600769, - "learning_rate": 1.0095189744891485e-05, - "loss": 0.043, - "step": 58530 - }, - { - "epoch": 1.4858484579261328, - "grad_norm": 0.2955058515071869, - "learning_rate": 1.0094343613825783e-05, - "loss": 0.0613, - "step": 58535 - }, - { - "epoch": 1.485975377585988, - "grad_norm": 0.5848818421363831, - "learning_rate": 1.009349748276008e-05, - "loss": 0.0558, - "step": 58540 - }, - { - "epoch": 1.4861022972458433, - "grad_norm": 0.4515407085418701, - "learning_rate": 1.0092651351694378e-05, - "loss": 0.0343, - "step": 58545 - }, - { - "epoch": 1.4862292169056988, - "grad_norm": 0.46728187799453735, - "learning_rate": 1.0091805220628677e-05, - "loss": 0.0544, - "step": 58550 - }, - { - "epoch": 1.486356136565554, - "grad_norm": 0.4708852767944336, - "learning_rate": 1.0090959089562975e-05, - "loss": 0.0474, - "step": 58555 - }, - { - "epoch": 1.4864830562254094, - "grad_norm": 0.4891318678855896, - "learning_rate": 1.0090112958497272e-05, - "loss": 0.0407, - "step": 58560 - }, - { - "epoch": 1.4866099758852647, - "grad_norm": 0.342892050743103, - "learning_rate": 1.008926682743157e-05, - "loss": 0.0528, - "step": 58565 - }, - { - "epoch": 1.48673689554512, - "grad_norm": 0.45784756541252136, - "learning_rate": 1.0088420696365869e-05, - "loss": 0.0761, - "step": 58570 - }, - { - "epoch": 1.4868638152049751, - "grad_norm": 0.3661508858203888, - "learning_rate": 1.0087574565300167e-05, - "loss": 0.0549, - "step": 58575 - }, - { - "epoch": 1.4869907348648306, - "grad_norm": 0.27731919288635254, - "learning_rate": 1.0086728434234464e-05, - "loss": 0.0378, - "step": 58580 - }, - { - "epoch": 1.4871176545246858, - "grad_norm": 0.39587700366973877, - "learning_rate": 1.0085882303168762e-05, - "loss": 0.0358, - "step": 58585 - }, - { - "epoch": 1.4872445741845413, - "grad_norm": 0.37808576226234436, - "learning_rate": 1.008503617210306e-05, - "loss": 0.0406, - "step": 58590 - }, - { - "epoch": 1.4873714938443965, - "grad_norm": 0.6234633326530457, - "learning_rate": 1.0084190041037359e-05, - "loss": 0.057, - "step": 58595 - }, - { - "epoch": 1.4874984135042517, - "grad_norm": 0.2976928651332855, - "learning_rate": 1.0083343909971654e-05, - "loss": 0.0541, - "step": 58600 - }, - { - "epoch": 1.4876253331641072, - "grad_norm": 0.4799337387084961, - "learning_rate": 1.0082497778905952e-05, - "loss": 0.0449, - "step": 58605 - }, - { - "epoch": 1.4877522528239624, - "grad_norm": 0.5446792244911194, - "learning_rate": 1.008165164784025e-05, - "loss": 0.044, - "step": 58610 - }, - { - "epoch": 1.4878791724838178, - "grad_norm": 0.4116993546485901, - "learning_rate": 1.008080551677455e-05, - "loss": 0.0391, - "step": 58615 - }, - { - "epoch": 1.488006092143673, - "grad_norm": 0.37469568848609924, - "learning_rate": 1.0079959385708846e-05, - "loss": 0.0498, - "step": 58620 - }, - { - "epoch": 1.4881330118035283, - "grad_norm": 0.3724420964717865, - "learning_rate": 1.0079113254643144e-05, - "loss": 0.0479, - "step": 58625 - }, - { - "epoch": 1.4882599314633838, - "grad_norm": 0.8059240579605103, - "learning_rate": 1.0078267123577443e-05, - "loss": 0.0351, - "step": 58630 - }, - { - "epoch": 1.488386851123239, - "grad_norm": 0.42171210050582886, - "learning_rate": 1.0077420992511741e-05, - "loss": 0.0362, - "step": 58635 - }, - { - "epoch": 1.4885137707830944, - "grad_norm": 0.47728413343429565, - "learning_rate": 1.0076574861446038e-05, - "loss": 0.0439, - "step": 58640 - }, - { - "epoch": 1.4886406904429497, - "grad_norm": 0.518545925617218, - "learning_rate": 1.0075728730380336e-05, - "loss": 0.0459, - "step": 58645 - }, - { - "epoch": 1.488767610102805, - "grad_norm": 0.5440545678138733, - "learning_rate": 1.0074882599314635e-05, - "loss": 0.0512, - "step": 58650 - }, - { - "epoch": 1.4888945297626601, - "grad_norm": 0.28567081689834595, - "learning_rate": 1.0074036468248933e-05, - "loss": 0.0555, - "step": 58655 - }, - { - "epoch": 1.4890214494225156, - "grad_norm": 0.5578073263168335, - "learning_rate": 1.007319033718323e-05, - "loss": 0.0515, - "step": 58660 - }, - { - "epoch": 1.4891483690823708, - "grad_norm": 0.5281920433044434, - "learning_rate": 1.0072344206117528e-05, - "loss": 0.0342, - "step": 58665 - }, - { - "epoch": 1.4892752887422263, - "grad_norm": 0.2514770030975342, - "learning_rate": 1.0071498075051827e-05, - "loss": 0.0281, - "step": 58670 - }, - { - "epoch": 1.4894022084020815, - "grad_norm": 0.6073156595230103, - "learning_rate": 1.0070651943986125e-05, - "loss": 0.0633, - "step": 58675 - }, - { - "epoch": 1.4895291280619367, - "grad_norm": 0.5291013717651367, - "learning_rate": 1.0069805812920422e-05, - "loss": 0.0293, - "step": 58680 - }, - { - "epoch": 1.4896560477217922, - "grad_norm": 0.22973179817199707, - "learning_rate": 1.006895968185472e-05, - "loss": 0.019, - "step": 58685 - }, - { - "epoch": 1.4897829673816474, - "grad_norm": 0.30028191208839417, - "learning_rate": 1.0068113550789018e-05, - "loss": 0.0576, - "step": 58690 - }, - { - "epoch": 1.4899098870415028, - "grad_norm": 0.6933242678642273, - "learning_rate": 1.0067267419723317e-05, - "loss": 0.0342, - "step": 58695 - }, - { - "epoch": 1.490036806701358, - "grad_norm": 0.43655678629875183, - "learning_rate": 1.0066421288657614e-05, - "loss": 0.0467, - "step": 58700 - }, - { - "epoch": 1.4901637263612133, - "grad_norm": 0.939926266670227, - "learning_rate": 1.0065575157591912e-05, - "loss": 0.0371, - "step": 58705 - }, - { - "epoch": 1.4902906460210685, - "grad_norm": 0.5135831832885742, - "learning_rate": 1.006472902652621e-05, - "loss": 0.0372, - "step": 58710 - }, - { - "epoch": 1.490417565680924, - "grad_norm": 0.5471480488777161, - "learning_rate": 1.0063882895460509e-05, - "loss": 0.0485, - "step": 58715 - }, - { - "epoch": 1.4905444853407792, - "grad_norm": 0.3455117642879486, - "learning_rate": 1.0063036764394804e-05, - "loss": 0.0442, - "step": 58720 - }, - { - "epoch": 1.4906714050006347, - "grad_norm": 0.21077772974967957, - "learning_rate": 1.0062190633329104e-05, - "loss": 0.03, - "step": 58725 - }, - { - "epoch": 1.49079832466049, - "grad_norm": 0.4834494888782501, - "learning_rate": 1.0061344502263402e-05, - "loss": 0.0469, - "step": 58730 - }, - { - "epoch": 1.4909252443203451, - "grad_norm": 0.3821600675582886, - "learning_rate": 1.00604983711977e-05, - "loss": 0.0312, - "step": 58735 - }, - { - "epoch": 1.4910521639802006, - "grad_norm": 0.5982311964035034, - "learning_rate": 1.0059652240131996e-05, - "loss": 0.0663, - "step": 58740 - }, - { - "epoch": 1.4911790836400558, - "grad_norm": 0.24090464413166046, - "learning_rate": 1.0058806109066294e-05, - "loss": 0.0229, - "step": 58745 - }, - { - "epoch": 1.4913060032999113, - "grad_norm": 1.2743946313858032, - "learning_rate": 1.0057959978000592e-05, - "loss": 0.0282, - "step": 58750 - }, - { - "epoch": 1.4914329229597665, - "grad_norm": 0.5547011494636536, - "learning_rate": 1.0057113846934891e-05, - "loss": 0.0535, - "step": 58755 - }, - { - "epoch": 1.4915598426196217, - "grad_norm": 0.5819637775421143, - "learning_rate": 1.0056267715869188e-05, - "loss": 0.0468, - "step": 58760 - }, - { - "epoch": 1.4916867622794772, - "grad_norm": 0.3374323844909668, - "learning_rate": 1.0055421584803486e-05, - "loss": 0.0364, - "step": 58765 - }, - { - "epoch": 1.4918136819393324, - "grad_norm": 0.7301924824714661, - "learning_rate": 1.0054575453737784e-05, - "loss": 0.0572, - "step": 58770 - }, - { - "epoch": 1.4919406015991878, - "grad_norm": 0.5279085636138916, - "learning_rate": 1.0053729322672083e-05, - "loss": 0.0414, - "step": 58775 - }, - { - "epoch": 1.492067521259043, - "grad_norm": 0.2683861553668976, - "learning_rate": 1.005288319160638e-05, - "loss": 0.0247, - "step": 58780 - }, - { - "epoch": 1.4921944409188983, - "grad_norm": 0.4954611659049988, - "learning_rate": 1.0052037060540678e-05, - "loss": 0.0479, - "step": 58785 - }, - { - "epoch": 1.4923213605787535, - "grad_norm": 0.4286103844642639, - "learning_rate": 1.0051190929474976e-05, - "loss": 0.0418, - "step": 58790 - }, - { - "epoch": 1.492448280238609, - "grad_norm": 0.371116042137146, - "learning_rate": 1.0050344798409275e-05, - "loss": 0.0379, - "step": 58795 - }, - { - "epoch": 1.4925751998984642, - "grad_norm": 0.5354899168014526, - "learning_rate": 1.0049498667343571e-05, - "loss": 0.0475, - "step": 58800 - }, - { - "epoch": 1.4927021195583197, - "grad_norm": 0.44265803694725037, - "learning_rate": 1.004865253627787e-05, - "loss": 0.0555, - "step": 58805 - }, - { - "epoch": 1.492829039218175, - "grad_norm": 0.5516615509986877, - "learning_rate": 1.0047806405212168e-05, - "loss": 0.0347, - "step": 58810 - }, - { - "epoch": 1.4929559588780301, - "grad_norm": 1.1445759534835815, - "learning_rate": 1.0046960274146467e-05, - "loss": 0.0394, - "step": 58815 - }, - { - "epoch": 1.4930828785378856, - "grad_norm": 0.37119218707084656, - "learning_rate": 1.0046114143080763e-05, - "loss": 0.0373, - "step": 58820 - }, - { - "epoch": 1.4932097981977408, - "grad_norm": 0.49168631434440613, - "learning_rate": 1.0045268012015062e-05, - "loss": 0.0442, - "step": 58825 - }, - { - "epoch": 1.4933367178575963, - "grad_norm": 0.48002827167510986, - "learning_rate": 1.004442188094936e-05, - "loss": 0.0486, - "step": 58830 - }, - { - "epoch": 1.4934636375174515, - "grad_norm": 0.347168505191803, - "learning_rate": 1.0043575749883659e-05, - "loss": 0.0373, - "step": 58835 - }, - { - "epoch": 1.4935905571773067, - "grad_norm": 0.49530407786369324, - "learning_rate": 1.0042729618817957e-05, - "loss": 0.0491, - "step": 58840 - }, - { - "epoch": 1.493717476837162, - "grad_norm": 0.35706549882888794, - "learning_rate": 1.0041883487752254e-05, - "loss": 0.0328, - "step": 58845 - }, - { - "epoch": 1.4938443964970174, - "grad_norm": 0.5171608924865723, - "learning_rate": 1.0041037356686552e-05, - "loss": 0.0504, - "step": 58850 - }, - { - "epoch": 1.4939713161568726, - "grad_norm": 0.6221930980682373, - "learning_rate": 1.004019122562085e-05, - "loss": 0.0398, - "step": 58855 - }, - { - "epoch": 1.494098235816728, - "grad_norm": 0.6492722630500793, - "learning_rate": 1.0039345094555149e-05, - "loss": 0.0633, - "step": 58860 - }, - { - "epoch": 1.4942251554765833, - "grad_norm": 0.3474634885787964, - "learning_rate": 1.0038498963489446e-05, - "loss": 0.0453, - "step": 58865 - }, - { - "epoch": 1.4943520751364385, - "grad_norm": 0.360568106174469, - "learning_rate": 1.0037652832423744e-05, - "loss": 0.0319, - "step": 58870 - }, - { - "epoch": 1.494478994796294, - "grad_norm": 0.4512651264667511, - "learning_rate": 1.0036806701358042e-05, - "loss": 0.0481, - "step": 58875 - }, - { - "epoch": 1.4946059144561492, - "grad_norm": 1.2456635236740112, - "learning_rate": 1.003596057029234e-05, - "loss": 0.0362, - "step": 58880 - }, - { - "epoch": 1.4947328341160047, - "grad_norm": 0.47719287872314453, - "learning_rate": 1.0035114439226636e-05, - "loss": 0.0488, - "step": 58885 - }, - { - "epoch": 1.49485975377586, - "grad_norm": 0.7014736533164978, - "learning_rate": 1.0034268308160934e-05, - "loss": 0.071, - "step": 58890 - }, - { - "epoch": 1.4949866734357151, - "grad_norm": 0.5970779061317444, - "learning_rate": 1.0033422177095233e-05, - "loss": 0.0393, - "step": 58895 - }, - { - "epoch": 1.4951135930955706, - "grad_norm": 0.8462978601455688, - "learning_rate": 1.0032576046029533e-05, - "loss": 0.0638, - "step": 58900 - }, - { - "epoch": 1.4952405127554258, - "grad_norm": 0.5627976655960083, - "learning_rate": 1.0031729914963828e-05, - "loss": 0.0413, - "step": 58905 - }, - { - "epoch": 1.4953674324152813, - "grad_norm": 1.9020659923553467, - "learning_rate": 1.0030883783898126e-05, - "loss": 0.033, - "step": 58910 - }, - { - "epoch": 1.4954943520751365, - "grad_norm": 0.4748908579349518, - "learning_rate": 1.0030037652832424e-05, - "loss": 0.0626, - "step": 58915 - }, - { - "epoch": 1.4956212717349917, - "grad_norm": 0.2542126774787903, - "learning_rate": 1.0029191521766723e-05, - "loss": 0.0396, - "step": 58920 - }, - { - "epoch": 1.495748191394847, - "grad_norm": 0.44777894020080566, - "learning_rate": 1.002834539070102e-05, - "loss": 0.048, - "step": 58925 - }, - { - "epoch": 1.4958751110547024, - "grad_norm": 0.5654532313346863, - "learning_rate": 1.0027499259635318e-05, - "loss": 0.0477, - "step": 58930 - }, - { - "epoch": 1.4960020307145576, - "grad_norm": 0.3541356325149536, - "learning_rate": 1.0026653128569616e-05, - "loss": 0.0319, - "step": 58935 - }, - { - "epoch": 1.496128950374413, - "grad_norm": 0.5890568494796753, - "learning_rate": 1.0025806997503915e-05, - "loss": 0.0537, - "step": 58940 - }, - { - "epoch": 1.4962558700342683, - "grad_norm": 0.44646385312080383, - "learning_rate": 1.0024960866438212e-05, - "loss": 0.0339, - "step": 58945 - }, - { - "epoch": 1.4963827896941235, - "grad_norm": 0.40856826305389404, - "learning_rate": 1.002411473537251e-05, - "loss": 0.0423, - "step": 58950 - }, - { - "epoch": 1.496509709353979, - "grad_norm": 0.4848567545413971, - "learning_rate": 1.0023268604306808e-05, - "loss": 0.0556, - "step": 58955 - }, - { - "epoch": 1.4966366290138342, - "grad_norm": 0.5484955906867981, - "learning_rate": 1.0022422473241107e-05, - "loss": 0.0371, - "step": 58960 - }, - { - "epoch": 1.4967635486736897, - "grad_norm": 0.9441750049591064, - "learning_rate": 1.0021576342175403e-05, - "loss": 0.0554, - "step": 58965 - }, - { - "epoch": 1.4968904683335449, - "grad_norm": 0.3212212920188904, - "learning_rate": 1.0020730211109702e-05, - "loss": 0.033, - "step": 58970 - }, - { - "epoch": 1.4970173879934001, - "grad_norm": 0.5073497295379639, - "learning_rate": 1.0019884080044e-05, - "loss": 0.0469, - "step": 58975 - }, - { - "epoch": 1.4971443076532556, - "grad_norm": 0.4364004135131836, - "learning_rate": 1.0019037948978299e-05, - "loss": 0.0395, - "step": 58980 - }, - { - "epoch": 1.4972712273131108, - "grad_norm": 0.46128666400909424, - "learning_rate": 1.0018191817912595e-05, - "loss": 0.051, - "step": 58985 - }, - { - "epoch": 1.4973981469729662, - "grad_norm": 0.6358432173728943, - "learning_rate": 1.0017345686846894e-05, - "loss": 0.0436, - "step": 58990 - }, - { - "epoch": 1.4975250666328215, - "grad_norm": 0.6185564398765564, - "learning_rate": 1.0016499555781192e-05, - "loss": 0.0618, - "step": 58995 - }, - { - "epoch": 1.4976519862926767, - "grad_norm": 0.44276705384254456, - "learning_rate": 1.001565342471549e-05, - "loss": 0.0373, - "step": 59000 - }, - { - "epoch": 1.497778905952532, - "grad_norm": 1.036063551902771, - "learning_rate": 1.0014807293649787e-05, - "loss": 0.0343, - "step": 59005 - }, - { - "epoch": 1.4979058256123874, - "grad_norm": 0.592972993850708, - "learning_rate": 1.0013961162584086e-05, - "loss": 0.0352, - "step": 59010 - }, - { - "epoch": 1.4980327452722426, - "grad_norm": 0.377095103263855, - "learning_rate": 1.0013115031518384e-05, - "loss": 0.0481, - "step": 59015 - }, - { - "epoch": 1.498159664932098, - "grad_norm": 0.507404625415802, - "learning_rate": 1.0012268900452682e-05, - "loss": 0.0432, - "step": 59020 - }, - { - "epoch": 1.4982865845919533, - "grad_norm": 0.3944237530231476, - "learning_rate": 1.0011422769386977e-05, - "loss": 0.0362, - "step": 59025 - }, - { - "epoch": 1.4984135042518085, - "grad_norm": 0.5585571527481079, - "learning_rate": 1.0010576638321276e-05, - "loss": 0.0499, - "step": 59030 - }, - { - "epoch": 1.498540423911664, - "grad_norm": 0.5720795392990112, - "learning_rate": 1.0009730507255574e-05, - "loss": 0.062, - "step": 59035 - }, - { - "epoch": 1.4986673435715192, - "grad_norm": 0.37837645411491394, - "learning_rate": 1.0008884376189873e-05, - "loss": 0.0443, - "step": 59040 - }, - { - "epoch": 1.4987942632313747, - "grad_norm": 1.197766661643982, - "learning_rate": 1.000803824512417e-05, - "loss": 0.0335, - "step": 59045 - }, - { - "epoch": 1.4989211828912299, - "grad_norm": 0.5496237874031067, - "learning_rate": 1.0007192114058468e-05, - "loss": 0.0356, - "step": 59050 - }, - { - "epoch": 1.4990481025510851, - "grad_norm": 0.6773380637168884, - "learning_rate": 1.0006345982992766e-05, - "loss": 0.0397, - "step": 59055 - }, - { - "epoch": 1.4991750222109403, - "grad_norm": 0.5894771218299866, - "learning_rate": 1.0005499851927065e-05, - "loss": 0.0276, - "step": 59060 - }, - { - "epoch": 1.4993019418707958, - "grad_norm": 0.7130319476127625, - "learning_rate": 1.0004653720861361e-05, - "loss": 0.0303, - "step": 59065 - }, - { - "epoch": 1.499428861530651, - "grad_norm": 1.1196461915969849, - "learning_rate": 1.000380758979566e-05, - "loss": 0.0424, - "step": 59070 - }, - { - "epoch": 1.4995557811905065, - "grad_norm": 0.24362590909004211, - "learning_rate": 1.0002961458729958e-05, - "loss": 0.0417, - "step": 59075 - }, - { - "epoch": 1.4996827008503617, - "grad_norm": 0.568020761013031, - "learning_rate": 1.0002115327664257e-05, - "loss": 0.0377, - "step": 59080 - }, - { - "epoch": 1.499809620510217, - "grad_norm": 0.3940967321395874, - "learning_rate": 1.0001269196598553e-05, - "loss": 0.048, - "step": 59085 - }, - { - "epoch": 1.4999365401700724, - "grad_norm": 0.35607197880744934, - "learning_rate": 1.0000423065532852e-05, - "loss": 0.0363, - "step": 59090 - }, - { - "epoch": 1.5000634598299276, - "grad_norm": 0.4913197457790375, - "learning_rate": 9.99957693446715e-06, - "loss": 0.0453, - "step": 59095 - }, - { - "epoch": 1.500190379489783, - "grad_norm": 0.37652090191841125, - "learning_rate": 9.998730803401447e-06, - "loss": 0.0449, - "step": 59100 - }, - { - "epoch": 1.5003172991496383, - "grad_norm": 0.38787344098091125, - "learning_rate": 9.997884672335745e-06, - "loss": 0.0256, - "step": 59105 - }, - { - "epoch": 1.5004442188094935, - "grad_norm": 0.4474491775035858, - "learning_rate": 9.997038541270044e-06, - "loss": 0.0412, - "step": 59110 - }, - { - "epoch": 1.5005711384693488, - "grad_norm": 0.6502440571784973, - "learning_rate": 9.996192410204342e-06, - "loss": 0.0253, - "step": 59115 - }, - { - "epoch": 1.5006980581292042, - "grad_norm": 0.6182408928871155, - "learning_rate": 9.995346279138639e-06, - "loss": 0.0336, - "step": 59120 - }, - { - "epoch": 1.5008249777890597, - "grad_norm": 0.3854941725730896, - "learning_rate": 9.994500148072937e-06, - "loss": 0.0272, - "step": 59125 - }, - { - "epoch": 1.5009518974489149, - "grad_norm": 0.3869488835334778, - "learning_rate": 9.993654017007235e-06, - "loss": 0.0524, - "step": 59130 - }, - { - "epoch": 1.5010788171087701, - "grad_norm": 0.447515070438385, - "learning_rate": 9.992807885941534e-06, - "loss": 0.0348, - "step": 59135 - }, - { - "epoch": 1.5012057367686253, - "grad_norm": 0.25506648421287537, - "learning_rate": 9.99196175487583e-06, - "loss": 0.0295, - "step": 59140 - }, - { - "epoch": 1.5013326564284808, - "grad_norm": 0.2776268720626831, - "learning_rate": 9.991115623810129e-06, - "loss": 0.0294, - "step": 59145 - }, - { - "epoch": 1.5014595760883362, - "grad_norm": 0.24667274951934814, - "learning_rate": 9.990269492744427e-06, - "loss": 0.0232, - "step": 59150 - }, - { - "epoch": 1.5015864957481915, - "grad_norm": 0.47303125262260437, - "learning_rate": 9.989423361678726e-06, - "loss": 0.0499, - "step": 59155 - }, - { - "epoch": 1.5017134154080467, - "grad_norm": 0.6019712686538696, - "learning_rate": 9.988577230613022e-06, - "loss": 0.0479, - "step": 59160 - }, - { - "epoch": 1.501840335067902, - "grad_norm": 0.5306481122970581, - "learning_rate": 9.987731099547321e-06, - "loss": 0.0481, - "step": 59165 - }, - { - "epoch": 1.5019672547277574, - "grad_norm": 0.4610695540904999, - "learning_rate": 9.986884968481618e-06, - "loss": 0.0438, - "step": 59170 - }, - { - "epoch": 1.5020941743876126, - "grad_norm": 0.3864743113517761, - "learning_rate": 9.986038837415916e-06, - "loss": 0.0311, - "step": 59175 - }, - { - "epoch": 1.502221094047468, - "grad_norm": 0.38493698835372925, - "learning_rate": 9.985192706350214e-06, - "loss": 0.0437, - "step": 59180 - }, - { - "epoch": 1.5023480137073233, - "grad_norm": 0.3451683819293976, - "learning_rate": 9.984346575284513e-06, - "loss": 0.0253, - "step": 59185 - }, - { - "epoch": 1.5024749333671785, - "grad_norm": 0.4955267012119293, - "learning_rate": 9.98350044421881e-06, - "loss": 0.0284, - "step": 59190 - }, - { - "epoch": 1.5026018530270338, - "grad_norm": 0.45208072662353516, - "learning_rate": 9.982654313153108e-06, - "loss": 0.0339, - "step": 59195 - }, - { - "epoch": 1.5027287726868892, - "grad_norm": 0.5216742753982544, - "learning_rate": 9.981808182087406e-06, - "loss": 0.0461, - "step": 59200 - }, - { - "epoch": 1.5028556923467447, - "grad_norm": 0.3641612231731415, - "learning_rate": 9.980962051021705e-06, - "loss": 0.0509, - "step": 59205 - }, - { - "epoch": 1.5029826120065999, - "grad_norm": 0.43554219603538513, - "learning_rate": 9.980115919956001e-06, - "loss": 0.062, - "step": 59210 - }, - { - "epoch": 1.5031095316664551, - "grad_norm": 0.49621084332466125, - "learning_rate": 9.9792697888903e-06, - "loss": 0.0494, - "step": 59215 - }, - { - "epoch": 1.5032364513263103, - "grad_norm": 0.4797101318836212, - "learning_rate": 9.978423657824598e-06, - "loss": 0.0482, - "step": 59220 - }, - { - "epoch": 1.5033633709861658, - "grad_norm": 0.7007740139961243, - "learning_rate": 9.977577526758897e-06, - "loss": 0.0382, - "step": 59225 - }, - { - "epoch": 1.503490290646021, - "grad_norm": 0.2663155496120453, - "learning_rate": 9.976731395693193e-06, - "loss": 0.0404, - "step": 59230 - }, - { - "epoch": 1.5036172103058765, - "grad_norm": 0.43053266406059265, - "learning_rate": 9.975885264627492e-06, - "loss": 0.0385, - "step": 59235 - }, - { - "epoch": 1.5037441299657317, - "grad_norm": 0.34279701113700867, - "learning_rate": 9.975039133561788e-06, - "loss": 0.0579, - "step": 59240 - }, - { - "epoch": 1.503871049625587, - "grad_norm": 0.2553871273994446, - "learning_rate": 9.974193002496087e-06, - "loss": 0.038, - "step": 59245 - }, - { - "epoch": 1.5039979692854422, - "grad_norm": 0.49103909730911255, - "learning_rate": 9.973346871430385e-06, - "loss": 0.0446, - "step": 59250 - }, - { - "epoch": 1.5041248889452976, - "grad_norm": 0.5005699992179871, - "learning_rate": 9.972500740364684e-06, - "loss": 0.0481, - "step": 59255 - }, - { - "epoch": 1.504251808605153, - "grad_norm": 0.7234626412391663, - "learning_rate": 9.97165460929898e-06, - "loss": 0.0383, - "step": 59260 - }, - { - "epoch": 1.5043787282650083, - "grad_norm": 0.6385260820388794, - "learning_rate": 9.970808478233279e-06, - "loss": 0.0413, - "step": 59265 - }, - { - "epoch": 1.5045056479248635, - "grad_norm": 0.7461102604866028, - "learning_rate": 9.969962347167577e-06, - "loss": 0.0505, - "step": 59270 - }, - { - "epoch": 1.5046325675847187, - "grad_norm": 0.35918256640434265, - "learning_rate": 9.969116216101876e-06, - "loss": 0.0431, - "step": 59275 - }, - { - "epoch": 1.5047594872445742, - "grad_norm": 0.3406965434551239, - "learning_rate": 9.968270085036172e-06, - "loss": 0.045, - "step": 59280 - }, - { - "epoch": 1.5048864069044297, - "grad_norm": 0.8110314011573792, - "learning_rate": 9.96742395397047e-06, - "loss": 0.043, - "step": 59285 - }, - { - "epoch": 1.5050133265642849, - "grad_norm": 0.7020354270935059, - "learning_rate": 9.966577822904769e-06, - "loss": 0.0442, - "step": 59290 - }, - { - "epoch": 1.50514024622414, - "grad_norm": 1.9615557193756104, - "learning_rate": 9.965731691839067e-06, - "loss": 0.0379, - "step": 59295 - }, - { - "epoch": 1.5052671658839953, - "grad_norm": 0.35624241828918457, - "learning_rate": 9.964885560773364e-06, - "loss": 0.0356, - "step": 59300 - }, - { - "epoch": 1.5053940855438508, - "grad_norm": 0.7723415493965149, - "learning_rate": 9.964039429707663e-06, - "loss": 0.0615, - "step": 59305 - }, - { - "epoch": 1.505521005203706, - "grad_norm": 0.26520249247550964, - "learning_rate": 9.96319329864196e-06, - "loss": 0.0417, - "step": 59310 - }, - { - "epoch": 1.5056479248635615, - "grad_norm": 0.30173471570014954, - "learning_rate": 9.962347167576258e-06, - "loss": 0.0505, - "step": 59315 - }, - { - "epoch": 1.5057748445234167, - "grad_norm": 0.2418724000453949, - "learning_rate": 9.961501036510556e-06, - "loss": 0.0573, - "step": 59320 - }, - { - "epoch": 1.505901764183272, - "grad_norm": 0.43792417645454407, - "learning_rate": 9.960654905444854e-06, - "loss": 0.0393, - "step": 59325 - }, - { - "epoch": 1.5060286838431272, - "grad_norm": 0.4711081385612488, - "learning_rate": 9.959808774379151e-06, - "loss": 0.0552, - "step": 59330 - }, - { - "epoch": 1.5061556035029826, - "grad_norm": 0.45063433051109314, - "learning_rate": 9.95896264331345e-06, - "loss": 0.0379, - "step": 59335 - }, - { - "epoch": 1.506282523162838, - "grad_norm": 0.5038205981254578, - "learning_rate": 9.958116512247748e-06, - "loss": 0.0445, - "step": 59340 - }, - { - "epoch": 1.5064094428226933, - "grad_norm": 0.36201712489128113, - "learning_rate": 9.957270381182046e-06, - "loss": 0.0411, - "step": 59345 - }, - { - "epoch": 1.5065363624825485, - "grad_norm": 0.484000563621521, - "learning_rate": 9.956424250116343e-06, - "loss": 0.034, - "step": 59350 - }, - { - "epoch": 1.5066632821424037, - "grad_norm": 0.8958897590637207, - "learning_rate": 9.955578119050642e-06, - "loss": 0.0478, - "step": 59355 - }, - { - "epoch": 1.5067902018022592, - "grad_norm": 0.4113599359989166, - "learning_rate": 9.95473198798494e-06, - "loss": 0.0433, - "step": 59360 - }, - { - "epoch": 1.5069171214621144, - "grad_norm": 0.4769178330898285, - "learning_rate": 9.953885856919238e-06, - "loss": 0.0388, - "step": 59365 - }, - { - "epoch": 1.5070440411219699, - "grad_norm": 0.39303964376449585, - "learning_rate": 9.953039725853537e-06, - "loss": 0.0405, - "step": 59370 - }, - { - "epoch": 1.507170960781825, - "grad_norm": 0.45942020416259766, - "learning_rate": 9.952193594787833e-06, - "loss": 0.0589, - "step": 59375 - }, - { - "epoch": 1.5072978804416803, - "grad_norm": 0.642532229423523, - "learning_rate": 9.951347463722132e-06, - "loss": 0.0397, - "step": 59380 - }, - { - "epoch": 1.5074248001015356, - "grad_norm": 0.8642057180404663, - "learning_rate": 9.950501332656429e-06, - "loss": 0.0479, - "step": 59385 - }, - { - "epoch": 1.507551719761391, - "grad_norm": 0.42450588941574097, - "learning_rate": 9.949655201590727e-06, - "loss": 0.0665, - "step": 59390 - }, - { - "epoch": 1.5076786394212465, - "grad_norm": 0.45572155714035034, - "learning_rate": 9.948809070525025e-06, - "loss": 0.0536, - "step": 59395 - }, - { - "epoch": 1.5078055590811017, - "grad_norm": 0.35061728954315186, - "learning_rate": 9.947962939459324e-06, - "loss": 0.0539, - "step": 59400 - }, - { - "epoch": 1.507932478740957, - "grad_norm": 0.5549939870834351, - "learning_rate": 9.94711680839362e-06, - "loss": 0.0443, - "step": 59405 - }, - { - "epoch": 1.5080593984008122, - "grad_norm": 0.4482344686985016, - "learning_rate": 9.946270677327919e-06, - "loss": 0.036, - "step": 59410 - }, - { - "epoch": 1.5081863180606676, - "grad_norm": 0.6022230982780457, - "learning_rate": 9.945424546262217e-06, - "loss": 0.0461, - "step": 59415 - }, - { - "epoch": 1.508313237720523, - "grad_norm": 0.5587319731712341, - "learning_rate": 9.944578415196516e-06, - "loss": 0.0395, - "step": 59420 - }, - { - "epoch": 1.5084401573803783, - "grad_norm": 0.7117272615432739, - "learning_rate": 9.943732284130812e-06, - "loss": 0.0493, - "step": 59425 - }, - { - "epoch": 1.5085670770402335, - "grad_norm": 0.45899784564971924, - "learning_rate": 9.94288615306511e-06, - "loss": 0.0256, - "step": 59430 - }, - { - "epoch": 1.5086939967000887, - "grad_norm": 0.45115602016448975, - "learning_rate": 9.94204002199941e-06, - "loss": 0.0439, - "step": 59435 - }, - { - "epoch": 1.5088209163599442, - "grad_norm": 0.5757416486740112, - "learning_rate": 9.941193890933708e-06, - "loss": 0.0374, - "step": 59440 - }, - { - "epoch": 1.5089478360197994, - "grad_norm": 0.5116561651229858, - "learning_rate": 9.940347759868004e-06, - "loss": 0.0483, - "step": 59445 - }, - { - "epoch": 1.5090747556796549, - "grad_norm": 0.344269722700119, - "learning_rate": 9.939501628802303e-06, - "loss": 0.0281, - "step": 59450 - }, - { - "epoch": 1.50920167533951, - "grad_norm": 0.4524206817150116, - "learning_rate": 9.9386554977366e-06, - "loss": 0.0376, - "step": 59455 - }, - { - "epoch": 1.5093285949993653, - "grad_norm": 0.4955184757709503, - "learning_rate": 9.937809366670898e-06, - "loss": 0.029, - "step": 59460 - }, - { - "epoch": 1.5094555146592206, - "grad_norm": 0.24709106981754303, - "learning_rate": 9.936963235605196e-06, - "loss": 0.0388, - "step": 59465 - }, - { - "epoch": 1.509582434319076, - "grad_norm": 0.6112472414970398, - "learning_rate": 9.936117104539495e-06, - "loss": 0.0533, - "step": 59470 - }, - { - "epoch": 1.5097093539789315, - "grad_norm": 0.30030927062034607, - "learning_rate": 9.935270973473791e-06, - "loss": 0.0709, - "step": 59475 - }, - { - "epoch": 1.5098362736387867, - "grad_norm": 0.4050077795982361, - "learning_rate": 9.93442484240809e-06, - "loss": 0.0419, - "step": 59480 - }, - { - "epoch": 1.509963193298642, - "grad_norm": 0.5759156942367554, - "learning_rate": 9.933578711342388e-06, - "loss": 0.0493, - "step": 59485 - }, - { - "epoch": 1.5100901129584972, - "grad_norm": 0.7293353080749512, - "learning_rate": 9.932732580276687e-06, - "loss": 0.0394, - "step": 59490 - }, - { - "epoch": 1.5102170326183526, - "grad_norm": 0.5492882132530212, - "learning_rate": 9.931886449210983e-06, - "loss": 0.0348, - "step": 59495 - }, - { - "epoch": 1.510343952278208, - "grad_norm": 0.5694292783737183, - "learning_rate": 9.931040318145282e-06, - "loss": 0.0337, - "step": 59500 - }, - { - "epoch": 1.5104708719380633, - "grad_norm": 0.5712590217590332, - "learning_rate": 9.93019418707958e-06, - "loss": 0.0499, - "step": 59505 - }, - { - "epoch": 1.5105977915979185, - "grad_norm": 0.5955480933189392, - "learning_rate": 9.929348056013878e-06, - "loss": 0.0402, - "step": 59510 - }, - { - "epoch": 1.5107247112577737, - "grad_norm": 0.4624987244606018, - "learning_rate": 9.928501924948175e-06, - "loss": 0.0349, - "step": 59515 - }, - { - "epoch": 1.5108516309176292, - "grad_norm": 0.44200167059898376, - "learning_rate": 9.927655793882474e-06, - "loss": 0.0531, - "step": 59520 - }, - { - "epoch": 1.5109785505774844, - "grad_norm": 0.36024680733680725, - "learning_rate": 9.92680966281677e-06, - "loss": 0.0431, - "step": 59525 - }, - { - "epoch": 1.5111054702373399, - "grad_norm": 0.7246339321136475, - "learning_rate": 9.925963531751069e-06, - "loss": 0.0597, - "step": 59530 - }, - { - "epoch": 1.511232389897195, - "grad_norm": 0.43692487478256226, - "learning_rate": 9.925117400685367e-06, - "loss": 0.0448, - "step": 59535 - }, - { - "epoch": 1.5113593095570503, - "grad_norm": 0.5058413147926331, - "learning_rate": 9.924271269619665e-06, - "loss": 0.0357, - "step": 59540 - }, - { - "epoch": 1.5114862292169056, - "grad_norm": 0.515558660030365, - "learning_rate": 9.923425138553962e-06, - "loss": 0.0376, - "step": 59545 - }, - { - "epoch": 1.511613148876761, - "grad_norm": 0.3888366222381592, - "learning_rate": 9.92257900748826e-06, - "loss": 0.0427, - "step": 59550 - }, - { - "epoch": 1.5117400685366165, - "grad_norm": 0.6130295991897583, - "learning_rate": 9.921732876422559e-06, - "loss": 0.0283, - "step": 59555 - }, - { - "epoch": 1.5118669881964717, - "grad_norm": 0.30365610122680664, - "learning_rate": 9.920886745356857e-06, - "loss": 0.0511, - "step": 59560 - }, - { - "epoch": 1.511993907856327, - "grad_norm": 0.5739392042160034, - "learning_rate": 9.920040614291154e-06, - "loss": 0.0491, - "step": 59565 - }, - { - "epoch": 1.5121208275161822, - "grad_norm": 0.5841431021690369, - "learning_rate": 9.919194483225452e-06, - "loss": 0.0353, - "step": 59570 - }, - { - "epoch": 1.5122477471760376, - "grad_norm": 0.4795876741409302, - "learning_rate": 9.918348352159751e-06, - "loss": 0.0483, - "step": 59575 - }, - { - "epoch": 1.5123746668358928, - "grad_norm": 0.3131629228591919, - "learning_rate": 9.91750222109405e-06, - "loss": 0.0379, - "step": 59580 - }, - { - "epoch": 1.5125015864957483, - "grad_norm": 0.5431044697761536, - "learning_rate": 9.916656090028346e-06, - "loss": 0.0325, - "step": 59585 - }, - { - "epoch": 1.5126285061556035, - "grad_norm": 0.6453796625137329, - "learning_rate": 9.915809958962644e-06, - "loss": 0.0501, - "step": 59590 - }, - { - "epoch": 1.5127554258154587, - "grad_norm": 0.2739478051662445, - "learning_rate": 9.914963827896941e-06, - "loss": 0.0428, - "step": 59595 - }, - { - "epoch": 1.512882345475314, - "grad_norm": 0.42639490962028503, - "learning_rate": 9.91411769683124e-06, - "loss": 0.0483, - "step": 59600 - }, - { - "epoch": 1.5130092651351694, - "grad_norm": 0.3476022183895111, - "learning_rate": 9.913271565765538e-06, - "loss": 0.0325, - "step": 59605 - }, - { - "epoch": 1.5131361847950249, - "grad_norm": 0.4102407693862915, - "learning_rate": 9.912425434699836e-06, - "loss": 0.0574, - "step": 59610 - }, - { - "epoch": 1.51326310445488, - "grad_norm": 0.6412167549133301, - "learning_rate": 9.911579303634133e-06, - "loss": 0.0398, - "step": 59615 - }, - { - "epoch": 1.5133900241147353, - "grad_norm": 1.519069790840149, - "learning_rate": 9.910733172568431e-06, - "loss": 0.0449, - "step": 59620 - }, - { - "epoch": 1.5135169437745906, - "grad_norm": 0.5038411617279053, - "learning_rate": 9.90988704150273e-06, - "loss": 0.0233, - "step": 59625 - }, - { - "epoch": 1.513643863434446, - "grad_norm": 0.5700514912605286, - "learning_rate": 9.909040910437028e-06, - "loss": 0.0371, - "step": 59630 - }, - { - "epoch": 1.5137707830943015, - "grad_norm": 0.6336293816566467, - "learning_rate": 9.908194779371325e-06, - "loss": 0.0503, - "step": 59635 - }, - { - "epoch": 1.5138977027541567, - "grad_norm": 0.3906371593475342, - "learning_rate": 9.907348648305623e-06, - "loss": 0.0426, - "step": 59640 - }, - { - "epoch": 1.514024622414012, - "grad_norm": 0.9399778842926025, - "learning_rate": 9.906502517239922e-06, - "loss": 0.0422, - "step": 59645 - }, - { - "epoch": 1.5141515420738672, - "grad_norm": 0.3918018341064453, - "learning_rate": 9.90565638617422e-06, - "loss": 0.0511, - "step": 59650 - }, - { - "epoch": 1.5142784617337226, - "grad_norm": 0.47080475091934204, - "learning_rate": 9.904810255108517e-06, - "loss": 0.0573, - "step": 59655 - }, - { - "epoch": 1.5144053813935778, - "grad_norm": 0.6250923275947571, - "learning_rate": 9.903964124042815e-06, - "loss": 0.0452, - "step": 59660 - }, - { - "epoch": 1.5145323010534333, - "grad_norm": 0.3081468939781189, - "learning_rate": 9.903117992977112e-06, - "loss": 0.0394, - "step": 59665 - }, - { - "epoch": 1.5146592207132885, - "grad_norm": 0.8842360973358154, - "learning_rate": 9.90227186191141e-06, - "loss": 0.0287, - "step": 59670 - }, - { - "epoch": 1.5147861403731437, - "grad_norm": 0.4945274591445923, - "learning_rate": 9.901425730845709e-06, - "loss": 0.0295, - "step": 59675 - }, - { - "epoch": 1.514913060032999, - "grad_norm": 0.5542486310005188, - "learning_rate": 9.900579599780007e-06, - "loss": 0.0475, - "step": 59680 - }, - { - "epoch": 1.5150399796928544, - "grad_norm": 0.5361037850379944, - "learning_rate": 9.899733468714304e-06, - "loss": 0.0414, - "step": 59685 - }, - { - "epoch": 1.5151668993527099, - "grad_norm": 0.844848096370697, - "learning_rate": 9.898887337648602e-06, - "loss": 0.0369, - "step": 59690 - }, - { - "epoch": 1.515293819012565, - "grad_norm": 0.5875592231750488, - "learning_rate": 9.8980412065829e-06, - "loss": 0.0319, - "step": 59695 - }, - { - "epoch": 1.5154207386724203, - "grad_norm": 0.5528709888458252, - "learning_rate": 9.897195075517199e-06, - "loss": 0.0522, - "step": 59700 - }, - { - "epoch": 1.5155476583322756, - "grad_norm": 0.6540472507476807, - "learning_rate": 9.896348944451496e-06, - "loss": 0.0482, - "step": 59705 - }, - { - "epoch": 1.515674577992131, - "grad_norm": 0.3389526307582855, - "learning_rate": 9.895502813385794e-06, - "loss": 0.0352, - "step": 59710 - }, - { - "epoch": 1.5158014976519862, - "grad_norm": 1.1828649044036865, - "learning_rate": 9.894656682320093e-06, - "loss": 0.0251, - "step": 59715 - }, - { - "epoch": 1.5159284173118417, - "grad_norm": 0.7518989443778992, - "learning_rate": 9.893810551254391e-06, - "loss": 0.0379, - "step": 59720 - }, - { - "epoch": 1.516055336971697, - "grad_norm": 0.5056030750274658, - "learning_rate": 9.892964420188688e-06, - "loss": 0.0467, - "step": 59725 - }, - { - "epoch": 1.5161822566315521, - "grad_norm": 0.32481420040130615, - "learning_rate": 9.892118289122986e-06, - "loss": 0.0444, - "step": 59730 - }, - { - "epoch": 1.5163091762914074, - "grad_norm": 0.5184552073478699, - "learning_rate": 9.891272158057283e-06, - "loss": 0.0546, - "step": 59735 - }, - { - "epoch": 1.5164360959512628, - "grad_norm": 0.3037630021572113, - "learning_rate": 9.890426026991581e-06, - "loss": 0.0295, - "step": 59740 - }, - { - "epoch": 1.5165630156111183, - "grad_norm": 0.3805435299873352, - "learning_rate": 9.88957989592588e-06, - "loss": 0.0572, - "step": 59745 - }, - { - "epoch": 1.5166899352709735, - "grad_norm": 0.5139039158821106, - "learning_rate": 9.888733764860178e-06, - "loss": 0.0481, - "step": 59750 - }, - { - "epoch": 1.5168168549308287, - "grad_norm": 0.4601380228996277, - "learning_rate": 9.887887633794475e-06, - "loss": 0.0505, - "step": 59755 - }, - { - "epoch": 1.516943774590684, - "grad_norm": 0.3773060441017151, - "learning_rate": 9.887041502728773e-06, - "loss": 0.0582, - "step": 59760 - }, - { - "epoch": 1.5170706942505394, - "grad_norm": 0.3803686499595642, - "learning_rate": 9.886195371663072e-06, - "loss": 0.0475, - "step": 59765 - }, - { - "epoch": 1.5171976139103949, - "grad_norm": 0.5181493759155273, - "learning_rate": 9.88534924059737e-06, - "loss": 0.0412, - "step": 59770 - }, - { - "epoch": 1.51732453357025, - "grad_norm": 0.33407965302467346, - "learning_rate": 9.884503109531667e-06, - "loss": 0.0594, - "step": 59775 - }, - { - "epoch": 1.5174514532301053, - "grad_norm": 0.5500765442848206, - "learning_rate": 9.883656978465965e-06, - "loss": 0.0349, - "step": 59780 - }, - { - "epoch": 1.5175783728899606, - "grad_norm": 0.2826929986476898, - "learning_rate": 9.882810847400263e-06, - "loss": 0.0328, - "step": 59785 - }, - { - "epoch": 1.517705292549816, - "grad_norm": 0.3694946765899658, - "learning_rate": 9.881964716334562e-06, - "loss": 0.0318, - "step": 59790 - }, - { - "epoch": 1.5178322122096712, - "grad_norm": 0.3883016109466553, - "learning_rate": 9.881118585268859e-06, - "loss": 0.0347, - "step": 59795 - }, - { - "epoch": 1.5179591318695267, - "grad_norm": 0.3113800585269928, - "learning_rate": 9.880272454203157e-06, - "loss": 0.0385, - "step": 59800 - }, - { - "epoch": 1.518086051529382, - "grad_norm": 0.5400792956352234, - "learning_rate": 9.879426323137454e-06, - "loss": 0.0435, - "step": 59805 - }, - { - "epoch": 1.5182129711892371, - "grad_norm": 0.5845812559127808, - "learning_rate": 9.878580192071752e-06, - "loss": 0.0476, - "step": 59810 - }, - { - "epoch": 1.5183398908490924, - "grad_norm": 0.784371554851532, - "learning_rate": 9.87773406100605e-06, - "loss": 0.0379, - "step": 59815 - }, - { - "epoch": 1.5184668105089478, - "grad_norm": 0.2879987955093384, - "learning_rate": 9.876887929940349e-06, - "loss": 0.0395, - "step": 59820 - }, - { - "epoch": 1.5185937301688033, - "grad_norm": 0.43982061743736267, - "learning_rate": 9.876041798874646e-06, - "loss": 0.0506, - "step": 59825 - }, - { - "epoch": 1.5187206498286585, - "grad_norm": 0.29455554485321045, - "learning_rate": 9.875195667808944e-06, - "loss": 0.045, - "step": 59830 - }, - { - "epoch": 1.5188475694885137, - "grad_norm": 0.3360239863395691, - "learning_rate": 9.874349536743242e-06, - "loss": 0.0517, - "step": 59835 - }, - { - "epoch": 1.518974489148369, - "grad_norm": 0.6614536643028259, - "learning_rate": 9.87350340567754e-06, - "loss": 0.0477, - "step": 59840 - }, - { - "epoch": 1.5191014088082244, - "grad_norm": 0.3662366569042206, - "learning_rate": 9.872657274611837e-06, - "loss": 0.034, - "step": 59845 - }, - { - "epoch": 1.5192283284680799, - "grad_norm": 0.38601168990135193, - "learning_rate": 9.871811143546136e-06, - "loss": 0.0345, - "step": 59850 - }, - { - "epoch": 1.519355248127935, - "grad_norm": 0.31888914108276367, - "learning_rate": 9.870965012480434e-06, - "loss": 0.0331, - "step": 59855 - }, - { - "epoch": 1.5194821677877903, - "grad_norm": 0.7144458889961243, - "learning_rate": 9.870118881414733e-06, - "loss": 0.0498, - "step": 59860 - }, - { - "epoch": 1.5196090874476456, - "grad_norm": 0.598483681678772, - "learning_rate": 9.86927275034903e-06, - "loss": 0.0415, - "step": 59865 - }, - { - "epoch": 1.5197360071075008, - "grad_norm": 0.4775528013706207, - "learning_rate": 9.868426619283328e-06, - "loss": 0.0322, - "step": 59870 - }, - { - "epoch": 1.5198629267673562, - "grad_norm": 0.8036655187606812, - "learning_rate": 9.867580488217626e-06, - "loss": 0.0437, - "step": 59875 - }, - { - "epoch": 1.5199898464272117, - "grad_norm": 0.413927286863327, - "learning_rate": 9.866734357151923e-06, - "loss": 0.0445, - "step": 59880 - }, - { - "epoch": 1.520116766087067, - "grad_norm": 0.550529956817627, - "learning_rate": 9.865888226086221e-06, - "loss": 0.0393, - "step": 59885 - }, - { - "epoch": 1.5202436857469221, - "grad_norm": 1.2557895183563232, - "learning_rate": 9.86504209502052e-06, - "loss": 0.0492, - "step": 59890 - }, - { - "epoch": 1.5203706054067774, - "grad_norm": 0.48043832182884216, - "learning_rate": 9.864195963954818e-06, - "loss": 0.0311, - "step": 59895 - }, - { - "epoch": 1.5204975250666328, - "grad_norm": 0.3529353737831116, - "learning_rate": 9.863349832889115e-06, - "loss": 0.0411, - "step": 59900 - }, - { - "epoch": 1.5206244447264883, - "grad_norm": 0.5317634344100952, - "learning_rate": 9.862503701823413e-06, - "loss": 0.032, - "step": 59905 - }, - { - "epoch": 1.5207513643863435, - "grad_norm": 0.3887934982776642, - "learning_rate": 9.861657570757712e-06, - "loss": 0.028, - "step": 59910 - }, - { - "epoch": 1.5208782840461987, - "grad_norm": 0.5726295709609985, - "learning_rate": 9.86081143969201e-06, - "loss": 0.0472, - "step": 59915 - }, - { - "epoch": 1.521005203706054, - "grad_norm": 0.49199169874191284, - "learning_rate": 9.859965308626307e-06, - "loss": 0.0545, - "step": 59920 - }, - { - "epoch": 1.5211321233659094, - "grad_norm": 0.32305529713630676, - "learning_rate": 9.859119177560605e-06, - "loss": 0.0516, - "step": 59925 - }, - { - "epoch": 1.5212590430257646, - "grad_norm": 0.4746415317058563, - "learning_rate": 9.858273046494904e-06, - "loss": 0.0349, - "step": 59930 - }, - { - "epoch": 1.52138596268562, - "grad_norm": 0.5107420086860657, - "learning_rate": 9.857426915429202e-06, - "loss": 0.0559, - "step": 59935 - }, - { - "epoch": 1.5215128823454753, - "grad_norm": 0.32244110107421875, - "learning_rate": 9.856580784363499e-06, - "loss": 0.0351, - "step": 59940 - }, - { - "epoch": 1.5216398020053306, - "grad_norm": 0.3731497526168823, - "learning_rate": 9.855734653297797e-06, - "loss": 0.0639, - "step": 59945 - }, - { - "epoch": 1.5217667216651858, - "grad_norm": 0.8766856789588928, - "learning_rate": 9.854888522232094e-06, - "loss": 0.0559, - "step": 59950 - }, - { - "epoch": 1.5218936413250412, - "grad_norm": 0.3308705687522888, - "learning_rate": 9.854042391166392e-06, - "loss": 0.0503, - "step": 59955 - }, - { - "epoch": 1.5220205609848967, - "grad_norm": 0.2626152038574219, - "learning_rate": 9.85319626010069e-06, - "loss": 0.0479, - "step": 59960 - }, - { - "epoch": 1.522147480644752, - "grad_norm": 0.4996538460254669, - "learning_rate": 9.852350129034989e-06, - "loss": 0.0395, - "step": 59965 - }, - { - "epoch": 1.5222744003046071, - "grad_norm": 0.452284038066864, - "learning_rate": 9.851503997969286e-06, - "loss": 0.0584, - "step": 59970 - }, - { - "epoch": 1.5224013199644624, - "grad_norm": 0.48433345556259155, - "learning_rate": 9.850657866903584e-06, - "loss": 0.0478, - "step": 59975 - }, - { - "epoch": 1.5225282396243178, - "grad_norm": 0.3864128291606903, - "learning_rate": 9.849811735837882e-06, - "loss": 0.0354, - "step": 59980 - }, - { - "epoch": 1.5226551592841733, - "grad_norm": 0.5958579778671265, - "learning_rate": 9.848965604772181e-06, - "loss": 0.0346, - "step": 59985 - }, - { - "epoch": 1.5227820789440285, - "grad_norm": 0.39149710536003113, - "learning_rate": 9.848119473706478e-06, - "loss": 0.032, - "step": 59990 - }, - { - "epoch": 1.5229089986038837, - "grad_norm": 0.46685245633125305, - "learning_rate": 9.847273342640776e-06, - "loss": 0.0372, - "step": 59995 - }, - { - "epoch": 1.523035918263739, - "grad_norm": 2.2079031467437744, - "learning_rate": 9.846427211575074e-06, - "loss": 0.0361, - "step": 60000 - }, - { - "epoch": 1.5231628379235944, - "grad_norm": 0.682000994682312, - "learning_rate": 9.845581080509373e-06, - "loss": 0.0311, - "step": 60005 - }, - { - "epoch": 1.5232897575834496, - "grad_norm": 0.6565372943878174, - "learning_rate": 9.84473494944367e-06, - "loss": 0.0428, - "step": 60010 - }, - { - "epoch": 1.523416677243305, - "grad_norm": 0.7128352522850037, - "learning_rate": 9.843888818377968e-06, - "loss": 0.0348, - "step": 60015 - }, - { - "epoch": 1.5235435969031603, - "grad_norm": 0.5773813724517822, - "learning_rate": 9.843042687312265e-06, - "loss": 0.0468, - "step": 60020 - }, - { - "epoch": 1.5236705165630156, - "grad_norm": 0.29816102981567383, - "learning_rate": 9.842196556246563e-06, - "loss": 0.0327, - "step": 60025 - }, - { - "epoch": 1.5237974362228708, - "grad_norm": 0.4471127986907959, - "learning_rate": 9.841350425180861e-06, - "loss": 0.0469, - "step": 60030 - }, - { - "epoch": 1.5239243558827262, - "grad_norm": 0.7779947519302368, - "learning_rate": 9.84050429411516e-06, - "loss": 0.0477, - "step": 60035 - }, - { - "epoch": 1.5240512755425817, - "grad_norm": 0.5320317149162292, - "learning_rate": 9.839658163049457e-06, - "loss": 0.0531, - "step": 60040 - }, - { - "epoch": 1.524178195202437, - "grad_norm": 0.43755531311035156, - "learning_rate": 9.838812031983755e-06, - "loss": 0.0463, - "step": 60045 - }, - { - "epoch": 1.5243051148622921, - "grad_norm": 0.41151219606399536, - "learning_rate": 9.837965900918053e-06, - "loss": 0.0301, - "step": 60050 - }, - { - "epoch": 1.5244320345221474, - "grad_norm": 0.2503757178783417, - "learning_rate": 9.837119769852352e-06, - "loss": 0.0559, - "step": 60055 - }, - { - "epoch": 1.5245589541820028, - "grad_norm": 0.5281050205230713, - "learning_rate": 9.836273638786648e-06, - "loss": 0.0412, - "step": 60060 - }, - { - "epoch": 1.524685873841858, - "grad_norm": 1.183107852935791, - "learning_rate": 9.835427507720947e-06, - "loss": 0.0475, - "step": 60065 - }, - { - "epoch": 1.5248127935017135, - "grad_norm": 0.4056752324104309, - "learning_rate": 9.834581376655245e-06, - "loss": 0.0418, - "step": 60070 - }, - { - "epoch": 1.5249397131615687, - "grad_norm": 0.4393934905529022, - "learning_rate": 9.833735245589544e-06, - "loss": 0.0294, - "step": 60075 - }, - { - "epoch": 1.525066632821424, - "grad_norm": 0.27978554368019104, - "learning_rate": 9.83288911452384e-06, - "loss": 0.0343, - "step": 60080 - }, - { - "epoch": 1.5251935524812792, - "grad_norm": 0.5442721843719482, - "learning_rate": 9.832042983458139e-06, - "loss": 0.0344, - "step": 60085 - }, - { - "epoch": 1.5253204721411346, - "grad_norm": 0.496447890996933, - "learning_rate": 9.831196852392435e-06, - "loss": 0.0418, - "step": 60090 - }, - { - "epoch": 1.52544739180099, - "grad_norm": 0.314064085483551, - "learning_rate": 9.830350721326734e-06, - "loss": 0.0472, - "step": 60095 - }, - { - "epoch": 1.5255743114608453, - "grad_norm": 0.32264527678489685, - "learning_rate": 9.829504590261032e-06, - "loss": 0.038, - "step": 60100 - }, - { - "epoch": 1.5257012311207006, - "grad_norm": 0.3623993396759033, - "learning_rate": 9.82865845919533e-06, - "loss": 0.0257, - "step": 60105 - }, - { - "epoch": 1.5258281507805558, - "grad_norm": 0.5830428004264832, - "learning_rate": 9.827812328129627e-06, - "loss": 0.0656, - "step": 60110 - }, - { - "epoch": 1.5259550704404112, - "grad_norm": 0.9273244738578796, - "learning_rate": 9.826966197063926e-06, - "loss": 0.0376, - "step": 60115 - }, - { - "epoch": 1.5260819901002667, - "grad_norm": 0.34256282448768616, - "learning_rate": 9.826120065998224e-06, - "loss": 0.0425, - "step": 60120 - }, - { - "epoch": 1.526208909760122, - "grad_norm": 0.4260820746421814, - "learning_rate": 9.825273934932523e-06, - "loss": 0.0293, - "step": 60125 - }, - { - "epoch": 1.5263358294199771, - "grad_norm": 0.3597904145717621, - "learning_rate": 9.82442780386682e-06, - "loss": 0.046, - "step": 60130 - }, - { - "epoch": 1.5264627490798324, - "grad_norm": 0.4839112162590027, - "learning_rate": 9.823581672801118e-06, - "loss": 0.0459, - "step": 60135 - }, - { - "epoch": 1.5265896687396878, - "grad_norm": 0.33218643069267273, - "learning_rate": 9.822735541735416e-06, - "loss": 0.0366, - "step": 60140 - }, - { - "epoch": 1.526716588399543, - "grad_norm": 0.3378354609012604, - "learning_rate": 9.821889410669714e-06, - "loss": 0.0304, - "step": 60145 - }, - { - "epoch": 1.5268435080593985, - "grad_norm": 0.3426971733570099, - "learning_rate": 9.821043279604011e-06, - "loss": 0.042, - "step": 60150 - }, - { - "epoch": 1.5269704277192537, - "grad_norm": 0.6162996888160706, - "learning_rate": 9.82019714853831e-06, - "loss": 0.0465, - "step": 60155 - }, - { - "epoch": 1.527097347379109, - "grad_norm": 0.8201425075531006, - "learning_rate": 9.819351017472606e-06, - "loss": 0.0743, - "step": 60160 - }, - { - "epoch": 1.5272242670389642, - "grad_norm": 0.5909543633460999, - "learning_rate": 9.818504886406905e-06, - "loss": 0.0332, - "step": 60165 - }, - { - "epoch": 1.5273511866988196, - "grad_norm": 0.44381093978881836, - "learning_rate": 9.817658755341203e-06, - "loss": 0.0464, - "step": 60170 - }, - { - "epoch": 1.527478106358675, - "grad_norm": 0.5297927856445312, - "learning_rate": 9.816812624275502e-06, - "loss": 0.0447, - "step": 60175 - }, - { - "epoch": 1.5276050260185303, - "grad_norm": 0.46169352531433105, - "learning_rate": 9.815966493209798e-06, - "loss": 0.034, - "step": 60180 - }, - { - "epoch": 1.5277319456783856, - "grad_norm": 0.492567241191864, - "learning_rate": 9.815120362144097e-06, - "loss": 0.0447, - "step": 60185 - }, - { - "epoch": 1.5278588653382408, - "grad_norm": 0.7979075908660889, - "learning_rate": 9.814274231078395e-06, - "loss": 0.0368, - "step": 60190 - }, - { - "epoch": 1.5279857849980962, - "grad_norm": 0.33332163095474243, - "learning_rate": 9.813428100012693e-06, - "loss": 0.0477, - "step": 60195 - }, - { - "epoch": 1.5281127046579517, - "grad_norm": 0.42813003063201904, - "learning_rate": 9.81258196894699e-06, - "loss": 0.0387, - "step": 60200 - }, - { - "epoch": 1.528239624317807, - "grad_norm": 0.5172672867774963, - "learning_rate": 9.811735837881289e-06, - "loss": 0.0397, - "step": 60205 - }, - { - "epoch": 1.5283665439776621, - "grad_norm": 0.5604130625724792, - "learning_rate": 9.810889706815587e-06, - "loss": 0.0332, - "step": 60210 - }, - { - "epoch": 1.5284934636375174, - "grad_norm": 0.6814897656440735, - "learning_rate": 9.810043575749885e-06, - "loss": 0.058, - "step": 60215 - }, - { - "epoch": 1.5286203832973726, - "grad_norm": 0.5882918238639832, - "learning_rate": 9.809197444684182e-06, - "loss": 0.0489, - "step": 60220 - }, - { - "epoch": 1.528747302957228, - "grad_norm": 1.0893446207046509, - "learning_rate": 9.80835131361848e-06, - "loss": 0.0554, - "step": 60225 - }, - { - "epoch": 1.5288742226170835, - "grad_norm": 0.6348974108695984, - "learning_rate": 9.807505182552777e-06, - "loss": 0.0413, - "step": 60230 - }, - { - "epoch": 1.5290011422769387, - "grad_norm": 0.5324177742004395, - "learning_rate": 9.806659051487076e-06, - "loss": 0.0425, - "step": 60235 - }, - { - "epoch": 1.529128061936794, - "grad_norm": 0.6330705285072327, - "learning_rate": 9.805812920421374e-06, - "loss": 0.0478, - "step": 60240 - }, - { - "epoch": 1.5292549815966492, - "grad_norm": 0.6163447499275208, - "learning_rate": 9.804966789355672e-06, - "loss": 0.0553, - "step": 60245 - }, - { - "epoch": 1.5293819012565046, - "grad_norm": 0.34740084409713745, - "learning_rate": 9.804120658289969e-06, - "loss": 0.0456, - "step": 60250 - }, - { - "epoch": 1.52950882091636, - "grad_norm": 0.4986593425273895, - "learning_rate": 9.803274527224267e-06, - "loss": 0.0593, - "step": 60255 - }, - { - "epoch": 1.5296357405762153, - "grad_norm": 0.7453776597976685, - "learning_rate": 9.802428396158566e-06, - "loss": 0.0519, - "step": 60260 - }, - { - "epoch": 1.5297626602360705, - "grad_norm": 0.3354417085647583, - "learning_rate": 9.801582265092864e-06, - "loss": 0.0357, - "step": 60265 - }, - { - "epoch": 1.5298895798959258, - "grad_norm": 0.6193733215332031, - "learning_rate": 9.800736134027161e-06, - "loss": 0.0472, - "step": 60270 - }, - { - "epoch": 1.5300164995557812, - "grad_norm": 0.2942991256713867, - "learning_rate": 9.79989000296146e-06, - "loss": 0.0385, - "step": 60275 - }, - { - "epoch": 1.5301434192156365, - "grad_norm": 0.4193989634513855, - "learning_rate": 9.799043871895758e-06, - "loss": 0.0314, - "step": 60280 - }, - { - "epoch": 1.530270338875492, - "grad_norm": 0.30434727668762207, - "learning_rate": 9.798197740830056e-06, - "loss": 0.0413, - "step": 60285 - }, - { - "epoch": 1.5303972585353471, - "grad_norm": 0.9559926986694336, - "learning_rate": 9.797351609764353e-06, - "loss": 0.0499, - "step": 60290 - }, - { - "epoch": 1.5305241781952024, - "grad_norm": 0.554780900478363, - "learning_rate": 9.796505478698651e-06, - "loss": 0.0581, - "step": 60295 - }, - { - "epoch": 1.5306510978550576, - "grad_norm": 0.6768825054168701, - "learning_rate": 9.795659347632948e-06, - "loss": 0.0387, - "step": 60300 - }, - { - "epoch": 1.530778017514913, - "grad_norm": 0.4950866997241974, - "learning_rate": 9.794813216567246e-06, - "loss": 0.0525, - "step": 60305 - }, - { - "epoch": 1.5309049371747685, - "grad_norm": 0.6043761372566223, - "learning_rate": 9.793967085501545e-06, - "loss": 0.0322, - "step": 60310 - }, - { - "epoch": 1.5310318568346237, - "grad_norm": 0.41620174050331116, - "learning_rate": 9.793120954435843e-06, - "loss": 0.0424, - "step": 60315 - }, - { - "epoch": 1.531158776494479, - "grad_norm": 0.3693621754646301, - "learning_rate": 9.79227482337014e-06, - "loss": 0.0323, - "step": 60320 - }, - { - "epoch": 1.5312856961543342, - "grad_norm": 0.8230075836181641, - "learning_rate": 9.791428692304438e-06, - "loss": 0.0465, - "step": 60325 - }, - { - "epoch": 1.5314126158141896, - "grad_norm": 0.42439553141593933, - "learning_rate": 9.790582561238737e-06, - "loss": 0.0313, - "step": 60330 - }, - { - "epoch": 1.531539535474045, - "grad_norm": 0.4250667691230774, - "learning_rate": 9.789736430173035e-06, - "loss": 0.0571, - "step": 60335 - }, - { - "epoch": 1.5316664551339003, - "grad_norm": 0.427573025226593, - "learning_rate": 9.788890299107332e-06, - "loss": 0.047, - "step": 60340 - }, - { - "epoch": 1.5317933747937555, - "grad_norm": 0.6743175387382507, - "learning_rate": 9.78804416804163e-06, - "loss": 0.0304, - "step": 60345 - }, - { - "epoch": 1.5319202944536108, - "grad_norm": 0.3858724534511566, - "learning_rate": 9.787198036975929e-06, - "loss": 0.0446, - "step": 60350 - }, - { - "epoch": 1.5320472141134662, - "grad_norm": 0.5352027416229248, - "learning_rate": 9.786351905910227e-06, - "loss": 0.0547, - "step": 60355 - }, - { - "epoch": 1.5321741337733215, - "grad_norm": 0.4528164863586426, - "learning_rate": 9.785505774844524e-06, - "loss": 0.0401, - "step": 60360 - }, - { - "epoch": 1.532301053433177, - "grad_norm": 0.5829252004623413, - "learning_rate": 9.784659643778822e-06, - "loss": 0.0529, - "step": 60365 - }, - { - "epoch": 1.5324279730930321, - "grad_norm": 0.3705340027809143, - "learning_rate": 9.783813512713119e-06, - "loss": 0.0612, - "step": 60370 - }, - { - "epoch": 1.5325548927528874, - "grad_norm": 0.5625510811805725, - "learning_rate": 9.782967381647417e-06, - "loss": 0.0354, - "step": 60375 - }, - { - "epoch": 1.5326818124127426, - "grad_norm": 0.4164828062057495, - "learning_rate": 9.782121250581716e-06, - "loss": 0.0442, - "step": 60380 - }, - { - "epoch": 1.532808732072598, - "grad_norm": 0.6061062812805176, - "learning_rate": 9.781275119516014e-06, - "loss": 0.0516, - "step": 60385 - }, - { - "epoch": 1.5329356517324535, - "grad_norm": 0.35152214765548706, - "learning_rate": 9.780428988450312e-06, - "loss": 0.0329, - "step": 60390 - }, - { - "epoch": 1.5330625713923087, - "grad_norm": 1.0963033437728882, - "learning_rate": 9.77958285738461e-06, - "loss": 0.0406, - "step": 60395 - }, - { - "epoch": 1.533189491052164, - "grad_norm": 0.5251843929290771, - "learning_rate": 9.778736726318908e-06, - "loss": 0.0408, - "step": 60400 - }, - { - "epoch": 1.5333164107120192, - "grad_norm": 1.7832458019256592, - "learning_rate": 9.777890595253206e-06, - "loss": 0.0422, - "step": 60405 - }, - { - "epoch": 1.5334433303718746, - "grad_norm": 0.19042618572711945, - "learning_rate": 9.777044464187504e-06, - "loss": 0.0389, - "step": 60410 - }, - { - "epoch": 1.5335702500317299, - "grad_norm": 0.6988484859466553, - "learning_rate": 9.776198333121801e-06, - "loss": 0.033, - "step": 60415 - }, - { - "epoch": 1.5336971696915853, - "grad_norm": 0.4077683687210083, - "learning_rate": 9.7753522020561e-06, - "loss": 0.046, - "step": 60420 - }, - { - "epoch": 1.5338240893514405, - "grad_norm": 0.4407869875431061, - "learning_rate": 9.774506070990398e-06, - "loss": 0.0375, - "step": 60425 - }, - { - "epoch": 1.5339510090112958, - "grad_norm": 0.4323981702327728, - "learning_rate": 9.773659939924696e-06, - "loss": 0.0443, - "step": 60430 - }, - { - "epoch": 1.534077928671151, - "grad_norm": 0.5714977979660034, - "learning_rate": 9.772813808858993e-06, - "loss": 0.0312, - "step": 60435 - }, - { - "epoch": 1.5342048483310065, - "grad_norm": 0.5943276882171631, - "learning_rate": 9.771967677793291e-06, - "loss": 0.0414, - "step": 60440 - }, - { - "epoch": 1.534331767990862, - "grad_norm": 0.3095023036003113, - "learning_rate": 9.771121546727588e-06, - "loss": 0.0574, - "step": 60445 - }, - { - "epoch": 1.5344586876507171, - "grad_norm": 0.32128801941871643, - "learning_rate": 9.770275415661887e-06, - "loss": 0.0386, - "step": 60450 - }, - { - "epoch": 1.5345856073105724, - "grad_norm": 0.5965123772621155, - "learning_rate": 9.769429284596185e-06, - "loss": 0.06, - "step": 60455 - }, - { - "epoch": 1.5347125269704276, - "grad_norm": 0.4395780563354492, - "learning_rate": 9.768583153530483e-06, - "loss": 0.0533, - "step": 60460 - }, - { - "epoch": 1.534839446630283, - "grad_norm": 1.227949857711792, - "learning_rate": 9.76773702246478e-06, - "loss": 0.0553, - "step": 60465 - }, - { - "epoch": 1.5349663662901385, - "grad_norm": 0.532230019569397, - "learning_rate": 9.766890891399078e-06, - "loss": 0.0346, - "step": 60470 - }, - { - "epoch": 1.5350932859499937, - "grad_norm": 0.42486366629600525, - "learning_rate": 9.766044760333377e-06, - "loss": 0.0552, - "step": 60475 - }, - { - "epoch": 1.535220205609849, - "grad_norm": 0.5204038619995117, - "learning_rate": 9.765198629267675e-06, - "loss": 0.0406, - "step": 60480 - }, - { - "epoch": 1.5353471252697042, - "grad_norm": 0.4868180453777313, - "learning_rate": 9.764352498201972e-06, - "loss": 0.0522, - "step": 60485 - }, - { - "epoch": 1.5354740449295596, - "grad_norm": 0.6510946154594421, - "learning_rate": 9.76350636713627e-06, - "loss": 0.0381, - "step": 60490 - }, - { - "epoch": 1.5356009645894149, - "grad_norm": 0.31842124462127686, - "learning_rate": 9.762660236070569e-06, - "loss": 0.0538, - "step": 60495 - }, - { - "epoch": 1.5357278842492703, - "grad_norm": 0.44030845165252686, - "learning_rate": 9.761814105004867e-06, - "loss": 0.0684, - "step": 60500 - }, - { - "epoch": 1.5358548039091255, - "grad_norm": 0.5379852652549744, - "learning_rate": 9.760967973939164e-06, - "loss": 0.0633, - "step": 60505 - }, - { - "epoch": 1.5359817235689808, - "grad_norm": 0.6203917860984802, - "learning_rate": 9.760121842873462e-06, - "loss": 0.0655, - "step": 60510 - }, - { - "epoch": 1.536108643228836, - "grad_norm": 0.37929829955101013, - "learning_rate": 9.759275711807759e-06, - "loss": 0.0473, - "step": 60515 - }, - { - "epoch": 1.5362355628886915, - "grad_norm": 0.6108877062797546, - "learning_rate": 9.758429580742057e-06, - "loss": 0.0693, - "step": 60520 - }, - { - "epoch": 1.536362482548547, - "grad_norm": 0.2140817940235138, - "learning_rate": 9.757583449676356e-06, - "loss": 0.0465, - "step": 60525 - }, - { - "epoch": 1.5364894022084021, - "grad_norm": 0.31625306606292725, - "learning_rate": 9.756737318610654e-06, - "loss": 0.0416, - "step": 60530 - }, - { - "epoch": 1.5366163218682574, - "grad_norm": 0.42012032866477966, - "learning_rate": 9.755891187544951e-06, - "loss": 0.0377, - "step": 60535 - }, - { - "epoch": 1.5367432415281126, - "grad_norm": 0.8828595876693726, - "learning_rate": 9.75504505647925e-06, - "loss": 0.0417, - "step": 60540 - }, - { - "epoch": 1.536870161187968, - "grad_norm": 0.38472071290016174, - "learning_rate": 9.754198925413548e-06, - "loss": 0.0393, - "step": 60545 - }, - { - "epoch": 1.5369970808478235, - "grad_norm": 0.393461674451828, - "learning_rate": 9.753352794347846e-06, - "loss": 0.0432, - "step": 60550 - }, - { - "epoch": 1.5371240005076787, - "grad_norm": 0.2992313504219055, - "learning_rate": 9.752506663282143e-06, - "loss": 0.0352, - "step": 60555 - }, - { - "epoch": 1.537250920167534, - "grad_norm": 0.41887715458869934, - "learning_rate": 9.751660532216441e-06, - "loss": 0.0639, - "step": 60560 - }, - { - "epoch": 1.5373778398273892, - "grad_norm": 0.40910080075263977, - "learning_rate": 9.75081440115074e-06, - "loss": 0.0369, - "step": 60565 - }, - { - "epoch": 1.5375047594872444, - "grad_norm": 0.36207664012908936, - "learning_rate": 9.749968270085038e-06, - "loss": 0.0415, - "step": 60570 - }, - { - "epoch": 1.5376316791470999, - "grad_norm": 0.6636420488357544, - "learning_rate": 9.749122139019335e-06, - "loss": 0.0489, - "step": 60575 - }, - { - "epoch": 1.5377585988069553, - "grad_norm": 0.3463526964187622, - "learning_rate": 9.748276007953633e-06, - "loss": 0.0283, - "step": 60580 - }, - { - "epoch": 1.5378855184668105, - "grad_norm": 0.8947260975837708, - "learning_rate": 9.74742987688793e-06, - "loss": 0.0386, - "step": 60585 - }, - { - "epoch": 1.5380124381266658, - "grad_norm": 0.44531333446502686, - "learning_rate": 9.746583745822228e-06, - "loss": 0.0566, - "step": 60590 - }, - { - "epoch": 1.538139357786521, - "grad_norm": 0.4984789192676544, - "learning_rate": 9.745737614756527e-06, - "loss": 0.0537, - "step": 60595 - }, - { - "epoch": 1.5382662774463765, - "grad_norm": 0.34797537326812744, - "learning_rate": 9.744891483690825e-06, - "loss": 0.0343, - "step": 60600 - }, - { - "epoch": 1.538393197106232, - "grad_norm": 1.3095393180847168, - "learning_rate": 9.744045352625122e-06, - "loss": 0.0364, - "step": 60605 - }, - { - "epoch": 1.5385201167660871, - "grad_norm": 0.3669411242008209, - "learning_rate": 9.74319922155942e-06, - "loss": 0.0327, - "step": 60610 - }, - { - "epoch": 1.5386470364259424, - "grad_norm": 0.38988977670669556, - "learning_rate": 9.742353090493719e-06, - "loss": 0.0432, - "step": 60615 - }, - { - "epoch": 1.5387739560857976, - "grad_norm": 0.5453447699546814, - "learning_rate": 9.741506959428017e-06, - "loss": 0.0473, - "step": 60620 - }, - { - "epoch": 1.538900875745653, - "grad_norm": 0.31737950444221497, - "learning_rate": 9.740660828362314e-06, - "loss": 0.0523, - "step": 60625 - }, - { - "epoch": 1.5390277954055083, - "grad_norm": 0.2417464703321457, - "learning_rate": 9.739814697296612e-06, - "loss": 0.0474, - "step": 60630 - }, - { - "epoch": 1.5391547150653637, - "grad_norm": 0.4070051312446594, - "learning_rate": 9.73896856623091e-06, - "loss": 0.0361, - "step": 60635 - }, - { - "epoch": 1.539281634725219, - "grad_norm": 1.387916922569275, - "learning_rate": 9.738122435165209e-06, - "loss": 0.0302, - "step": 60640 - }, - { - "epoch": 1.5394085543850742, - "grad_norm": 0.5732051134109497, - "learning_rate": 9.737276304099506e-06, - "loss": 0.0487, - "step": 60645 - }, - { - "epoch": 1.5395354740449294, - "grad_norm": 0.19018499553203583, - "learning_rate": 9.736430173033804e-06, - "loss": 0.0331, - "step": 60650 - }, - { - "epoch": 1.5396623937047849, - "grad_norm": 0.2358613908290863, - "learning_rate": 9.7355840419681e-06, - "loss": 0.0416, - "step": 60655 - }, - { - "epoch": 1.5397893133646403, - "grad_norm": 0.4839414656162262, - "learning_rate": 9.734737910902399e-06, - "loss": 0.0435, - "step": 60660 - }, - { - "epoch": 1.5399162330244955, - "grad_norm": 0.3334687650203705, - "learning_rate": 9.733891779836697e-06, - "loss": 0.0418, - "step": 60665 - }, - { - "epoch": 1.5400431526843508, - "grad_norm": 0.4878867566585541, - "learning_rate": 9.733045648770996e-06, - "loss": 0.0436, - "step": 60670 - }, - { - "epoch": 1.540170072344206, - "grad_norm": 0.4066387414932251, - "learning_rate": 9.732199517705293e-06, - "loss": 0.043, - "step": 60675 - }, - { - "epoch": 1.5402969920040614, - "grad_norm": 0.3571045398712158, - "learning_rate": 9.731353386639591e-06, - "loss": 0.034, - "step": 60680 - }, - { - "epoch": 1.540423911663917, - "grad_norm": 0.25978389382362366, - "learning_rate": 9.73050725557389e-06, - "loss": 0.0418, - "step": 60685 - }, - { - "epoch": 1.5405508313237721, - "grad_norm": 0.387438029050827, - "learning_rate": 9.729661124508188e-06, - "loss": 0.0401, - "step": 60690 - }, - { - "epoch": 1.5406777509836274, - "grad_norm": 0.4534596800804138, - "learning_rate": 9.728814993442485e-06, - "loss": 0.0436, - "step": 60695 - }, - { - "epoch": 1.5408046706434826, - "grad_norm": 0.38863950967788696, - "learning_rate": 9.727968862376783e-06, - "loss": 0.0395, - "step": 60700 - }, - { - "epoch": 1.540931590303338, - "grad_norm": 0.434245765209198, - "learning_rate": 9.727122731311081e-06, - "loss": 0.0555, - "step": 60705 - }, - { - "epoch": 1.5410585099631933, - "grad_norm": 0.384207546710968, - "learning_rate": 9.72627660024538e-06, - "loss": 0.0374, - "step": 60710 - }, - { - "epoch": 1.5411854296230487, - "grad_norm": 0.38890528678894043, - "learning_rate": 9.725430469179676e-06, - "loss": 0.0442, - "step": 60715 - }, - { - "epoch": 1.541312349282904, - "grad_norm": 0.45345255732536316, - "learning_rate": 9.724584338113975e-06, - "loss": 0.0373, - "step": 60720 - }, - { - "epoch": 1.5414392689427592, - "grad_norm": 0.5577213764190674, - "learning_rate": 9.723738207048272e-06, - "loss": 0.0291, - "step": 60725 - }, - { - "epoch": 1.5415661886026144, - "grad_norm": 0.9645996689796448, - "learning_rate": 9.72289207598257e-06, - "loss": 0.0417, - "step": 60730 - }, - { - "epoch": 1.5416931082624699, - "grad_norm": 0.4083018898963928, - "learning_rate": 9.722045944916868e-06, - "loss": 0.0558, - "step": 60735 - }, - { - "epoch": 1.5418200279223253, - "grad_norm": 0.29102495312690735, - "learning_rate": 9.721199813851167e-06, - "loss": 0.0425, - "step": 60740 - }, - { - "epoch": 1.5419469475821805, - "grad_norm": 0.3605896830558777, - "learning_rate": 9.720353682785463e-06, - "loss": 0.0461, - "step": 60745 - }, - { - "epoch": 1.5420738672420358, - "grad_norm": 0.818068265914917, - "learning_rate": 9.719507551719762e-06, - "loss": 0.0634, - "step": 60750 - }, - { - "epoch": 1.542200786901891, - "grad_norm": 0.37713491916656494, - "learning_rate": 9.71866142065406e-06, - "loss": 0.027, - "step": 60755 - }, - { - "epoch": 1.5423277065617464, - "grad_norm": 0.5304471850395203, - "learning_rate": 9.717815289588359e-06, - "loss": 0.0491, - "step": 60760 - }, - { - "epoch": 1.5424546262216017, - "grad_norm": 6.581975936889648, - "learning_rate": 9.716969158522655e-06, - "loss": 0.054, - "step": 60765 - }, - { - "epoch": 1.5425815458814571, - "grad_norm": 0.5063075423240662, - "learning_rate": 9.716123027456954e-06, - "loss": 0.0365, - "step": 60770 - }, - { - "epoch": 1.5427084655413124, - "grad_norm": 0.2557864785194397, - "learning_rate": 9.715276896391252e-06, - "loss": 0.0342, - "step": 60775 - }, - { - "epoch": 1.5428353852011676, - "grad_norm": 0.4564296007156372, - "learning_rate": 9.71443076532555e-06, - "loss": 0.0557, - "step": 60780 - }, - { - "epoch": 1.5429623048610228, - "grad_norm": 0.41451627016067505, - "learning_rate": 9.713584634259847e-06, - "loss": 0.0448, - "step": 60785 - }, - { - "epoch": 1.5430892245208783, - "grad_norm": 0.25762200355529785, - "learning_rate": 9.712738503194146e-06, - "loss": 0.0508, - "step": 60790 - }, - { - "epoch": 1.5432161441807337, - "grad_norm": 0.7216759324073792, - "learning_rate": 9.711892372128442e-06, - "loss": 0.0385, - "step": 60795 - }, - { - "epoch": 1.543343063840589, - "grad_norm": 0.45906248688697815, - "learning_rate": 9.71104624106274e-06, - "loss": 0.0436, - "step": 60800 - }, - { - "epoch": 1.5434699835004442, - "grad_norm": 0.5879256725311279, - "learning_rate": 9.71020010999704e-06, - "loss": 0.0645, - "step": 60805 - }, - { - "epoch": 1.5435969031602994, - "grad_norm": 0.4125334322452545, - "learning_rate": 9.709353978931338e-06, - "loss": 0.047, - "step": 60810 - }, - { - "epoch": 1.5437238228201549, - "grad_norm": 0.3294677734375, - "learning_rate": 9.708507847865634e-06, - "loss": 0.0369, - "step": 60815 - }, - { - "epoch": 1.5438507424800103, - "grad_norm": 0.6560848951339722, - "learning_rate": 9.707661716799933e-06, - "loss": 0.0483, - "step": 60820 - }, - { - "epoch": 1.5439776621398655, - "grad_norm": 0.20990341901779175, - "learning_rate": 9.706815585734231e-06, - "loss": 0.0338, - "step": 60825 - }, - { - "epoch": 1.5441045817997208, - "grad_norm": 0.46670958399772644, - "learning_rate": 9.70596945466853e-06, - "loss": 0.0398, - "step": 60830 - }, - { - "epoch": 1.544231501459576, - "grad_norm": 0.49275580048561096, - "learning_rate": 9.705123323602826e-06, - "loss": 0.0442, - "step": 60835 - }, - { - "epoch": 1.5443584211194314, - "grad_norm": 0.21907828748226166, - "learning_rate": 9.704277192537125e-06, - "loss": 0.0284, - "step": 60840 - }, - { - "epoch": 1.5444853407792867, - "grad_norm": 0.37297895550727844, - "learning_rate": 9.703431061471423e-06, - "loss": 0.0492, - "step": 60845 - }, - { - "epoch": 1.5446122604391421, - "grad_norm": 0.4460243582725525, - "learning_rate": 9.702584930405721e-06, - "loss": 0.0414, - "step": 60850 - }, - { - "epoch": 1.5447391800989974, - "grad_norm": 1.8541282415390015, - "learning_rate": 9.701738799340018e-06, - "loss": 0.0439, - "step": 60855 - }, - { - "epoch": 1.5448660997588526, - "grad_norm": 0.31878623366355896, - "learning_rate": 9.700892668274317e-06, - "loss": 0.0373, - "step": 60860 - }, - { - "epoch": 1.5449930194187078, - "grad_norm": 0.3707473874092102, - "learning_rate": 9.700046537208613e-06, - "loss": 0.04, - "step": 60865 - }, - { - "epoch": 1.5451199390785633, - "grad_norm": 0.6833390593528748, - "learning_rate": 9.699200406142912e-06, - "loss": 0.0429, - "step": 60870 - }, - { - "epoch": 1.5452468587384187, - "grad_norm": 0.4353007376194, - "learning_rate": 9.69835427507721e-06, - "loss": 0.029, - "step": 60875 - }, - { - "epoch": 1.545373778398274, - "grad_norm": 0.43372899293899536, - "learning_rate": 9.697508144011508e-06, - "loss": 0.0296, - "step": 60880 - }, - { - "epoch": 1.5455006980581292, - "grad_norm": 0.5735467672348022, - "learning_rate": 9.696662012945805e-06, - "loss": 0.0388, - "step": 60885 - }, - { - "epoch": 1.5456276177179844, - "grad_norm": 0.6122635006904602, - "learning_rate": 9.695815881880104e-06, - "loss": 0.0353, - "step": 60890 - }, - { - "epoch": 1.5457545373778399, - "grad_norm": 0.9643400311470032, - "learning_rate": 9.694969750814402e-06, - "loss": 0.0537, - "step": 60895 - }, - { - "epoch": 1.545881457037695, - "grad_norm": 0.3480902314186096, - "learning_rate": 9.6941236197487e-06, - "loss": 0.0436, - "step": 60900 - }, - { - "epoch": 1.5460083766975505, - "grad_norm": 0.3088207244873047, - "learning_rate": 9.693277488682999e-06, - "loss": 0.0302, - "step": 60905 - }, - { - "epoch": 1.5461352963574058, - "grad_norm": 0.4206470847129822, - "learning_rate": 9.692431357617295e-06, - "loss": 0.032, - "step": 60910 - }, - { - "epoch": 1.546262216017261, - "grad_norm": 0.4637081027030945, - "learning_rate": 9.691585226551594e-06, - "loss": 0.0367, - "step": 60915 - }, - { - "epoch": 1.5463891356771162, - "grad_norm": 0.8515473008155823, - "learning_rate": 9.690739095485892e-06, - "loss": 0.0486, - "step": 60920 - }, - { - "epoch": 1.5465160553369717, - "grad_norm": 0.3307439982891083, - "learning_rate": 9.68989296442019e-06, - "loss": 0.0367, - "step": 60925 - }, - { - "epoch": 1.5466429749968271, - "grad_norm": 0.4294101893901825, - "learning_rate": 9.689046833354487e-06, - "loss": 0.0363, - "step": 60930 - }, - { - "epoch": 1.5467698946566824, - "grad_norm": 0.643816351890564, - "learning_rate": 9.688200702288786e-06, - "loss": 0.0341, - "step": 60935 - }, - { - "epoch": 1.5468968143165376, - "grad_norm": 0.26220986247062683, - "learning_rate": 9.687354571223082e-06, - "loss": 0.0324, - "step": 60940 - }, - { - "epoch": 1.5470237339763928, - "grad_norm": 0.5945456027984619, - "learning_rate": 9.686508440157381e-06, - "loss": 0.0407, - "step": 60945 - }, - { - "epoch": 1.5471506536362483, - "grad_norm": 1.6146165132522583, - "learning_rate": 9.68566230909168e-06, - "loss": 0.0483, - "step": 60950 - }, - { - "epoch": 1.5472775732961037, - "grad_norm": 0.643166720867157, - "learning_rate": 9.684816178025978e-06, - "loss": 0.0456, - "step": 60955 - }, - { - "epoch": 1.547404492955959, - "grad_norm": 0.28817641735076904, - "learning_rate": 9.683970046960274e-06, - "loss": 0.0345, - "step": 60960 - }, - { - "epoch": 1.5475314126158142, - "grad_norm": 1.120608925819397, - "learning_rate": 9.683123915894573e-06, - "loss": 0.0605, - "step": 60965 - }, - { - "epoch": 1.5476583322756694, - "grad_norm": 0.4669854938983917, - "learning_rate": 9.682277784828871e-06, - "loss": 0.0235, - "step": 60970 - }, - { - "epoch": 1.5477852519355249, - "grad_norm": 0.36151614785194397, - "learning_rate": 9.68143165376317e-06, - "loss": 0.0586, - "step": 60975 - }, - { - "epoch": 1.54791217159538, - "grad_norm": 0.6274901628494263, - "learning_rate": 9.680585522697466e-06, - "loss": 0.0473, - "step": 60980 - }, - { - "epoch": 1.5480390912552355, - "grad_norm": 0.8237858414649963, - "learning_rate": 9.679739391631765e-06, - "loss": 0.0437, - "step": 60985 - }, - { - "epoch": 1.5481660109150908, - "grad_norm": 0.5199068188667297, - "learning_rate": 9.678893260566063e-06, - "loss": 0.0459, - "step": 60990 - }, - { - "epoch": 1.548292930574946, - "grad_norm": 0.3212149739265442, - "learning_rate": 9.678047129500362e-06, - "loss": 0.0389, - "step": 60995 - }, - { - "epoch": 1.5484198502348012, - "grad_norm": 0.35439032316207886, - "learning_rate": 9.677200998434658e-06, - "loss": 0.0375, - "step": 61000 - }, - { - "epoch": 1.5485467698946567, - "grad_norm": 0.4271939694881439, - "learning_rate": 9.676354867368957e-06, - "loss": 0.0475, - "step": 61005 - }, - { - "epoch": 1.5486736895545121, - "grad_norm": 0.5420103073120117, - "learning_rate": 9.675508736303253e-06, - "loss": 0.0392, - "step": 61010 - }, - { - "epoch": 1.5488006092143674, - "grad_norm": 0.5646460652351379, - "learning_rate": 9.674662605237552e-06, - "loss": 0.0338, - "step": 61015 - }, - { - "epoch": 1.5489275288742226, - "grad_norm": 0.3046616017818451, - "learning_rate": 9.67381647417185e-06, - "loss": 0.0408, - "step": 61020 - }, - { - "epoch": 1.5490544485340778, - "grad_norm": 0.4456482529640198, - "learning_rate": 9.672970343106149e-06, - "loss": 0.0489, - "step": 61025 - }, - { - "epoch": 1.5491813681939333, - "grad_norm": 0.4432178735733032, - "learning_rate": 9.672124212040445e-06, - "loss": 0.0302, - "step": 61030 - }, - { - "epoch": 1.5493082878537887, - "grad_norm": 0.2963753044605255, - "learning_rate": 9.671278080974744e-06, - "loss": 0.0275, - "step": 61035 - }, - { - "epoch": 1.549435207513644, - "grad_norm": 0.8702388405799866, - "learning_rate": 9.670431949909042e-06, - "loss": 0.0372, - "step": 61040 - }, - { - "epoch": 1.5495621271734992, - "grad_norm": 0.31116387248039246, - "learning_rate": 9.66958581884334e-06, - "loss": 0.0541, - "step": 61045 - }, - { - "epoch": 1.5496890468333544, - "grad_norm": 0.33169010281562805, - "learning_rate": 9.668739687777637e-06, - "loss": 0.0439, - "step": 61050 - }, - { - "epoch": 1.5498159664932099, - "grad_norm": 0.5286628007888794, - "learning_rate": 9.667893556711936e-06, - "loss": 0.0447, - "step": 61055 - }, - { - "epoch": 1.549942886153065, - "grad_norm": 0.44876211881637573, - "learning_rate": 9.667047425646234e-06, - "loss": 0.0426, - "step": 61060 - }, - { - "epoch": 1.5500698058129205, - "grad_norm": 0.475019246339798, - "learning_rate": 9.666201294580532e-06, - "loss": 0.0482, - "step": 61065 - }, - { - "epoch": 1.5501967254727758, - "grad_norm": 0.526512086391449, - "learning_rate": 9.665355163514829e-06, - "loss": 0.0244, - "step": 61070 - }, - { - "epoch": 1.550323645132631, - "grad_norm": 0.4790557324886322, - "learning_rate": 9.664509032449127e-06, - "loss": 0.0427, - "step": 61075 - }, - { - "epoch": 1.5504505647924862, - "grad_norm": 0.5933211445808411, - "learning_rate": 9.663662901383424e-06, - "loss": 0.0292, - "step": 61080 - }, - { - "epoch": 1.5505774844523417, - "grad_norm": 0.20575831830501556, - "learning_rate": 9.662816770317723e-06, - "loss": 0.0263, - "step": 61085 - }, - { - "epoch": 1.5507044041121971, - "grad_norm": 0.4005739986896515, - "learning_rate": 9.661970639252021e-06, - "loss": 0.0368, - "step": 61090 - }, - { - "epoch": 1.5508313237720524, - "grad_norm": 0.44143736362457275, - "learning_rate": 9.66112450818632e-06, - "loss": 0.054, - "step": 61095 - }, - { - "epoch": 1.5509582434319076, - "grad_norm": 0.5342168211936951, - "learning_rate": 9.660278377120616e-06, - "loss": 0.0342, - "step": 61100 - }, - { - "epoch": 1.5510851630917628, - "grad_norm": 0.4468173086643219, - "learning_rate": 9.659432246054915e-06, - "loss": 0.0401, - "step": 61105 - }, - { - "epoch": 1.5512120827516183, - "grad_norm": 0.5366607904434204, - "learning_rate": 9.658586114989213e-06, - "loss": 0.0399, - "step": 61110 - }, - { - "epoch": 1.5513390024114735, - "grad_norm": 0.5431616306304932, - "learning_rate": 9.657739983923511e-06, - "loss": 0.0398, - "step": 61115 - }, - { - "epoch": 1.551465922071329, - "grad_norm": 0.41569432616233826, - "learning_rate": 9.656893852857808e-06, - "loss": 0.0497, - "step": 61120 - }, - { - "epoch": 1.5515928417311842, - "grad_norm": 1.1864678859710693, - "learning_rate": 9.656047721792106e-06, - "loss": 0.0325, - "step": 61125 - }, - { - "epoch": 1.5517197613910394, - "grad_norm": 0.40203338861465454, - "learning_rate": 9.655201590726405e-06, - "loss": 0.0452, - "step": 61130 - }, - { - "epoch": 1.5518466810508946, - "grad_norm": 0.4864705204963684, - "learning_rate": 9.654355459660703e-06, - "loss": 0.0492, - "step": 61135 - }, - { - "epoch": 1.55197360071075, - "grad_norm": 0.8666732907295227, - "learning_rate": 9.653509328595e-06, - "loss": 0.0382, - "step": 61140 - }, - { - "epoch": 1.5521005203706055, - "grad_norm": 0.2834208309650421, - "learning_rate": 9.652663197529298e-06, - "loss": 0.0456, - "step": 61145 - }, - { - "epoch": 1.5522274400304608, - "grad_norm": 0.6185582280158997, - "learning_rate": 9.651817066463595e-06, - "loss": 0.0519, - "step": 61150 - }, - { - "epoch": 1.552354359690316, - "grad_norm": 0.30819472670555115, - "learning_rate": 9.650970935397893e-06, - "loss": 0.038, - "step": 61155 - }, - { - "epoch": 1.5524812793501712, - "grad_norm": 1.0752294063568115, - "learning_rate": 9.650124804332192e-06, - "loss": 0.0414, - "step": 61160 - }, - { - "epoch": 1.5526081990100267, - "grad_norm": 0.5116236805915833, - "learning_rate": 9.64927867326649e-06, - "loss": 0.036, - "step": 61165 - }, - { - "epoch": 1.5527351186698821, - "grad_norm": 0.3809540867805481, - "learning_rate": 9.648432542200787e-06, - "loss": 0.0314, - "step": 61170 - }, - { - "epoch": 1.5528620383297373, - "grad_norm": 0.7413864135742188, - "learning_rate": 9.647586411135085e-06, - "loss": 0.0585, - "step": 61175 - }, - { - "epoch": 1.5529889579895926, - "grad_norm": 0.4051249027252197, - "learning_rate": 9.646740280069384e-06, - "loss": 0.0354, - "step": 61180 - }, - { - "epoch": 1.5531158776494478, - "grad_norm": 0.4243438243865967, - "learning_rate": 9.645894149003682e-06, - "loss": 0.0289, - "step": 61185 - }, - { - "epoch": 1.5532427973093033, - "grad_norm": 0.3594447672367096, - "learning_rate": 9.645048017937979e-06, - "loss": 0.0519, - "step": 61190 - }, - { - "epoch": 1.5533697169691585, - "grad_norm": 0.40012121200561523, - "learning_rate": 9.644201886872277e-06, - "loss": 0.0379, - "step": 61195 - }, - { - "epoch": 1.553496636629014, - "grad_norm": 0.4406599700450897, - "learning_rate": 9.643355755806576e-06, - "loss": 0.0443, - "step": 61200 - }, - { - "epoch": 1.5536235562888692, - "grad_norm": 0.444776713848114, - "learning_rate": 9.642509624740874e-06, - "loss": 0.0293, - "step": 61205 - }, - { - "epoch": 1.5537504759487244, - "grad_norm": 0.4725155532360077, - "learning_rate": 9.64166349367517e-06, - "loss": 0.0661, - "step": 61210 - }, - { - "epoch": 1.5538773956085796, - "grad_norm": 0.36250144243240356, - "learning_rate": 9.64081736260947e-06, - "loss": 0.0489, - "step": 61215 - }, - { - "epoch": 1.554004315268435, - "grad_norm": 1.1503437757492065, - "learning_rate": 9.639971231543766e-06, - "loss": 0.0379, - "step": 61220 - }, - { - "epoch": 1.5541312349282905, - "grad_norm": 0.5243590474128723, - "learning_rate": 9.639125100478064e-06, - "loss": 0.0405, - "step": 61225 - }, - { - "epoch": 1.5542581545881458, - "grad_norm": 0.3635208308696747, - "learning_rate": 9.638278969412363e-06, - "loss": 0.044, - "step": 61230 - }, - { - "epoch": 1.554385074248001, - "grad_norm": 0.4928430914878845, - "learning_rate": 9.637432838346661e-06, - "loss": 0.0351, - "step": 61235 - }, - { - "epoch": 1.5545119939078562, - "grad_norm": 0.4726577401161194, - "learning_rate": 9.636586707280958e-06, - "loss": 0.0353, - "step": 61240 - }, - { - "epoch": 1.5546389135677117, - "grad_norm": 0.4115508794784546, - "learning_rate": 9.635740576215256e-06, - "loss": 0.0444, - "step": 61245 - }, - { - "epoch": 1.554765833227567, - "grad_norm": 0.8411226272583008, - "learning_rate": 9.634894445149555e-06, - "loss": 0.0571, - "step": 61250 - }, - { - "epoch": 1.5548927528874223, - "grad_norm": 0.8240222334861755, - "learning_rate": 9.634048314083853e-06, - "loss": 0.0541, - "step": 61255 - }, - { - "epoch": 1.5550196725472776, - "grad_norm": 0.4217580258846283, - "learning_rate": 9.63320218301815e-06, - "loss": 0.0507, - "step": 61260 - }, - { - "epoch": 1.5551465922071328, - "grad_norm": 0.44198957085609436, - "learning_rate": 9.632356051952448e-06, - "loss": 0.0457, - "step": 61265 - }, - { - "epoch": 1.555273511866988, - "grad_norm": 0.20228976011276245, - "learning_rate": 9.631509920886747e-06, - "loss": 0.0337, - "step": 61270 - }, - { - "epoch": 1.5554004315268435, - "grad_norm": 0.4278029203414917, - "learning_rate": 9.630663789821045e-06, - "loss": 0.0459, - "step": 61275 - }, - { - "epoch": 1.555527351186699, - "grad_norm": 0.6001102924346924, - "learning_rate": 9.629817658755342e-06, - "loss": 0.0572, - "step": 61280 - }, - { - "epoch": 1.5556542708465542, - "grad_norm": 0.466344952583313, - "learning_rate": 9.62897152768964e-06, - "loss": 0.049, - "step": 61285 - }, - { - "epoch": 1.5557811905064094, - "grad_norm": 0.4926164448261261, - "learning_rate": 9.628125396623937e-06, - "loss": 0.0363, - "step": 61290 - }, - { - "epoch": 1.5559081101662646, - "grad_norm": 0.4576652944087982, - "learning_rate": 9.627279265558235e-06, - "loss": 0.0508, - "step": 61295 - }, - { - "epoch": 1.55603502982612, - "grad_norm": 0.8411287069320679, - "learning_rate": 9.626433134492534e-06, - "loss": 0.0501, - "step": 61300 - }, - { - "epoch": 1.5561619494859755, - "grad_norm": 0.35475242137908936, - "learning_rate": 9.625587003426832e-06, - "loss": 0.0456, - "step": 61305 - }, - { - "epoch": 1.5562888691458308, - "grad_norm": 0.6374982595443726, - "learning_rate": 9.624740872361129e-06, - "loss": 0.06, - "step": 61310 - }, - { - "epoch": 1.556415788805686, - "grad_norm": 0.352006196975708, - "learning_rate": 9.623894741295427e-06, - "loss": 0.0456, - "step": 61315 - }, - { - "epoch": 1.5565427084655412, - "grad_norm": 0.4142884612083435, - "learning_rate": 9.623048610229725e-06, - "loss": 0.0512, - "step": 61320 - }, - { - "epoch": 1.5566696281253967, - "grad_norm": 0.40665310621261597, - "learning_rate": 9.622202479164024e-06, - "loss": 0.0486, - "step": 61325 - }, - { - "epoch": 1.556796547785252, - "grad_norm": 2.038663625717163, - "learning_rate": 9.62135634809832e-06, - "loss": 0.0435, - "step": 61330 - }, - { - "epoch": 1.5569234674451073, - "grad_norm": 0.945563554763794, - "learning_rate": 9.620510217032619e-06, - "loss": 0.0484, - "step": 61335 - }, - { - "epoch": 1.5570503871049626, - "grad_norm": 0.32528600096702576, - "learning_rate": 9.619664085966917e-06, - "loss": 0.0391, - "step": 61340 - }, - { - "epoch": 1.5571773067648178, - "grad_norm": 0.32012560963630676, - "learning_rate": 9.618817954901216e-06, - "loss": 0.0531, - "step": 61345 - }, - { - "epoch": 1.557304226424673, - "grad_norm": 0.29449817538261414, - "learning_rate": 9.617971823835512e-06, - "loss": 0.0446, - "step": 61350 - }, - { - "epoch": 1.5574311460845285, - "grad_norm": 0.4874213635921478, - "learning_rate": 9.617125692769811e-06, - "loss": 0.0399, - "step": 61355 - }, - { - "epoch": 1.557558065744384, - "grad_norm": 0.4543229937553406, - "learning_rate": 9.616279561704108e-06, - "loss": 0.042, - "step": 61360 - }, - { - "epoch": 1.5576849854042392, - "grad_norm": 0.2799110412597656, - "learning_rate": 9.615433430638406e-06, - "loss": 0.0471, - "step": 61365 - }, - { - "epoch": 1.5578119050640944, - "grad_norm": 0.5515391826629639, - "learning_rate": 9.614587299572704e-06, - "loss": 0.0425, - "step": 61370 - }, - { - "epoch": 1.5579388247239496, - "grad_norm": 0.4537419378757477, - "learning_rate": 9.613741168507003e-06, - "loss": 0.0439, - "step": 61375 - }, - { - "epoch": 1.558065744383805, - "grad_norm": 0.7098743319511414, - "learning_rate": 9.6128950374413e-06, - "loss": 0.0319, - "step": 61380 - }, - { - "epoch": 1.5581926640436605, - "grad_norm": 0.9174268245697021, - "learning_rate": 9.612048906375598e-06, - "loss": 0.0536, - "step": 61385 - }, - { - "epoch": 1.5583195837035158, - "grad_norm": 0.30117353796958923, - "learning_rate": 9.611202775309896e-06, - "loss": 0.0312, - "step": 61390 - }, - { - "epoch": 1.558446503363371, - "grad_norm": 0.6957359910011292, - "learning_rate": 9.610356644244195e-06, - "loss": 0.0165, - "step": 61395 - }, - { - "epoch": 1.5585734230232262, - "grad_norm": 0.43595898151397705, - "learning_rate": 9.609510513178491e-06, - "loss": 0.0346, - "step": 61400 - }, - { - "epoch": 1.5587003426830817, - "grad_norm": 0.359657883644104, - "learning_rate": 9.60866438211279e-06, - "loss": 0.0453, - "step": 61405 - }, - { - "epoch": 1.558827262342937, - "grad_norm": 0.36895516514778137, - "learning_rate": 9.607818251047088e-06, - "loss": 0.0354, - "step": 61410 - }, - { - "epoch": 1.5589541820027923, - "grad_norm": 0.3663685917854309, - "learning_rate": 9.606972119981387e-06, - "loss": 0.0328, - "step": 61415 - }, - { - "epoch": 1.5590811016626476, - "grad_norm": 0.4837845265865326, - "learning_rate": 9.606125988915685e-06, - "loss": 0.0585, - "step": 61420 - }, - { - "epoch": 1.5592080213225028, - "grad_norm": 0.4535079598426819, - "learning_rate": 9.605279857849982e-06, - "loss": 0.0308, - "step": 61425 - }, - { - "epoch": 1.559334940982358, - "grad_norm": 0.3753737211227417, - "learning_rate": 9.60443372678428e-06, - "loss": 0.0352, - "step": 61430 - }, - { - "epoch": 1.5594618606422135, - "grad_norm": 0.6870817542076111, - "learning_rate": 9.603587595718577e-06, - "loss": 0.05, - "step": 61435 - }, - { - "epoch": 1.559588780302069, - "grad_norm": 0.4885948896408081, - "learning_rate": 9.602741464652875e-06, - "loss": 0.0335, - "step": 61440 - }, - { - "epoch": 1.5597156999619242, - "grad_norm": 0.38993778824806213, - "learning_rate": 9.601895333587174e-06, - "loss": 0.0334, - "step": 61445 - }, - { - "epoch": 1.5598426196217794, - "grad_norm": 0.5643147230148315, - "learning_rate": 9.601049202521472e-06, - "loss": 0.041, - "step": 61450 - }, - { - "epoch": 1.5599695392816346, - "grad_norm": 0.4447472393512726, - "learning_rate": 9.600203071455769e-06, - "loss": 0.0419, - "step": 61455 - }, - { - "epoch": 1.56009645894149, - "grad_norm": 0.5149756073951721, - "learning_rate": 9.599356940390067e-06, - "loss": 0.0368, - "step": 61460 - }, - { - "epoch": 1.5602233786013453, - "grad_norm": 0.2650030553340912, - "learning_rate": 9.598510809324366e-06, - "loss": 0.0256, - "step": 61465 - }, - { - "epoch": 1.5603502982612008, - "grad_norm": 1.0557466745376587, - "learning_rate": 9.597664678258664e-06, - "loss": 0.0387, - "step": 61470 - }, - { - "epoch": 1.560477217921056, - "grad_norm": 0.5141106247901917, - "learning_rate": 9.59681854719296e-06, - "loss": 0.0426, - "step": 61475 - }, - { - "epoch": 1.5606041375809112, - "grad_norm": 0.7817085981369019, - "learning_rate": 9.595972416127259e-06, - "loss": 0.0347, - "step": 61480 - }, - { - "epoch": 1.5607310572407664, - "grad_norm": 0.5426324009895325, - "learning_rate": 9.595126285061557e-06, - "loss": 0.0565, - "step": 61485 - }, - { - "epoch": 1.560857976900622, - "grad_norm": 0.35495588183403015, - "learning_rate": 9.594280153995856e-06, - "loss": 0.0413, - "step": 61490 - }, - { - "epoch": 1.5609848965604773, - "grad_norm": 0.30762648582458496, - "learning_rate": 9.593434022930153e-06, - "loss": 0.0266, - "step": 61495 - }, - { - "epoch": 1.5611118162203326, - "grad_norm": 0.5199943780899048, - "learning_rate": 9.592587891864451e-06, - "loss": 0.0466, - "step": 61500 - }, - { - "epoch": 1.5612387358801878, - "grad_norm": 1.1502718925476074, - "learning_rate": 9.591741760798748e-06, - "loss": 0.043, - "step": 61505 - }, - { - "epoch": 1.561365655540043, - "grad_norm": 0.8781419396400452, - "learning_rate": 9.590895629733046e-06, - "loss": 0.0348, - "step": 61510 - }, - { - "epoch": 1.5614925751998985, - "grad_norm": 0.5520227551460266, - "learning_rate": 9.590049498667345e-06, - "loss": 0.0389, - "step": 61515 - }, - { - "epoch": 1.561619494859754, - "grad_norm": 0.5850364565849304, - "learning_rate": 9.589203367601643e-06, - "loss": 0.0384, - "step": 61520 - }, - { - "epoch": 1.5617464145196092, - "grad_norm": 0.5779756903648376, - "learning_rate": 9.58835723653594e-06, - "loss": 0.0377, - "step": 61525 - }, - { - "epoch": 1.5618733341794644, - "grad_norm": 0.46996036171913147, - "learning_rate": 9.587511105470238e-06, - "loss": 0.0387, - "step": 61530 - }, - { - "epoch": 1.5620002538393196, - "grad_norm": 0.5236393213272095, - "learning_rate": 9.586664974404536e-06, - "loss": 0.0201, - "step": 61535 - }, - { - "epoch": 1.562127173499175, - "grad_norm": 0.6090348362922668, - "learning_rate": 9.585818843338835e-06, - "loss": 0.0473, - "step": 61540 - }, - { - "epoch": 1.5622540931590303, - "grad_norm": 0.3736020624637604, - "learning_rate": 9.584972712273132e-06, - "loss": 0.0383, - "step": 61545 - }, - { - "epoch": 1.5623810128188858, - "grad_norm": 0.5968793630599976, - "learning_rate": 9.58412658120743e-06, - "loss": 0.0432, - "step": 61550 - }, - { - "epoch": 1.562507932478741, - "grad_norm": 0.6474838256835938, - "learning_rate": 9.583280450141728e-06, - "loss": 0.0376, - "step": 61555 - }, - { - "epoch": 1.5626348521385962, - "grad_norm": 0.5011726021766663, - "learning_rate": 9.582434319076027e-06, - "loss": 0.0257, - "step": 61560 - }, - { - "epoch": 1.5627617717984514, - "grad_norm": 0.43499505519866943, - "learning_rate": 9.581588188010323e-06, - "loss": 0.0458, - "step": 61565 - }, - { - "epoch": 1.562888691458307, - "grad_norm": 0.48259836435317993, - "learning_rate": 9.580742056944622e-06, - "loss": 0.0752, - "step": 61570 - }, - { - "epoch": 1.5630156111181623, - "grad_norm": 0.5936389565467834, - "learning_rate": 9.579895925878919e-06, - "loss": 0.0655, - "step": 61575 - }, - { - "epoch": 1.5631425307780176, - "grad_norm": 0.4118220806121826, - "learning_rate": 9.579049794813217e-06, - "loss": 0.0497, - "step": 61580 - }, - { - "epoch": 1.5632694504378728, - "grad_norm": 0.644287645816803, - "learning_rate": 9.578203663747515e-06, - "loss": 0.0308, - "step": 61585 - }, - { - "epoch": 1.563396370097728, - "grad_norm": 0.5896307229995728, - "learning_rate": 9.577357532681814e-06, - "loss": 0.0464, - "step": 61590 - }, - { - "epoch": 1.5635232897575835, - "grad_norm": 0.3914563059806824, - "learning_rate": 9.57651140161611e-06, - "loss": 0.0331, - "step": 61595 - }, - { - "epoch": 1.5636502094174387, - "grad_norm": 0.8524635434150696, - "learning_rate": 9.575665270550409e-06, - "loss": 0.0362, - "step": 61600 - }, - { - "epoch": 1.5637771290772942, - "grad_norm": 0.48118776082992554, - "learning_rate": 9.574819139484707e-06, - "loss": 0.0313, - "step": 61605 - }, - { - "epoch": 1.5639040487371494, - "grad_norm": 0.4021732807159424, - "learning_rate": 9.573973008419006e-06, - "loss": 0.0342, - "step": 61610 - }, - { - "epoch": 1.5640309683970046, - "grad_norm": 0.5035510659217834, - "learning_rate": 9.573126877353302e-06, - "loss": 0.0511, - "step": 61615 - }, - { - "epoch": 1.5641578880568598, - "grad_norm": 0.3187960088253021, - "learning_rate": 9.5722807462876e-06, - "loss": 0.0401, - "step": 61620 - }, - { - "epoch": 1.5642848077167153, - "grad_norm": 0.6317287087440491, - "learning_rate": 9.5714346152219e-06, - "loss": 0.0433, - "step": 61625 - }, - { - "epoch": 1.5644117273765707, - "grad_norm": 0.501792311668396, - "learning_rate": 9.570588484156198e-06, - "loss": 0.043, - "step": 61630 - }, - { - "epoch": 1.564538647036426, - "grad_norm": 0.3496341407299042, - "learning_rate": 9.569742353090494e-06, - "loss": 0.0348, - "step": 61635 - }, - { - "epoch": 1.5646655666962812, - "grad_norm": 0.3468380272388458, - "learning_rate": 9.568896222024793e-06, - "loss": 0.0405, - "step": 61640 - }, - { - "epoch": 1.5647924863561364, - "grad_norm": 0.3745633661746979, - "learning_rate": 9.56805009095909e-06, - "loss": 0.0333, - "step": 61645 - }, - { - "epoch": 1.5649194060159919, - "grad_norm": 0.14163877069950104, - "learning_rate": 9.567203959893388e-06, - "loss": 0.049, - "step": 61650 - }, - { - "epoch": 1.5650463256758473, - "grad_norm": 0.3820866346359253, - "learning_rate": 9.566357828827686e-06, - "loss": 0.0381, - "step": 61655 - }, - { - "epoch": 1.5651732453357026, - "grad_norm": 1.5704286098480225, - "learning_rate": 9.565511697761985e-06, - "loss": 0.0474, - "step": 61660 - }, - { - "epoch": 1.5653001649955578, - "grad_norm": 0.58939528465271, - "learning_rate": 9.564665566696281e-06, - "loss": 0.0466, - "step": 61665 - }, - { - "epoch": 1.565427084655413, - "grad_norm": 0.36717119812965393, - "learning_rate": 9.56381943563058e-06, - "loss": 0.0372, - "step": 61670 - }, - { - "epoch": 1.5655540043152685, - "grad_norm": 0.2993777096271515, - "learning_rate": 9.562973304564878e-06, - "loss": 0.0558, - "step": 61675 - }, - { - "epoch": 1.5656809239751237, - "grad_norm": 0.3953433930873871, - "learning_rate": 9.562127173499177e-06, - "loss": 0.0384, - "step": 61680 - }, - { - "epoch": 1.5658078436349792, - "grad_norm": 0.6943314075469971, - "learning_rate": 9.561281042433473e-06, - "loss": 0.0414, - "step": 61685 - }, - { - "epoch": 1.5659347632948344, - "grad_norm": 0.4004857838153839, - "learning_rate": 9.560434911367772e-06, - "loss": 0.0461, - "step": 61690 - }, - { - "epoch": 1.5660616829546896, - "grad_norm": 0.3011561930179596, - "learning_rate": 9.55958878030207e-06, - "loss": 0.0311, - "step": 61695 - }, - { - "epoch": 1.5661886026145448, - "grad_norm": 0.4413153827190399, - "learning_rate": 9.558742649236368e-06, - "loss": 0.0285, - "step": 61700 - }, - { - "epoch": 1.5663155222744003, - "grad_norm": 0.24989427626132965, - "learning_rate": 9.557896518170665e-06, - "loss": 0.0286, - "step": 61705 - }, - { - "epoch": 1.5664424419342557, - "grad_norm": 0.2301076352596283, - "learning_rate": 9.557050387104964e-06, - "loss": 0.0398, - "step": 61710 - }, - { - "epoch": 1.566569361594111, - "grad_norm": 0.5520600080490112, - "learning_rate": 9.55620425603926e-06, - "loss": 0.0379, - "step": 61715 - }, - { - "epoch": 1.5666962812539662, - "grad_norm": 0.41824063658714294, - "learning_rate": 9.555358124973559e-06, - "loss": 0.0381, - "step": 61720 - }, - { - "epoch": 1.5668232009138214, - "grad_norm": 0.329886794090271, - "learning_rate": 9.554511993907857e-06, - "loss": 0.0374, - "step": 61725 - }, - { - "epoch": 1.5669501205736769, - "grad_norm": 0.6183372735977173, - "learning_rate": 9.553665862842155e-06, - "loss": 0.0375, - "step": 61730 - }, - { - "epoch": 1.5670770402335323, - "grad_norm": 0.5408313274383545, - "learning_rate": 9.552819731776452e-06, - "loss": 0.0445, - "step": 61735 - }, - { - "epoch": 1.5672039598933876, - "grad_norm": 0.4324204623699188, - "learning_rate": 9.55197360071075e-06, - "loss": 0.0599, - "step": 61740 - }, - { - "epoch": 1.5673308795532428, - "grad_norm": 0.931523859500885, - "learning_rate": 9.551127469645049e-06, - "loss": 0.0325, - "step": 61745 - }, - { - "epoch": 1.567457799213098, - "grad_norm": 0.5118609666824341, - "learning_rate": 9.550281338579347e-06, - "loss": 0.0667, - "step": 61750 - }, - { - "epoch": 1.5675847188729535, - "grad_norm": 0.45758211612701416, - "learning_rate": 9.549435207513644e-06, - "loss": 0.03, - "step": 61755 - }, - { - "epoch": 1.5677116385328087, - "grad_norm": 0.5014877319335938, - "learning_rate": 9.548589076447942e-06, - "loss": 0.0451, - "step": 61760 - }, - { - "epoch": 1.5678385581926642, - "grad_norm": 0.8637116551399231, - "learning_rate": 9.547742945382241e-06, - "loss": 0.0726, - "step": 61765 - }, - { - "epoch": 1.5679654778525194, - "grad_norm": 0.45344552397727966, - "learning_rate": 9.54689681431654e-06, - "loss": 0.0481, - "step": 61770 - }, - { - "epoch": 1.5680923975123746, - "grad_norm": 0.4397076666355133, - "learning_rate": 9.546050683250836e-06, - "loss": 0.04, - "step": 61775 - }, - { - "epoch": 1.5682193171722298, - "grad_norm": 0.3642389178276062, - "learning_rate": 9.545204552185134e-06, - "loss": 0.0456, - "step": 61780 - }, - { - "epoch": 1.5683462368320853, - "grad_norm": 0.35574060678482056, - "learning_rate": 9.544358421119431e-06, - "loss": 0.0388, - "step": 61785 - }, - { - "epoch": 1.5684731564919407, - "grad_norm": 0.5338152050971985, - "learning_rate": 9.54351229005373e-06, - "loss": 0.0546, - "step": 61790 - }, - { - "epoch": 1.568600076151796, - "grad_norm": 0.4681483209133148, - "learning_rate": 9.542666158988028e-06, - "loss": 0.0487, - "step": 61795 - }, - { - "epoch": 1.5687269958116512, - "grad_norm": 0.45032650232315063, - "learning_rate": 9.541820027922326e-06, - "loss": 0.0485, - "step": 61800 - }, - { - "epoch": 1.5688539154715064, - "grad_norm": 0.3066333830356598, - "learning_rate": 9.540973896856623e-06, - "loss": 0.0418, - "step": 61805 - }, - { - "epoch": 1.5689808351313619, - "grad_norm": 0.4484754502773285, - "learning_rate": 9.540127765790921e-06, - "loss": 0.0579, - "step": 61810 - }, - { - "epoch": 1.5691077547912171, - "grad_norm": 0.3074108958244324, - "learning_rate": 9.53928163472522e-06, - "loss": 0.0394, - "step": 61815 - }, - { - "epoch": 1.5692346744510726, - "grad_norm": 1.726180076599121, - "learning_rate": 9.538435503659518e-06, - "loss": 0.0464, - "step": 61820 - }, - { - "epoch": 1.5693615941109278, - "grad_norm": 0.5894386172294617, - "learning_rate": 9.537589372593815e-06, - "loss": 0.0458, - "step": 61825 - }, - { - "epoch": 1.569488513770783, - "grad_norm": 2.5905277729034424, - "learning_rate": 9.536743241528113e-06, - "loss": 0.0487, - "step": 61830 - }, - { - "epoch": 1.5696154334306383, - "grad_norm": 0.2252957820892334, - "learning_rate": 9.535897110462412e-06, - "loss": 0.0316, - "step": 61835 - }, - { - "epoch": 1.5697423530904937, - "grad_norm": 0.29030677676200867, - "learning_rate": 9.53505097939671e-06, - "loss": 0.0374, - "step": 61840 - }, - { - "epoch": 1.5698692727503492, - "grad_norm": 0.4301593601703644, - "learning_rate": 9.534204848331007e-06, - "loss": 0.033, - "step": 61845 - }, - { - "epoch": 1.5699961924102044, - "grad_norm": 0.492311954498291, - "learning_rate": 9.533358717265305e-06, - "loss": 0.0503, - "step": 61850 - }, - { - "epoch": 1.5701231120700596, - "grad_norm": 1.061232566833496, - "learning_rate": 9.532512586199602e-06, - "loss": 0.0471, - "step": 61855 - }, - { - "epoch": 1.5702500317299148, - "grad_norm": 0.5907232165336609, - "learning_rate": 9.5316664551339e-06, - "loss": 0.0342, - "step": 61860 - }, - { - "epoch": 1.5703769513897703, - "grad_norm": 0.3794013261795044, - "learning_rate": 9.530820324068199e-06, - "loss": 0.0431, - "step": 61865 - }, - { - "epoch": 1.5705038710496257, - "grad_norm": 0.466548889875412, - "learning_rate": 9.529974193002497e-06, - "loss": 0.0333, - "step": 61870 - }, - { - "epoch": 1.570630790709481, - "grad_norm": 0.4053478240966797, - "learning_rate": 9.529128061936794e-06, - "loss": 0.0378, - "step": 61875 - }, - { - "epoch": 1.5707577103693362, - "grad_norm": 0.3961637318134308, - "learning_rate": 9.528281930871092e-06, - "loss": 0.0393, - "step": 61880 - }, - { - "epoch": 1.5708846300291914, - "grad_norm": 0.6722768545150757, - "learning_rate": 9.52743579980539e-06, - "loss": 0.0473, - "step": 61885 - }, - { - "epoch": 1.5710115496890469, - "grad_norm": 0.46053799986839294, - "learning_rate": 9.526589668739689e-06, - "loss": 0.0354, - "step": 61890 - }, - { - "epoch": 1.5711384693489021, - "grad_norm": 0.37527719140052795, - "learning_rate": 9.525743537673986e-06, - "loss": 0.0486, - "step": 61895 - }, - { - "epoch": 1.5712653890087576, - "grad_norm": 0.4301064908504486, - "learning_rate": 9.524897406608284e-06, - "loss": 0.0582, - "step": 61900 - }, - { - "epoch": 1.5713923086686128, - "grad_norm": 0.6529536843299866, - "learning_rate": 9.524051275542583e-06, - "loss": 0.0415, - "step": 61905 - }, - { - "epoch": 1.571519228328468, - "grad_norm": 0.6467190384864807, - "learning_rate": 9.523205144476881e-06, - "loss": 0.0605, - "step": 61910 - }, - { - "epoch": 1.5716461479883232, - "grad_norm": 0.9764957427978516, - "learning_rate": 9.522359013411178e-06, - "loss": 0.0477, - "step": 61915 - }, - { - "epoch": 1.5717730676481787, - "grad_norm": 1.6717770099639893, - "learning_rate": 9.521512882345476e-06, - "loss": 0.0543, - "step": 61920 - }, - { - "epoch": 1.5718999873080342, - "grad_norm": 1.1324249505996704, - "learning_rate": 9.520666751279773e-06, - "loss": 0.0397, - "step": 61925 - }, - { - "epoch": 1.5720269069678894, - "grad_norm": 0.48710742592811584, - "learning_rate": 9.519820620214071e-06, - "loss": 0.029, - "step": 61930 - }, - { - "epoch": 1.5721538266277446, - "grad_norm": 0.5891375541687012, - "learning_rate": 9.51897448914837e-06, - "loss": 0.0345, - "step": 61935 - }, - { - "epoch": 1.5722807462875998, - "grad_norm": 0.4660463035106659, - "learning_rate": 9.518128358082668e-06, - "loss": 0.0318, - "step": 61940 - }, - { - "epoch": 1.5724076659474553, - "grad_norm": 0.09543660283088684, - "learning_rate": 9.517282227016966e-06, - "loss": 0.0236, - "step": 61945 - }, - { - "epoch": 1.5725345856073105, - "grad_norm": 0.441120445728302, - "learning_rate": 9.516436095951263e-06, - "loss": 0.042, - "step": 61950 - }, - { - "epoch": 1.572661505267166, - "grad_norm": 1.0055873394012451, - "learning_rate": 9.515589964885562e-06, - "loss": 0.0525, - "step": 61955 - }, - { - "epoch": 1.5727884249270212, - "grad_norm": 0.41497576236724854, - "learning_rate": 9.51474383381986e-06, - "loss": 0.0448, - "step": 61960 - }, - { - "epoch": 1.5729153445868764, - "grad_norm": 0.2944048345088959, - "learning_rate": 9.513897702754158e-06, - "loss": 0.0282, - "step": 61965 - }, - { - "epoch": 1.5730422642467317, - "grad_norm": 0.29719799757003784, - "learning_rate": 9.513051571688455e-06, - "loss": 0.0509, - "step": 61970 - }, - { - "epoch": 1.573169183906587, - "grad_norm": 0.4551069438457489, - "learning_rate": 9.512205440622753e-06, - "loss": 0.0416, - "step": 61975 - }, - { - "epoch": 1.5732961035664426, - "grad_norm": 0.35904595255851746, - "learning_rate": 9.511359309557052e-06, - "loss": 0.0446, - "step": 61980 - }, - { - "epoch": 1.5734230232262978, - "grad_norm": 0.4332889914512634, - "learning_rate": 9.51051317849135e-06, - "loss": 0.0346, - "step": 61985 - }, - { - "epoch": 1.573549942886153, - "grad_norm": 0.5109942555427551, - "learning_rate": 9.509667047425647e-06, - "loss": 0.0416, - "step": 61990 - }, - { - "epoch": 1.5736768625460082, - "grad_norm": 0.458619624376297, - "learning_rate": 9.508820916359945e-06, - "loss": 0.0261, - "step": 61995 - }, - { - "epoch": 1.5738037822058637, - "grad_norm": 0.8562700748443604, - "learning_rate": 9.507974785294242e-06, - "loss": 0.0746, - "step": 62000 - }, - { - "epoch": 1.5739307018657192, - "grad_norm": 0.6213415861129761, - "learning_rate": 9.50712865422854e-06, - "loss": 0.0459, - "step": 62005 - }, - { - "epoch": 1.5740576215255744, - "grad_norm": 1.238232970237732, - "learning_rate": 9.506282523162839e-06, - "loss": 0.0466, - "step": 62010 - }, - { - "epoch": 1.5741845411854296, - "grad_norm": 0.8251142501831055, - "learning_rate": 9.505436392097137e-06, - "loss": 0.0272, - "step": 62015 - }, - { - "epoch": 1.5743114608452848, - "grad_norm": 0.2550247013568878, - "learning_rate": 9.504590261031434e-06, - "loss": 0.0338, - "step": 62020 - }, - { - "epoch": 1.5744383805051403, - "grad_norm": 0.5507863759994507, - "learning_rate": 9.503744129965732e-06, - "loss": 0.0446, - "step": 62025 - }, - { - "epoch": 1.5745653001649955, - "grad_norm": 0.4194641411304474, - "learning_rate": 9.50289799890003e-06, - "loss": 0.0278, - "step": 62030 - }, - { - "epoch": 1.574692219824851, - "grad_norm": 0.43443167209625244, - "learning_rate": 9.50205186783433e-06, - "loss": 0.0363, - "step": 62035 - }, - { - "epoch": 1.5748191394847062, - "grad_norm": 0.42523542046546936, - "learning_rate": 9.501205736768626e-06, - "loss": 0.0335, - "step": 62040 - }, - { - "epoch": 1.5749460591445614, - "grad_norm": 0.43651121854782104, - "learning_rate": 9.500359605702924e-06, - "loss": 0.0324, - "step": 62045 - }, - { - "epoch": 1.5750729788044167, - "grad_norm": 0.2648915648460388, - "learning_rate": 9.499513474637223e-06, - "loss": 0.0385, - "step": 62050 - }, - { - "epoch": 1.575199898464272, - "grad_norm": 0.4636967182159424, - "learning_rate": 9.498667343571521e-06, - "loss": 0.0475, - "step": 62055 - }, - { - "epoch": 1.5753268181241276, - "grad_norm": 1.312626600265503, - "learning_rate": 9.497821212505818e-06, - "loss": 0.0463, - "step": 62060 - }, - { - "epoch": 1.5754537377839828, - "grad_norm": 0.4046166241168976, - "learning_rate": 9.496975081440116e-06, - "loss": 0.0533, - "step": 62065 - }, - { - "epoch": 1.575580657443838, - "grad_norm": 0.3398943245410919, - "learning_rate": 9.496128950374413e-06, - "loss": 0.0468, - "step": 62070 - }, - { - "epoch": 1.5757075771036932, - "grad_norm": 0.3396152853965759, - "learning_rate": 9.495282819308711e-06, - "loss": 0.0494, - "step": 62075 - }, - { - "epoch": 1.5758344967635487, - "grad_norm": 0.8138095736503601, - "learning_rate": 9.49443668824301e-06, - "loss": 0.0487, - "step": 62080 - }, - { - "epoch": 1.5759614164234041, - "grad_norm": 0.38976818323135376, - "learning_rate": 9.493590557177308e-06, - "loss": 0.0331, - "step": 62085 - }, - { - "epoch": 1.5760883360832594, - "grad_norm": 0.4521302878856659, - "learning_rate": 9.492744426111605e-06, - "loss": 0.0431, - "step": 62090 - }, - { - "epoch": 1.5762152557431146, - "grad_norm": 0.453552782535553, - "learning_rate": 9.491898295045903e-06, - "loss": 0.0506, - "step": 62095 - }, - { - "epoch": 1.5763421754029698, - "grad_norm": 1.0236541032791138, - "learning_rate": 9.491052163980202e-06, - "loss": 0.0467, - "step": 62100 - }, - { - "epoch": 1.576469095062825, - "grad_norm": 0.6889199614524841, - "learning_rate": 9.4902060329145e-06, - "loss": 0.0484, - "step": 62105 - }, - { - "epoch": 1.5765960147226805, - "grad_norm": 0.39305347204208374, - "learning_rate": 9.489359901848797e-06, - "loss": 0.0421, - "step": 62110 - }, - { - "epoch": 1.576722934382536, - "grad_norm": 0.35025888681411743, - "learning_rate": 9.488513770783095e-06, - "loss": 0.0383, - "step": 62115 - }, - { - "epoch": 1.5768498540423912, - "grad_norm": 3.5555193424224854, - "learning_rate": 9.487667639717394e-06, - "loss": 0.0325, - "step": 62120 - }, - { - "epoch": 1.5769767737022464, - "grad_norm": 0.6006699204444885, - "learning_rate": 9.486821508651692e-06, - "loss": 0.0601, - "step": 62125 - }, - { - "epoch": 1.5771036933621017, - "grad_norm": 0.2200213372707367, - "learning_rate": 9.485975377585989e-06, - "loss": 0.0221, - "step": 62130 - }, - { - "epoch": 1.577230613021957, - "grad_norm": 0.498176246881485, - "learning_rate": 9.485129246520287e-06, - "loss": 0.0435, - "step": 62135 - }, - { - "epoch": 1.5773575326818126, - "grad_norm": 0.5196181535720825, - "learning_rate": 9.484283115454584e-06, - "loss": 0.0523, - "step": 62140 - }, - { - "epoch": 1.5774844523416678, - "grad_norm": 0.5419731140136719, - "learning_rate": 9.483436984388882e-06, - "loss": 0.0266, - "step": 62145 - }, - { - "epoch": 1.577611372001523, - "grad_norm": 0.530762255191803, - "learning_rate": 9.48259085332318e-06, - "loss": 0.0423, - "step": 62150 - }, - { - "epoch": 1.5777382916613782, - "grad_norm": 0.5955967307090759, - "learning_rate": 9.481744722257479e-06, - "loss": 0.0318, - "step": 62155 - }, - { - "epoch": 1.5778652113212337, - "grad_norm": 0.37676042318344116, - "learning_rate": 9.480898591191776e-06, - "loss": 0.0379, - "step": 62160 - }, - { - "epoch": 1.577992130981089, - "grad_norm": 0.46697813272476196, - "learning_rate": 9.480052460126074e-06, - "loss": 0.0353, - "step": 62165 - }, - { - "epoch": 1.5781190506409444, - "grad_norm": 0.4493296146392822, - "learning_rate": 9.479206329060372e-06, - "loss": 0.0232, - "step": 62170 - }, - { - "epoch": 1.5782459703007996, - "grad_norm": 0.36994248628616333, - "learning_rate": 9.478360197994671e-06, - "loss": 0.0455, - "step": 62175 - }, - { - "epoch": 1.5783728899606548, - "grad_norm": 0.4796760380268097, - "learning_rate": 9.477514066928968e-06, - "loss": 0.0304, - "step": 62180 - }, - { - "epoch": 1.57849980962051, - "grad_norm": 0.30397987365722656, - "learning_rate": 9.476667935863266e-06, - "loss": 0.0337, - "step": 62185 - }, - { - "epoch": 1.5786267292803655, - "grad_norm": 0.44830405712127686, - "learning_rate": 9.475821804797564e-06, - "loss": 0.0385, - "step": 62190 - }, - { - "epoch": 1.578753648940221, - "grad_norm": 0.540786623954773, - "learning_rate": 9.474975673731863e-06, - "loss": 0.0415, - "step": 62195 - }, - { - "epoch": 1.5788805686000762, - "grad_norm": 0.49932628870010376, - "learning_rate": 9.47412954266616e-06, - "loss": 0.0362, - "step": 62200 - }, - { - "epoch": 1.5790074882599314, - "grad_norm": 0.6375693678855896, - "learning_rate": 9.473283411600458e-06, - "loss": 0.0238, - "step": 62205 - }, - { - "epoch": 1.5791344079197867, - "grad_norm": 1.0645257234573364, - "learning_rate": 9.472437280534755e-06, - "loss": 0.0585, - "step": 62210 - }, - { - "epoch": 1.579261327579642, - "grad_norm": 0.5753408074378967, - "learning_rate": 9.471591149469053e-06, - "loss": 0.0521, - "step": 62215 - }, - { - "epoch": 1.5793882472394976, - "grad_norm": 0.4903488755226135, - "learning_rate": 9.470745018403351e-06, - "loss": 0.063, - "step": 62220 - }, - { - "epoch": 1.5795151668993528, - "grad_norm": 0.6350020170211792, - "learning_rate": 9.46989888733765e-06, - "loss": 0.025, - "step": 62225 - }, - { - "epoch": 1.579642086559208, - "grad_norm": 0.35555464029312134, - "learning_rate": 9.469052756271947e-06, - "loss": 0.0592, - "step": 62230 - }, - { - "epoch": 1.5797690062190632, - "grad_norm": 0.531130313873291, - "learning_rate": 9.468206625206245e-06, - "loss": 0.0277, - "step": 62235 - }, - { - "epoch": 1.5798959258789187, - "grad_norm": 0.30568012595176697, - "learning_rate": 9.467360494140543e-06, - "loss": 0.0356, - "step": 62240 - }, - { - "epoch": 1.580022845538774, - "grad_norm": 0.27183812856674194, - "learning_rate": 9.466514363074842e-06, - "loss": 0.025, - "step": 62245 - }, - { - "epoch": 1.5801497651986294, - "grad_norm": 0.854446530342102, - "learning_rate": 9.465668232009138e-06, - "loss": 0.0342, - "step": 62250 - }, - { - "epoch": 1.5802766848584846, - "grad_norm": 0.4195851683616638, - "learning_rate": 9.464822100943437e-06, - "loss": 0.032, - "step": 62255 - }, - { - "epoch": 1.5804036045183398, - "grad_norm": 0.5052242279052734, - "learning_rate": 9.463975969877735e-06, - "loss": 0.0327, - "step": 62260 - }, - { - "epoch": 1.580530524178195, - "grad_norm": 0.7068619132041931, - "learning_rate": 9.463129838812034e-06, - "loss": 0.0282, - "step": 62265 - }, - { - "epoch": 1.5806574438380505, - "grad_norm": 0.3179589509963989, - "learning_rate": 9.46228370774633e-06, - "loss": 0.03, - "step": 62270 - }, - { - "epoch": 1.580784363497906, - "grad_norm": 0.31801754236221313, - "learning_rate": 9.461437576680629e-06, - "loss": 0.0372, - "step": 62275 - }, - { - "epoch": 1.5809112831577612, - "grad_norm": 0.5264418125152588, - "learning_rate": 9.460591445614925e-06, - "loss": 0.0384, - "step": 62280 - }, - { - "epoch": 1.5810382028176164, - "grad_norm": 0.43095213174819946, - "learning_rate": 9.459745314549224e-06, - "loss": 0.0427, - "step": 62285 - }, - { - "epoch": 1.5811651224774717, - "grad_norm": 0.6780824065208435, - "learning_rate": 9.458899183483522e-06, - "loss": 0.0515, - "step": 62290 - }, - { - "epoch": 1.581292042137327, - "grad_norm": 0.7759870886802673, - "learning_rate": 9.45805305241782e-06, - "loss": 0.0362, - "step": 62295 - }, - { - "epoch": 1.5814189617971823, - "grad_norm": 0.3180144429206848, - "learning_rate": 9.457206921352117e-06, - "loss": 0.028, - "step": 62300 - }, - { - "epoch": 1.5815458814570378, - "grad_norm": 0.46425679326057434, - "learning_rate": 9.456360790286416e-06, - "loss": 0.0556, - "step": 62305 - }, - { - "epoch": 1.581672801116893, - "grad_norm": 0.26134786009788513, - "learning_rate": 9.455514659220714e-06, - "loss": 0.0467, - "step": 62310 - }, - { - "epoch": 1.5817997207767482, - "grad_norm": 0.4941941201686859, - "learning_rate": 9.454668528155013e-06, - "loss": 0.0476, - "step": 62315 - }, - { - "epoch": 1.5819266404366035, - "grad_norm": 0.6974141001701355, - "learning_rate": 9.45382239708931e-06, - "loss": 0.0395, - "step": 62320 - }, - { - "epoch": 1.582053560096459, - "grad_norm": 0.4705851972103119, - "learning_rate": 9.452976266023608e-06, - "loss": 0.0562, - "step": 62325 - }, - { - "epoch": 1.5821804797563144, - "grad_norm": 0.36031782627105713, - "learning_rate": 9.452130134957906e-06, - "loss": 0.0346, - "step": 62330 - }, - { - "epoch": 1.5823073994161696, - "grad_norm": 0.6299204230308533, - "learning_rate": 9.451284003892205e-06, - "loss": 0.0301, - "step": 62335 - }, - { - "epoch": 1.5824343190760248, - "grad_norm": 0.8589865565299988, - "learning_rate": 9.450437872826501e-06, - "loss": 0.0417, - "step": 62340 - }, - { - "epoch": 1.58256123873588, - "grad_norm": 0.4248843491077423, - "learning_rate": 9.4495917417608e-06, - "loss": 0.0252, - "step": 62345 - }, - { - "epoch": 1.5826881583957355, - "grad_norm": 0.3354315459728241, - "learning_rate": 9.448745610695096e-06, - "loss": 0.0394, - "step": 62350 - }, - { - "epoch": 1.582815078055591, - "grad_norm": 0.5584332346916199, - "learning_rate": 9.447899479629395e-06, - "loss": 0.0611, - "step": 62355 - }, - { - "epoch": 1.5829419977154462, - "grad_norm": 0.5144937038421631, - "learning_rate": 9.447053348563693e-06, - "loss": 0.0398, - "step": 62360 - }, - { - "epoch": 1.5830689173753014, - "grad_norm": 0.38878071308135986, - "learning_rate": 9.446207217497992e-06, - "loss": 0.0514, - "step": 62365 - }, - { - "epoch": 1.5831958370351567, - "grad_norm": 0.5971949100494385, - "learning_rate": 9.445361086432288e-06, - "loss": 0.0347, - "step": 62370 - }, - { - "epoch": 1.583322756695012, - "grad_norm": 0.5414021611213684, - "learning_rate": 9.444514955366587e-06, - "loss": 0.0393, - "step": 62375 - }, - { - "epoch": 1.5834496763548673, - "grad_norm": 0.3669593632221222, - "learning_rate": 9.443668824300885e-06, - "loss": 0.0635, - "step": 62380 - }, - { - "epoch": 1.5835765960147228, - "grad_norm": 0.18802137672901154, - "learning_rate": 9.442822693235183e-06, - "loss": 0.0231, - "step": 62385 - }, - { - "epoch": 1.583703515674578, - "grad_norm": 0.25776344537734985, - "learning_rate": 9.44197656216948e-06, - "loss": 0.0472, - "step": 62390 - }, - { - "epoch": 1.5838304353344332, - "grad_norm": 0.3012596368789673, - "learning_rate": 9.441130431103779e-06, - "loss": 0.0287, - "step": 62395 - }, - { - "epoch": 1.5839573549942885, - "grad_norm": 0.38593733310699463, - "learning_rate": 9.440284300038077e-06, - "loss": 0.04, - "step": 62400 - }, - { - "epoch": 1.584084274654144, - "grad_norm": 0.3974718153476715, - "learning_rate": 9.439438168972375e-06, - "loss": 0.0447, - "step": 62405 - }, - { - "epoch": 1.5842111943139994, - "grad_norm": 0.7362498641014099, - "learning_rate": 9.438592037906672e-06, - "loss": 0.0321, - "step": 62410 - }, - { - "epoch": 1.5843381139738546, - "grad_norm": 0.3554406464099884, - "learning_rate": 9.43774590684097e-06, - "loss": 0.0357, - "step": 62415 - }, - { - "epoch": 1.5844650336337098, - "grad_norm": 0.3439858555793762, - "learning_rate": 9.436899775775267e-06, - "loss": 0.0385, - "step": 62420 - }, - { - "epoch": 1.584591953293565, - "grad_norm": 0.9048056602478027, - "learning_rate": 9.436053644709566e-06, - "loss": 0.0573, - "step": 62425 - }, - { - "epoch": 1.5847188729534205, - "grad_norm": 0.3415582776069641, - "learning_rate": 9.435207513643864e-06, - "loss": 0.0259, - "step": 62430 - }, - { - "epoch": 1.584845792613276, - "grad_norm": 0.6358506083488464, - "learning_rate": 9.434361382578162e-06, - "loss": 0.0496, - "step": 62435 - }, - { - "epoch": 1.5849727122731312, - "grad_norm": 0.5343345999717712, - "learning_rate": 9.433515251512459e-06, - "loss": 0.039, - "step": 62440 - }, - { - "epoch": 1.5850996319329864, - "grad_norm": 0.5047246217727661, - "learning_rate": 9.432669120446757e-06, - "loss": 0.0352, - "step": 62445 - }, - { - "epoch": 1.5852265515928416, - "grad_norm": 0.34910985827445984, - "learning_rate": 9.431822989381056e-06, - "loss": 0.0367, - "step": 62450 - }, - { - "epoch": 1.5853534712526969, - "grad_norm": 0.21142558753490448, - "learning_rate": 9.430976858315354e-06, - "loss": 0.0204, - "step": 62455 - }, - { - "epoch": 1.5854803909125523, - "grad_norm": 0.34362107515335083, - "learning_rate": 9.430130727249653e-06, - "loss": 0.0232, - "step": 62460 - }, - { - "epoch": 1.5856073105724078, - "grad_norm": 0.2408212423324585, - "learning_rate": 9.42928459618395e-06, - "loss": 0.0427, - "step": 62465 - }, - { - "epoch": 1.585734230232263, - "grad_norm": 0.5389513969421387, - "learning_rate": 9.428438465118248e-06, - "loss": 0.0297, - "step": 62470 - }, - { - "epoch": 1.5858611498921182, - "grad_norm": 1.3830116987228394, - "learning_rate": 9.427592334052546e-06, - "loss": 0.0454, - "step": 62475 - }, - { - "epoch": 1.5859880695519735, - "grad_norm": 0.24403493106365204, - "learning_rate": 9.426746202986845e-06, - "loss": 0.0324, - "step": 62480 - }, - { - "epoch": 1.586114989211829, - "grad_norm": 0.32592013478279114, - "learning_rate": 9.425900071921141e-06, - "loss": 0.0323, - "step": 62485 - }, - { - "epoch": 1.5862419088716844, - "grad_norm": 0.43356287479400635, - "learning_rate": 9.42505394085544e-06, - "loss": 0.0281, - "step": 62490 - }, - { - "epoch": 1.5863688285315396, - "grad_norm": 0.3129551112651825, - "learning_rate": 9.424207809789736e-06, - "loss": 0.0404, - "step": 62495 - }, - { - "epoch": 1.5864957481913948, - "grad_norm": 0.48976895213127136, - "learning_rate": 9.423361678724035e-06, - "loss": 0.0412, - "step": 62500 - }, - { - "epoch": 1.58662266785125, - "grad_norm": 0.5779134035110474, - "learning_rate": 9.422515547658333e-06, - "loss": 0.0539, - "step": 62505 - }, - { - "epoch": 1.5867495875111055, - "grad_norm": 0.3583587408065796, - "learning_rate": 9.421669416592632e-06, - "loss": 0.0331, - "step": 62510 - }, - { - "epoch": 1.5868765071709607, - "grad_norm": 0.45251601934432983, - "learning_rate": 9.420823285526928e-06, - "loss": 0.0396, - "step": 62515 - }, - { - "epoch": 1.5870034268308162, - "grad_norm": 0.44919663667678833, - "learning_rate": 9.419977154461227e-06, - "loss": 0.0561, - "step": 62520 - }, - { - "epoch": 1.5871303464906714, - "grad_norm": 0.5257801413536072, - "learning_rate": 9.419131023395525e-06, - "loss": 0.0451, - "step": 62525 - }, - { - "epoch": 1.5872572661505266, - "grad_norm": 0.4038412868976593, - "learning_rate": 9.418284892329824e-06, - "loss": 0.0344, - "step": 62530 - }, - { - "epoch": 1.5873841858103819, - "grad_norm": 0.5427528619766235, - "learning_rate": 9.41743876126412e-06, - "loss": 0.0444, - "step": 62535 - }, - { - "epoch": 1.5875111054702373, - "grad_norm": 0.3502368628978729, - "learning_rate": 9.416592630198419e-06, - "loss": 0.0425, - "step": 62540 - }, - { - "epoch": 1.5876380251300928, - "grad_norm": 0.4850040376186371, - "learning_rate": 9.415746499132717e-06, - "loss": 0.0419, - "step": 62545 - }, - { - "epoch": 1.587764944789948, - "grad_norm": 0.8384765386581421, - "learning_rate": 9.414900368067015e-06, - "loss": 0.0318, - "step": 62550 - }, - { - "epoch": 1.5878918644498032, - "grad_norm": 0.69500333070755, - "learning_rate": 9.414054237001312e-06, - "loss": 0.0225, - "step": 62555 - }, - { - "epoch": 1.5880187841096585, - "grad_norm": 0.8842777609825134, - "learning_rate": 9.41320810593561e-06, - "loss": 0.0573, - "step": 62560 - }, - { - "epoch": 1.588145703769514, - "grad_norm": 0.6177293658256531, - "learning_rate": 9.412361974869907e-06, - "loss": 0.0527, - "step": 62565 - }, - { - "epoch": 1.5882726234293694, - "grad_norm": 0.5017049908638, - "learning_rate": 9.411515843804206e-06, - "loss": 0.0478, - "step": 62570 - }, - { - "epoch": 1.5883995430892246, - "grad_norm": 0.6293337941169739, - "learning_rate": 9.410669712738504e-06, - "loss": 0.0586, - "step": 62575 - }, - { - "epoch": 1.5885264627490798, - "grad_norm": 0.5239773392677307, - "learning_rate": 9.409823581672802e-06, - "loss": 0.0395, - "step": 62580 - }, - { - "epoch": 1.588653382408935, - "grad_norm": 0.6229205131530762, - "learning_rate": 9.4089774506071e-06, - "loss": 0.0283, - "step": 62585 - }, - { - "epoch": 1.5887803020687905, - "grad_norm": 0.7210657000541687, - "learning_rate": 9.408131319541398e-06, - "loss": 0.0381, - "step": 62590 - }, - { - "epoch": 1.5889072217286457, - "grad_norm": 0.6151654124259949, - "learning_rate": 9.407285188475696e-06, - "loss": 0.0471, - "step": 62595 - }, - { - "epoch": 1.5890341413885012, - "grad_norm": 0.3953249752521515, - "learning_rate": 9.406439057409994e-06, - "loss": 0.0353, - "step": 62600 - }, - { - "epoch": 1.5891610610483564, - "grad_norm": 0.3969983160495758, - "learning_rate": 9.405592926344291e-06, - "loss": 0.0422, - "step": 62605 - }, - { - "epoch": 1.5892879807082116, - "grad_norm": 0.4073546528816223, - "learning_rate": 9.40474679527859e-06, - "loss": 0.0312, - "step": 62610 - }, - { - "epoch": 1.5894149003680669, - "grad_norm": 0.5030173063278198, - "learning_rate": 9.403900664212888e-06, - "loss": 0.0423, - "step": 62615 - }, - { - "epoch": 1.5895418200279223, - "grad_norm": 0.47354555130004883, - "learning_rate": 9.403054533147186e-06, - "loss": 0.0342, - "step": 62620 - }, - { - "epoch": 1.5896687396877778, - "grad_norm": 0.46680334210395813, - "learning_rate": 9.402208402081483e-06, - "loss": 0.0567, - "step": 62625 - }, - { - "epoch": 1.589795659347633, - "grad_norm": 0.9847118258476257, - "learning_rate": 9.401362271015781e-06, - "loss": 0.0459, - "step": 62630 - }, - { - "epoch": 1.5899225790074882, - "grad_norm": 0.5671366453170776, - "learning_rate": 9.400516139950078e-06, - "loss": 0.0459, - "step": 62635 - }, - { - "epoch": 1.5900494986673435, - "grad_norm": 0.26191145181655884, - "learning_rate": 9.399670008884377e-06, - "loss": 0.043, - "step": 62640 - }, - { - "epoch": 1.590176418327199, - "grad_norm": 0.38814225792884827, - "learning_rate": 9.398823877818675e-06, - "loss": 0.0461, - "step": 62645 - }, - { - "epoch": 1.5903033379870541, - "grad_norm": 0.8337894678115845, - "learning_rate": 9.397977746752973e-06, - "loss": 0.0425, - "step": 62650 - }, - { - "epoch": 1.5904302576469096, - "grad_norm": 0.44675779342651367, - "learning_rate": 9.39713161568727e-06, - "loss": 0.0396, - "step": 62655 - }, - { - "epoch": 1.5905571773067648, - "grad_norm": 0.36777928471565247, - "learning_rate": 9.396285484621568e-06, - "loss": 0.0264, - "step": 62660 - }, - { - "epoch": 1.59068409696662, - "grad_norm": 0.2690862715244293, - "learning_rate": 9.395439353555867e-06, - "loss": 0.0385, - "step": 62665 - }, - { - "epoch": 1.5908110166264753, - "grad_norm": 0.43665581941604614, - "learning_rate": 9.394593222490165e-06, - "loss": 0.0535, - "step": 62670 - }, - { - "epoch": 1.5909379362863307, - "grad_norm": 1.4679784774780273, - "learning_rate": 9.393747091424462e-06, - "loss": 0.0382, - "step": 62675 - }, - { - "epoch": 1.5910648559461862, - "grad_norm": 0.1819458305835724, - "learning_rate": 9.39290096035876e-06, - "loss": 0.0514, - "step": 62680 - }, - { - "epoch": 1.5911917756060414, - "grad_norm": 0.3094083070755005, - "learning_rate": 9.392054829293059e-06, - "loss": 0.0266, - "step": 62685 - }, - { - "epoch": 1.5913186952658966, - "grad_norm": 0.3216138482093811, - "learning_rate": 9.391208698227357e-06, - "loss": 0.0372, - "step": 62690 - }, - { - "epoch": 1.5914456149257519, - "grad_norm": 0.3438762426376343, - "learning_rate": 9.390362567161654e-06, - "loss": 0.0327, - "step": 62695 - }, - { - "epoch": 1.5915725345856073, - "grad_norm": 0.8592019081115723, - "learning_rate": 9.389516436095952e-06, - "loss": 0.0594, - "step": 62700 - }, - { - "epoch": 1.5916994542454628, - "grad_norm": 0.5210677981376648, - "learning_rate": 9.388670305030249e-06, - "loss": 0.0467, - "step": 62705 - }, - { - "epoch": 1.591826373905318, - "grad_norm": 0.3915383517742157, - "learning_rate": 9.387824173964547e-06, - "loss": 0.0408, - "step": 62710 - }, - { - "epoch": 1.5919532935651732, - "grad_norm": 0.5105882883071899, - "learning_rate": 9.386978042898846e-06, - "loss": 0.0309, - "step": 62715 - }, - { - "epoch": 1.5920802132250285, - "grad_norm": 0.33356451988220215, - "learning_rate": 9.386131911833144e-06, - "loss": 0.0332, - "step": 62720 - }, - { - "epoch": 1.592207132884884, - "grad_norm": 0.3288405239582062, - "learning_rate": 9.385285780767441e-06, - "loss": 0.0271, - "step": 62725 - }, - { - "epoch": 1.5923340525447391, - "grad_norm": 0.4224473237991333, - "learning_rate": 9.38443964970174e-06, - "loss": 0.03, - "step": 62730 - }, - { - "epoch": 1.5924609722045946, - "grad_norm": 0.4622962772846222, - "learning_rate": 9.383593518636038e-06, - "loss": 0.0368, - "step": 62735 - }, - { - "epoch": 1.5925878918644498, - "grad_norm": 0.5397520065307617, - "learning_rate": 9.382747387570336e-06, - "loss": 0.0462, - "step": 62740 - }, - { - "epoch": 1.592714811524305, - "grad_norm": 0.39817094802856445, - "learning_rate": 9.381901256504633e-06, - "loss": 0.0375, - "step": 62745 - }, - { - "epoch": 1.5928417311841603, - "grad_norm": 0.34153348207473755, - "learning_rate": 9.381055125438931e-06, - "loss": 0.0286, - "step": 62750 - }, - { - "epoch": 1.5929686508440157, - "grad_norm": 0.3055344223976135, - "learning_rate": 9.38020899437323e-06, - "loss": 0.045, - "step": 62755 - }, - { - "epoch": 1.5930955705038712, - "grad_norm": 0.309180349111557, - "learning_rate": 9.379362863307528e-06, - "loss": 0.0464, - "step": 62760 - }, - { - "epoch": 1.5932224901637264, - "grad_norm": 0.7202571630477905, - "learning_rate": 9.378516732241825e-06, - "loss": 0.042, - "step": 62765 - }, - { - "epoch": 1.5933494098235816, - "grad_norm": 0.5124692320823669, - "learning_rate": 9.377670601176123e-06, - "loss": 0.0384, - "step": 62770 - }, - { - "epoch": 1.5934763294834369, - "grad_norm": 0.2934432327747345, - "learning_rate": 9.37682447011042e-06, - "loss": 0.0328, - "step": 62775 - }, - { - "epoch": 1.5936032491432923, - "grad_norm": 0.34159040451049805, - "learning_rate": 9.375978339044718e-06, - "loss": 0.0573, - "step": 62780 - }, - { - "epoch": 1.5937301688031478, - "grad_norm": 0.4177282452583313, - "learning_rate": 9.375132207979017e-06, - "loss": 0.0488, - "step": 62785 - }, - { - "epoch": 1.593857088463003, - "grad_norm": 0.41430720686912537, - "learning_rate": 9.374286076913315e-06, - "loss": 0.0326, - "step": 62790 - }, - { - "epoch": 1.5939840081228582, - "grad_norm": 0.5052420496940613, - "learning_rate": 9.373439945847612e-06, - "loss": 0.0441, - "step": 62795 - }, - { - "epoch": 1.5941109277827135, - "grad_norm": 0.7726274728775024, - "learning_rate": 9.37259381478191e-06, - "loss": 0.0418, - "step": 62800 - }, - { - "epoch": 1.5942378474425687, - "grad_norm": 0.7591464519500732, - "learning_rate": 9.371747683716209e-06, - "loss": 0.0583, - "step": 62805 - }, - { - "epoch": 1.5943647671024241, - "grad_norm": 0.4365907609462738, - "learning_rate": 9.370901552650507e-06, - "loss": 0.0414, - "step": 62810 - }, - { - "epoch": 1.5944916867622796, - "grad_norm": 0.5600367188453674, - "learning_rate": 9.370055421584804e-06, - "loss": 0.0486, - "step": 62815 - }, - { - "epoch": 1.5946186064221348, - "grad_norm": 0.565544843673706, - "learning_rate": 9.369209290519102e-06, - "loss": 0.0475, - "step": 62820 - }, - { - "epoch": 1.59474552608199, - "grad_norm": 0.4004949629306793, - "learning_rate": 9.3683631594534e-06, - "loss": 0.0446, - "step": 62825 - }, - { - "epoch": 1.5948724457418453, - "grad_norm": 0.5325344800949097, - "learning_rate": 9.367517028387699e-06, - "loss": 0.0338, - "step": 62830 - }, - { - "epoch": 1.5949993654017007, - "grad_norm": 0.3074474632740021, - "learning_rate": 9.366670897321996e-06, - "loss": 0.0391, - "step": 62835 - }, - { - "epoch": 1.5951262850615562, - "grad_norm": 0.4754837155342102, - "learning_rate": 9.365824766256294e-06, - "loss": 0.072, - "step": 62840 - }, - { - "epoch": 1.5952532047214114, - "grad_norm": 0.45502030849456787, - "learning_rate": 9.36497863519059e-06, - "loss": 0.0358, - "step": 62845 - }, - { - "epoch": 1.5953801243812666, - "grad_norm": 0.3063869774341583, - "learning_rate": 9.364132504124889e-06, - "loss": 0.0605, - "step": 62850 - }, - { - "epoch": 1.5955070440411219, - "grad_norm": 0.5104553699493408, - "learning_rate": 9.363286373059187e-06, - "loss": 0.0435, - "step": 62855 - }, - { - "epoch": 1.5956339637009773, - "grad_norm": 0.5605509877204895, - "learning_rate": 9.362440241993486e-06, - "loss": 0.0488, - "step": 62860 - }, - { - "epoch": 1.5957608833608325, - "grad_norm": 0.5093178153038025, - "learning_rate": 9.361594110927783e-06, - "loss": 0.0288, - "step": 62865 - }, - { - "epoch": 1.595887803020688, - "grad_norm": 0.5029808878898621, - "learning_rate": 9.360747979862081e-06, - "loss": 0.0431, - "step": 62870 - }, - { - "epoch": 1.5960147226805432, - "grad_norm": 0.5432028770446777, - "learning_rate": 9.35990184879638e-06, - "loss": 0.0557, - "step": 62875 - }, - { - "epoch": 1.5961416423403985, - "grad_norm": 0.4750657379627228, - "learning_rate": 9.359055717730678e-06, - "loss": 0.0339, - "step": 62880 - }, - { - "epoch": 1.5962685620002537, - "grad_norm": 0.4013741910457611, - "learning_rate": 9.358209586664975e-06, - "loss": 0.0452, - "step": 62885 - }, - { - "epoch": 1.5963954816601091, - "grad_norm": 0.5300403833389282, - "learning_rate": 9.357363455599273e-06, - "loss": 0.0361, - "step": 62890 - }, - { - "epoch": 1.5965224013199646, - "grad_norm": 0.39895951747894287, - "learning_rate": 9.356517324533571e-06, - "loss": 0.0269, - "step": 62895 - }, - { - "epoch": 1.5966493209798198, - "grad_norm": 0.7120612263679504, - "learning_rate": 9.35567119346787e-06, - "loss": 0.0418, - "step": 62900 - }, - { - "epoch": 1.596776240639675, - "grad_norm": 0.2795284390449524, - "learning_rate": 9.354825062402166e-06, - "loss": 0.0431, - "step": 62905 - }, - { - "epoch": 1.5969031602995303, - "grad_norm": 0.5581324696540833, - "learning_rate": 9.353978931336465e-06, - "loss": 0.0539, - "step": 62910 - }, - { - "epoch": 1.5970300799593857, - "grad_norm": 0.3133277893066406, - "learning_rate": 9.353132800270762e-06, - "loss": 0.0316, - "step": 62915 - }, - { - "epoch": 1.5971569996192412, - "grad_norm": 0.5882917046546936, - "learning_rate": 9.35228666920506e-06, - "loss": 0.0319, - "step": 62920 - }, - { - "epoch": 1.5972839192790964, - "grad_norm": 0.30251070857048035, - "learning_rate": 9.351440538139358e-06, - "loss": 0.0246, - "step": 62925 - }, - { - "epoch": 1.5974108389389516, - "grad_norm": 0.2572955787181854, - "learning_rate": 9.350594407073657e-06, - "loss": 0.0313, - "step": 62930 - }, - { - "epoch": 1.5975377585988069, - "grad_norm": 0.35934683680534363, - "learning_rate": 9.349748276007953e-06, - "loss": 0.0309, - "step": 62935 - }, - { - "epoch": 1.5976646782586623, - "grad_norm": 0.3783254325389862, - "learning_rate": 9.348902144942252e-06, - "loss": 0.0376, - "step": 62940 - }, - { - "epoch": 1.5977915979185175, - "grad_norm": 0.3514781594276428, - "learning_rate": 9.34805601387655e-06, - "loss": 0.039, - "step": 62945 - }, - { - "epoch": 1.597918517578373, - "grad_norm": 0.4677058160305023, - "learning_rate": 9.347209882810849e-06, - "loss": 0.0361, - "step": 62950 - }, - { - "epoch": 1.5980454372382282, - "grad_norm": 0.2999211251735687, - "learning_rate": 9.346363751745145e-06, - "loss": 0.0486, - "step": 62955 - }, - { - "epoch": 1.5981723568980835, - "grad_norm": 0.398325651884079, - "learning_rate": 9.345517620679444e-06, - "loss": 0.0542, - "step": 62960 - }, - { - "epoch": 1.5982992765579387, - "grad_norm": 0.6843740344047546, - "learning_rate": 9.344671489613742e-06, - "loss": 0.04, - "step": 62965 - }, - { - "epoch": 1.5984261962177941, - "grad_norm": 0.737666666507721, - "learning_rate": 9.34382535854804e-06, - "loss": 0.0521, - "step": 62970 - }, - { - "epoch": 1.5985531158776496, - "grad_norm": 0.7078918814659119, - "learning_rate": 9.342979227482339e-06, - "loss": 0.0475, - "step": 62975 - }, - { - "epoch": 1.5986800355375048, - "grad_norm": 0.5671383142471313, - "learning_rate": 9.342133096416636e-06, - "loss": 0.0503, - "step": 62980 - }, - { - "epoch": 1.59880695519736, - "grad_norm": 0.42145687341690063, - "learning_rate": 9.341286965350934e-06, - "loss": 0.0415, - "step": 62985 - }, - { - "epoch": 1.5989338748572153, - "grad_norm": 0.4152757525444031, - "learning_rate": 9.34044083428523e-06, - "loss": 0.0401, - "step": 62990 - }, - { - "epoch": 1.5990607945170707, - "grad_norm": 0.43627598881721497, - "learning_rate": 9.33959470321953e-06, - "loss": 0.0477, - "step": 62995 - }, - { - "epoch": 1.599187714176926, - "grad_norm": 0.32532137632369995, - "learning_rate": 9.338748572153828e-06, - "loss": 0.0357, - "step": 63000 - }, - { - "epoch": 1.5993146338367814, - "grad_norm": 0.4923105835914612, - "learning_rate": 9.337902441088126e-06, - "loss": 0.0387, - "step": 63005 - }, - { - "epoch": 1.5994415534966366, - "grad_norm": 0.2872585654258728, - "learning_rate": 9.337056310022423e-06, - "loss": 0.0356, - "step": 63010 - }, - { - "epoch": 1.5995684731564919, - "grad_norm": 0.6953533291816711, - "learning_rate": 9.336210178956721e-06, - "loss": 0.0359, - "step": 63015 - }, - { - "epoch": 1.599695392816347, - "grad_norm": 0.35004740953445435, - "learning_rate": 9.33536404789102e-06, - "loss": 0.0353, - "step": 63020 - }, - { - "epoch": 1.5998223124762025, - "grad_norm": 0.4120618999004364, - "learning_rate": 9.334517916825318e-06, - "loss": 0.0394, - "step": 63025 - }, - { - "epoch": 1.599949232136058, - "grad_norm": 0.45172956585884094, - "learning_rate": 9.333671785759615e-06, - "loss": 0.028, - "step": 63030 - }, - { - "epoch": 1.6000761517959132, - "grad_norm": 0.3953922390937805, - "learning_rate": 9.332825654693913e-06, - "loss": 0.0547, - "step": 63035 - }, - { - "epoch": 1.6002030714557685, - "grad_norm": 0.41744473576545715, - "learning_rate": 9.331979523628211e-06, - "loss": 0.0236, - "step": 63040 - }, - { - "epoch": 1.6003299911156237, - "grad_norm": 0.4613676965236664, - "learning_rate": 9.33113339256251e-06, - "loss": 0.0346, - "step": 63045 - }, - { - "epoch": 1.6004569107754791, - "grad_norm": 0.3882189095020294, - "learning_rate": 9.330287261496807e-06, - "loss": 0.0451, - "step": 63050 - }, - { - "epoch": 1.6005838304353346, - "grad_norm": 1.6592633724212646, - "learning_rate": 9.329441130431105e-06, - "loss": 0.046, - "step": 63055 - }, - { - "epoch": 1.6007107500951898, - "grad_norm": 0.5953924059867859, - "learning_rate": 9.328594999365402e-06, - "loss": 0.0533, - "step": 63060 - }, - { - "epoch": 1.600837669755045, - "grad_norm": 0.5375702977180481, - "learning_rate": 9.3277488682997e-06, - "loss": 0.0418, - "step": 63065 - }, - { - "epoch": 1.6009645894149003, - "grad_norm": 0.7887471914291382, - "learning_rate": 9.326902737233998e-06, - "loss": 0.0449, - "step": 63070 - }, - { - "epoch": 1.6010915090747557, - "grad_norm": 0.4567158818244934, - "learning_rate": 9.326056606168297e-06, - "loss": 0.0512, - "step": 63075 - }, - { - "epoch": 1.601218428734611, - "grad_norm": 0.3737211525440216, - "learning_rate": 9.325210475102594e-06, - "loss": 0.0379, - "step": 63080 - }, - { - "epoch": 1.6013453483944664, - "grad_norm": 0.4298843741416931, - "learning_rate": 9.324364344036892e-06, - "loss": 0.04, - "step": 63085 - }, - { - "epoch": 1.6014722680543216, - "grad_norm": 0.6610221266746521, - "learning_rate": 9.32351821297119e-06, - "loss": 0.0563, - "step": 63090 - }, - { - "epoch": 1.6015991877141769, - "grad_norm": 0.49968236684799194, - "learning_rate": 9.322672081905489e-06, - "loss": 0.0454, - "step": 63095 - }, - { - "epoch": 1.601726107374032, - "grad_norm": 0.3711508810520172, - "learning_rate": 9.321825950839785e-06, - "loss": 0.0485, - "step": 63100 - }, - { - "epoch": 1.6018530270338875, - "grad_norm": 0.712639570236206, - "learning_rate": 9.320979819774084e-06, - "loss": 0.0424, - "step": 63105 - }, - { - "epoch": 1.601979946693743, - "grad_norm": 0.6757246255874634, - "learning_rate": 9.320133688708382e-06, - "loss": 0.0403, - "step": 63110 - }, - { - "epoch": 1.6021068663535982, - "grad_norm": 0.3874015808105469, - "learning_rate": 9.31928755764268e-06, - "loss": 0.0349, - "step": 63115 - }, - { - "epoch": 1.6022337860134535, - "grad_norm": 0.42351147532463074, - "learning_rate": 9.318441426576977e-06, - "loss": 0.0456, - "step": 63120 - }, - { - "epoch": 1.6023607056733087, - "grad_norm": 0.3366149663925171, - "learning_rate": 9.317595295511276e-06, - "loss": 0.0314, - "step": 63125 - }, - { - "epoch": 1.6024876253331641, - "grad_norm": 0.18523290753364563, - "learning_rate": 9.316749164445572e-06, - "loss": 0.0524, - "step": 63130 - }, - { - "epoch": 1.6026145449930196, - "grad_norm": 0.6445825099945068, - "learning_rate": 9.315903033379871e-06, - "loss": 0.0521, - "step": 63135 - }, - { - "epoch": 1.6027414646528748, - "grad_norm": 0.4839903712272644, - "learning_rate": 9.31505690231417e-06, - "loss": 0.0343, - "step": 63140 - }, - { - "epoch": 1.60286838431273, - "grad_norm": 0.5203821063041687, - "learning_rate": 9.314210771248468e-06, - "loss": 0.0476, - "step": 63145 - }, - { - "epoch": 1.6029953039725853, - "grad_norm": 0.8018707036972046, - "learning_rate": 9.313364640182764e-06, - "loss": 0.0543, - "step": 63150 - }, - { - "epoch": 1.6031222236324405, - "grad_norm": 0.4172990620136261, - "learning_rate": 9.312518509117063e-06, - "loss": 0.0495, - "step": 63155 - }, - { - "epoch": 1.603249143292296, - "grad_norm": 0.35851675271987915, - "learning_rate": 9.311672378051361e-06, - "loss": 0.031, - "step": 63160 - }, - { - "epoch": 1.6033760629521514, - "grad_norm": 0.43608400225639343, - "learning_rate": 9.31082624698566e-06, - "loss": 0.0398, - "step": 63165 - }, - { - "epoch": 1.6035029826120066, - "grad_norm": 0.4570017158985138, - "learning_rate": 9.309980115919956e-06, - "loss": 0.0354, - "step": 63170 - }, - { - "epoch": 1.6036299022718619, - "grad_norm": 0.4151071012020111, - "learning_rate": 9.309133984854255e-06, - "loss": 0.0285, - "step": 63175 - }, - { - "epoch": 1.603756821931717, - "grad_norm": 2.380167007446289, - "learning_rate": 9.308287853788553e-06, - "loss": 0.0334, - "step": 63180 - }, - { - "epoch": 1.6038837415915725, - "grad_norm": 0.6386299729347229, - "learning_rate": 9.307441722722852e-06, - "loss": 0.0447, - "step": 63185 - }, - { - "epoch": 1.604010661251428, - "grad_norm": 0.41524606943130493, - "learning_rate": 9.306595591657148e-06, - "loss": 0.0367, - "step": 63190 - }, - { - "epoch": 1.6041375809112832, - "grad_norm": 0.4500667452812195, - "learning_rate": 9.305749460591447e-06, - "loss": 0.0276, - "step": 63195 - }, - { - "epoch": 1.6042645005711385, - "grad_norm": 0.4230383038520813, - "learning_rate": 9.304903329525743e-06, - "loss": 0.0397, - "step": 63200 - }, - { - "epoch": 1.6043914202309937, - "grad_norm": 0.2872556149959564, - "learning_rate": 9.304057198460042e-06, - "loss": 0.0268, - "step": 63205 - }, - { - "epoch": 1.6045183398908491, - "grad_norm": 0.5463922023773193, - "learning_rate": 9.30321106739434e-06, - "loss": 0.0429, - "step": 63210 - }, - { - "epoch": 1.6046452595507044, - "grad_norm": 0.6803659200668335, - "learning_rate": 9.302364936328639e-06, - "loss": 0.0379, - "step": 63215 - }, - { - "epoch": 1.6047721792105598, - "grad_norm": 0.47018730640411377, - "learning_rate": 9.301518805262935e-06, - "loss": 0.0522, - "step": 63220 - }, - { - "epoch": 1.604899098870415, - "grad_norm": 0.5390296578407288, - "learning_rate": 9.300672674197234e-06, - "loss": 0.0514, - "step": 63225 - }, - { - "epoch": 1.6050260185302703, - "grad_norm": 0.3665148913860321, - "learning_rate": 9.299826543131532e-06, - "loss": 0.0476, - "step": 63230 - }, - { - "epoch": 1.6051529381901255, - "grad_norm": 0.43988293409347534, - "learning_rate": 9.29898041206583e-06, - "loss": 0.0364, - "step": 63235 - }, - { - "epoch": 1.605279857849981, - "grad_norm": 0.6119487881660461, - "learning_rate": 9.298134281000127e-06, - "loss": 0.0367, - "step": 63240 - }, - { - "epoch": 1.6054067775098364, - "grad_norm": 1.281150221824646, - "learning_rate": 9.297288149934426e-06, - "loss": 0.0465, - "step": 63245 - }, - { - "epoch": 1.6055336971696916, - "grad_norm": 0.4822534918785095, - "learning_rate": 9.296442018868724e-06, - "loss": 0.0401, - "step": 63250 - }, - { - "epoch": 1.6056606168295469, - "grad_norm": 0.30741187930107117, - "learning_rate": 9.295595887803022e-06, - "loss": 0.0341, - "step": 63255 - }, - { - "epoch": 1.605787536489402, - "grad_norm": 0.4460466802120209, - "learning_rate": 9.294749756737319e-06, - "loss": 0.0446, - "step": 63260 - }, - { - "epoch": 1.6059144561492575, - "grad_norm": 0.7325794100761414, - "learning_rate": 9.293903625671617e-06, - "loss": 0.0393, - "step": 63265 - }, - { - "epoch": 1.606041375809113, - "grad_norm": 0.5223475694656372, - "learning_rate": 9.293057494605914e-06, - "loss": 0.0608, - "step": 63270 - }, - { - "epoch": 1.6061682954689682, - "grad_norm": 0.6527661085128784, - "learning_rate": 9.292211363540213e-06, - "loss": 0.0605, - "step": 63275 - }, - { - "epoch": 1.6062952151288235, - "grad_norm": 0.7878105640411377, - "learning_rate": 9.291365232474511e-06, - "loss": 0.0536, - "step": 63280 - }, - { - "epoch": 1.6064221347886787, - "grad_norm": 0.4613818824291229, - "learning_rate": 9.29051910140881e-06, - "loss": 0.0321, - "step": 63285 - }, - { - "epoch": 1.6065490544485341, - "grad_norm": 0.9195228815078735, - "learning_rate": 9.289672970343106e-06, - "loss": 0.0436, - "step": 63290 - }, - { - "epoch": 1.6066759741083894, - "grad_norm": 0.6934165954589844, - "learning_rate": 9.288826839277405e-06, - "loss": 0.0574, - "step": 63295 - }, - { - "epoch": 1.6068028937682448, - "grad_norm": 0.5767850875854492, - "learning_rate": 9.287980708211703e-06, - "loss": 0.043, - "step": 63300 - }, - { - "epoch": 1.6069298134281, - "grad_norm": 0.4335283935070038, - "learning_rate": 9.287134577146001e-06, - "loss": 0.0431, - "step": 63305 - }, - { - "epoch": 1.6070567330879553, - "grad_norm": 1.4111207723617554, - "learning_rate": 9.286288446080298e-06, - "loss": 0.0349, - "step": 63310 - }, - { - "epoch": 1.6071836527478105, - "grad_norm": 0.5023530125617981, - "learning_rate": 9.285442315014596e-06, - "loss": 0.0425, - "step": 63315 - }, - { - "epoch": 1.607310572407666, - "grad_norm": 0.4598220884799957, - "learning_rate": 9.284596183948895e-06, - "loss": 0.0527, - "step": 63320 - }, - { - "epoch": 1.6074374920675214, - "grad_norm": 0.8642185926437378, - "learning_rate": 9.283750052883193e-06, - "loss": 0.0523, - "step": 63325 - }, - { - "epoch": 1.6075644117273766, - "grad_norm": 0.3432472348213196, - "learning_rate": 9.28290392181749e-06, - "loss": 0.0305, - "step": 63330 - }, - { - "epoch": 1.6076913313872319, - "grad_norm": 0.5629320740699768, - "learning_rate": 9.282057790751788e-06, - "loss": 0.0492, - "step": 63335 - }, - { - "epoch": 1.607818251047087, - "grad_norm": 0.5198811888694763, - "learning_rate": 9.281211659686085e-06, - "loss": 0.054, - "step": 63340 - }, - { - "epoch": 1.6079451707069425, - "grad_norm": 0.3467923104763031, - "learning_rate": 9.280365528620383e-06, - "loss": 0.0431, - "step": 63345 - }, - { - "epoch": 1.6080720903667978, - "grad_norm": 0.4788595736026764, - "learning_rate": 9.279519397554682e-06, - "loss": 0.0333, - "step": 63350 - }, - { - "epoch": 1.6081990100266532, - "grad_norm": 0.45954430103302, - "learning_rate": 9.27867326648898e-06, - "loss": 0.0417, - "step": 63355 - }, - { - "epoch": 1.6083259296865084, - "grad_norm": 0.4584799110889435, - "learning_rate": 9.277827135423277e-06, - "loss": 0.0464, - "step": 63360 - }, - { - "epoch": 1.6084528493463637, - "grad_norm": 0.468094140291214, - "learning_rate": 9.276981004357575e-06, - "loss": 0.0374, - "step": 63365 - }, - { - "epoch": 1.608579769006219, - "grad_norm": 0.46645092964172363, - "learning_rate": 9.276134873291874e-06, - "loss": 0.0386, - "step": 63370 - }, - { - "epoch": 1.6087066886660744, - "grad_norm": 0.3446393311023712, - "learning_rate": 9.275288742226172e-06, - "loss": 0.0593, - "step": 63375 - }, - { - "epoch": 1.6088336083259298, - "grad_norm": 0.4474169611930847, - "learning_rate": 9.274442611160469e-06, - "loss": 0.0616, - "step": 63380 - }, - { - "epoch": 1.608960527985785, - "grad_norm": 0.2949829399585724, - "learning_rate": 9.273596480094767e-06, - "loss": 0.0464, - "step": 63385 - }, - { - "epoch": 1.6090874476456403, - "grad_norm": 0.3349860608577728, - "learning_rate": 9.272750349029066e-06, - "loss": 0.0482, - "step": 63390 - }, - { - "epoch": 1.6092143673054955, - "grad_norm": 0.5600398778915405, - "learning_rate": 9.271904217963364e-06, - "loss": 0.0405, - "step": 63395 - }, - { - "epoch": 1.609341286965351, - "grad_norm": 0.5524774789810181, - "learning_rate": 9.27105808689766e-06, - "loss": 0.0296, - "step": 63400 - }, - { - "epoch": 1.6094682066252064, - "grad_norm": 0.3927488327026367, - "learning_rate": 9.27021195583196e-06, - "loss": 0.0281, - "step": 63405 - }, - { - "epoch": 1.6095951262850616, - "grad_norm": 0.4921715259552002, - "learning_rate": 9.269365824766256e-06, - "loss": 0.0546, - "step": 63410 - }, - { - "epoch": 1.6097220459449169, - "grad_norm": 0.3514084815979004, - "learning_rate": 9.268519693700554e-06, - "loss": 0.0487, - "step": 63415 - }, - { - "epoch": 1.609848965604772, - "grad_norm": 0.7566776871681213, - "learning_rate": 9.267673562634853e-06, - "loss": 0.0501, - "step": 63420 - }, - { - "epoch": 1.6099758852646275, - "grad_norm": 0.5805408358573914, - "learning_rate": 9.266827431569151e-06, - "loss": 0.035, - "step": 63425 - }, - { - "epoch": 1.6101028049244828, - "grad_norm": 0.49801546335220337, - "learning_rate": 9.265981300503448e-06, - "loss": 0.0359, - "step": 63430 - }, - { - "epoch": 1.6102297245843382, - "grad_norm": 0.2918674647808075, - "learning_rate": 9.265135169437746e-06, - "loss": 0.0328, - "step": 63435 - }, - { - "epoch": 1.6103566442441934, - "grad_norm": 0.3871235251426697, - "learning_rate": 9.264289038372045e-06, - "loss": 0.0256, - "step": 63440 - }, - { - "epoch": 1.6104835639040487, - "grad_norm": 0.6045909523963928, - "learning_rate": 9.263442907306343e-06, - "loss": 0.0317, - "step": 63445 - }, - { - "epoch": 1.610610483563904, - "grad_norm": 0.9752066731452942, - "learning_rate": 9.26259677624064e-06, - "loss": 0.0337, - "step": 63450 - }, - { - "epoch": 1.6107374032237594, - "grad_norm": 0.3964722752571106, - "learning_rate": 9.261750645174938e-06, - "loss": 0.0311, - "step": 63455 - }, - { - "epoch": 1.6108643228836148, - "grad_norm": 0.5753036141395569, - "learning_rate": 9.260904514109237e-06, - "loss": 0.0409, - "step": 63460 - }, - { - "epoch": 1.61099124254347, - "grad_norm": 0.6297651529312134, - "learning_rate": 9.260058383043535e-06, - "loss": 0.0453, - "step": 63465 - }, - { - "epoch": 1.6111181622033253, - "grad_norm": 0.33676305413246155, - "learning_rate": 9.259212251977832e-06, - "loss": 0.0395, - "step": 63470 - }, - { - "epoch": 1.6112450818631805, - "grad_norm": 0.6746768355369568, - "learning_rate": 9.25836612091213e-06, - "loss": 0.0298, - "step": 63475 - }, - { - "epoch": 1.611372001523036, - "grad_norm": 0.3784902095794678, - "learning_rate": 9.257519989846428e-06, - "loss": 0.051, - "step": 63480 - }, - { - "epoch": 1.6114989211828912, - "grad_norm": 0.4403267502784729, - "learning_rate": 9.256673858780725e-06, - "loss": 0.0358, - "step": 63485 - }, - { - "epoch": 1.6116258408427466, - "grad_norm": 0.35793337225914, - "learning_rate": 9.255827727715024e-06, - "loss": 0.0362, - "step": 63490 - }, - { - "epoch": 1.6117527605026019, - "grad_norm": 0.34768640995025635, - "learning_rate": 9.254981596649322e-06, - "loss": 0.044, - "step": 63495 - }, - { - "epoch": 1.611879680162457, - "grad_norm": 0.47699669003486633, - "learning_rate": 9.25413546558362e-06, - "loss": 0.0384, - "step": 63500 - }, - { - "epoch": 1.6120065998223123, - "grad_norm": 0.41342785954475403, - "learning_rate": 9.253289334517917e-06, - "loss": 0.039, - "step": 63505 - }, - { - "epoch": 1.6121335194821678, - "grad_norm": 0.3642013669013977, - "learning_rate": 9.252443203452215e-06, - "loss": 0.0393, - "step": 63510 - }, - { - "epoch": 1.6122604391420232, - "grad_norm": 0.3344127833843231, - "learning_rate": 9.251597072386514e-06, - "loss": 0.0502, - "step": 63515 - }, - { - "epoch": 1.6123873588018784, - "grad_norm": 0.5588001608848572, - "learning_rate": 9.250750941320812e-06, - "loss": 0.0257, - "step": 63520 - }, - { - "epoch": 1.6125142784617337, - "grad_norm": 0.4163677990436554, - "learning_rate": 9.249904810255109e-06, - "loss": 0.0363, - "step": 63525 - }, - { - "epoch": 1.612641198121589, - "grad_norm": 0.31508588790893555, - "learning_rate": 9.249058679189407e-06, - "loss": 0.0426, - "step": 63530 - }, - { - "epoch": 1.6127681177814444, - "grad_norm": 0.4718151092529297, - "learning_rate": 9.248212548123706e-06, - "loss": 0.0502, - "step": 63535 - }, - { - "epoch": 1.6128950374412998, - "grad_norm": 0.39480146765708923, - "learning_rate": 9.247366417058004e-06, - "loss": 0.0368, - "step": 63540 - }, - { - "epoch": 1.613021957101155, - "grad_norm": 0.6064801216125488, - "learning_rate": 9.246520285992301e-06, - "loss": 0.0388, - "step": 63545 - }, - { - "epoch": 1.6131488767610103, - "grad_norm": 0.47751763463020325, - "learning_rate": 9.2456741549266e-06, - "loss": 0.0603, - "step": 63550 - }, - { - "epoch": 1.6132757964208655, - "grad_norm": 0.47426578402519226, - "learning_rate": 9.244828023860896e-06, - "loss": 0.0351, - "step": 63555 - }, - { - "epoch": 1.613402716080721, - "grad_norm": 0.5113775134086609, - "learning_rate": 9.243981892795194e-06, - "loss": 0.0409, - "step": 63560 - }, - { - "epoch": 1.6135296357405762, - "grad_norm": 0.46520280838012695, - "learning_rate": 9.243135761729493e-06, - "loss": 0.0296, - "step": 63565 - }, - { - "epoch": 1.6136565554004316, - "grad_norm": 0.31470081210136414, - "learning_rate": 9.242289630663791e-06, - "loss": 0.0299, - "step": 63570 - }, - { - "epoch": 1.6137834750602869, - "grad_norm": 0.45933106541633606, - "learning_rate": 9.241443499598088e-06, - "loss": 0.0435, - "step": 63575 - }, - { - "epoch": 1.613910394720142, - "grad_norm": 0.46607598662376404, - "learning_rate": 9.240597368532386e-06, - "loss": 0.0411, - "step": 63580 - }, - { - "epoch": 1.6140373143799973, - "grad_norm": 0.605941116809845, - "learning_rate": 9.239751237466685e-06, - "loss": 0.0551, - "step": 63585 - }, - { - "epoch": 1.6141642340398528, - "grad_norm": 0.23459722101688385, - "learning_rate": 9.238905106400983e-06, - "loss": 0.0327, - "step": 63590 - }, - { - "epoch": 1.6142911536997082, - "grad_norm": 0.5809925198554993, - "learning_rate": 9.23805897533528e-06, - "loss": 0.0483, - "step": 63595 - }, - { - "epoch": 1.6144180733595634, - "grad_norm": 0.7808517813682556, - "learning_rate": 9.237212844269578e-06, - "loss": 0.0572, - "step": 63600 - }, - { - "epoch": 1.6145449930194187, - "grad_norm": 0.5828653573989868, - "learning_rate": 9.236366713203877e-06, - "loss": 0.0437, - "step": 63605 - }, - { - "epoch": 1.614671912679274, - "grad_norm": 0.341297447681427, - "learning_rate": 9.235520582138175e-06, - "loss": 0.0369, - "step": 63610 - }, - { - "epoch": 1.6147988323391294, - "grad_norm": 0.41023504734039307, - "learning_rate": 9.234674451072472e-06, - "loss": 0.0412, - "step": 63615 - }, - { - "epoch": 1.6149257519989848, - "grad_norm": 0.31916362047195435, - "learning_rate": 9.23382832000677e-06, - "loss": 0.0395, - "step": 63620 - }, - { - "epoch": 1.61505267165884, - "grad_norm": 0.2650102376937866, - "learning_rate": 9.232982188941067e-06, - "loss": 0.0459, - "step": 63625 - }, - { - "epoch": 1.6151795913186953, - "grad_norm": 0.32522478699684143, - "learning_rate": 9.232136057875365e-06, - "loss": 0.054, - "step": 63630 - }, - { - "epoch": 1.6153065109785505, - "grad_norm": 0.4820187985897064, - "learning_rate": 9.231289926809664e-06, - "loss": 0.0377, - "step": 63635 - }, - { - "epoch": 1.615433430638406, - "grad_norm": 0.4370163381099701, - "learning_rate": 9.230443795743962e-06, - "loss": 0.0415, - "step": 63640 - }, - { - "epoch": 1.6155603502982612, - "grad_norm": 0.3549247682094574, - "learning_rate": 9.229597664678259e-06, - "loss": 0.0321, - "step": 63645 - }, - { - "epoch": 1.6156872699581166, - "grad_norm": 0.4686078131198883, - "learning_rate": 9.228751533612557e-06, - "loss": 0.0418, - "step": 63650 - }, - { - "epoch": 1.6158141896179719, - "grad_norm": 0.41919198632240295, - "learning_rate": 9.227905402546856e-06, - "loss": 0.0491, - "step": 63655 - }, - { - "epoch": 1.615941109277827, - "grad_norm": 0.36859560012817383, - "learning_rate": 9.227059271481154e-06, - "loss": 0.0268, - "step": 63660 - }, - { - "epoch": 1.6160680289376823, - "grad_norm": 1.6593579053878784, - "learning_rate": 9.22621314041545e-06, - "loss": 0.0327, - "step": 63665 - }, - { - "epoch": 1.6161949485975378, - "grad_norm": 0.26635411381721497, - "learning_rate": 9.225367009349749e-06, - "loss": 0.0314, - "step": 63670 - }, - { - "epoch": 1.6163218682573932, - "grad_norm": 0.5649136304855347, - "learning_rate": 9.224520878284047e-06, - "loss": 0.044, - "step": 63675 - }, - { - "epoch": 1.6164487879172484, - "grad_norm": 0.6981995701789856, - "learning_rate": 9.223674747218346e-06, - "loss": 0.0581, - "step": 63680 - }, - { - "epoch": 1.6165757075771037, - "grad_norm": 0.25615954399108887, - "learning_rate": 9.222828616152643e-06, - "loss": 0.0436, - "step": 63685 - }, - { - "epoch": 1.616702627236959, - "grad_norm": 0.36270999908447266, - "learning_rate": 9.221982485086941e-06, - "loss": 0.0373, - "step": 63690 - }, - { - "epoch": 1.6168295468968144, - "grad_norm": 0.8146507740020752, - "learning_rate": 9.221136354021238e-06, - "loss": 0.0391, - "step": 63695 - }, - { - "epoch": 1.6169564665566696, - "grad_norm": 0.6639034152030945, - "learning_rate": 9.220290222955536e-06, - "loss": 0.0468, - "step": 63700 - }, - { - "epoch": 1.617083386216525, - "grad_norm": 0.5143311619758606, - "learning_rate": 9.219444091889835e-06, - "loss": 0.058, - "step": 63705 - }, - { - "epoch": 1.6172103058763803, - "grad_norm": 0.6397662162780762, - "learning_rate": 9.218597960824133e-06, - "loss": 0.0615, - "step": 63710 - }, - { - "epoch": 1.6173372255362355, - "grad_norm": 0.5295546054840088, - "learning_rate": 9.21775182975843e-06, - "loss": 0.0394, - "step": 63715 - }, - { - "epoch": 1.6174641451960907, - "grad_norm": 0.6192764043807983, - "learning_rate": 9.216905698692728e-06, - "loss": 0.0533, - "step": 63720 - }, - { - "epoch": 1.6175910648559462, - "grad_norm": 0.5573492050170898, - "learning_rate": 9.216059567627026e-06, - "loss": 0.0496, - "step": 63725 - }, - { - "epoch": 1.6177179845158016, - "grad_norm": 0.33959975838661194, - "learning_rate": 9.215213436561325e-06, - "loss": 0.0328, - "step": 63730 - }, - { - "epoch": 1.6178449041756569, - "grad_norm": 0.41650959849357605, - "learning_rate": 9.214367305495622e-06, - "loss": 0.0431, - "step": 63735 - }, - { - "epoch": 1.617971823835512, - "grad_norm": 0.6734082102775574, - "learning_rate": 9.21352117442992e-06, - "loss": 0.0361, - "step": 63740 - }, - { - "epoch": 1.6180987434953673, - "grad_norm": 0.41702553629875183, - "learning_rate": 9.212675043364218e-06, - "loss": 0.0337, - "step": 63745 - }, - { - "epoch": 1.6182256631552228, - "grad_norm": 0.8171368837356567, - "learning_rate": 9.211828912298517e-06, - "loss": 0.031, - "step": 63750 - }, - { - "epoch": 1.6183525828150782, - "grad_norm": 0.5161720514297485, - "learning_rate": 9.210982781232813e-06, - "loss": 0.0401, - "step": 63755 - }, - { - "epoch": 1.6184795024749334, - "grad_norm": 0.45257875323295593, - "learning_rate": 9.210136650167112e-06, - "loss": 0.0448, - "step": 63760 - }, - { - "epoch": 1.6186064221347887, - "grad_norm": 0.285356730222702, - "learning_rate": 9.209290519101409e-06, - "loss": 0.0405, - "step": 63765 - }, - { - "epoch": 1.618733341794644, - "grad_norm": 0.5070114135742188, - "learning_rate": 9.208444388035707e-06, - "loss": 0.0603, - "step": 63770 - }, - { - "epoch": 1.6188602614544993, - "grad_norm": 0.5397271513938904, - "learning_rate": 9.207598256970005e-06, - "loss": 0.0392, - "step": 63775 - }, - { - "epoch": 1.6189871811143546, - "grad_norm": 0.43517449498176575, - "learning_rate": 9.206752125904304e-06, - "loss": 0.0324, - "step": 63780 - }, - { - "epoch": 1.61911410077421, - "grad_norm": 0.4272690415382385, - "learning_rate": 9.2059059948386e-06, - "loss": 0.0371, - "step": 63785 - }, - { - "epoch": 1.6192410204340653, - "grad_norm": 0.9959965944290161, - "learning_rate": 9.205059863772899e-06, - "loss": 0.0307, - "step": 63790 - }, - { - "epoch": 1.6193679400939205, - "grad_norm": 0.5115115642547607, - "learning_rate": 9.204213732707197e-06, - "loss": 0.0451, - "step": 63795 - }, - { - "epoch": 1.6194948597537757, - "grad_norm": 0.4692324101924896, - "learning_rate": 9.203367601641496e-06, - "loss": 0.0345, - "step": 63800 - }, - { - "epoch": 1.6196217794136312, - "grad_norm": 0.6453138589859009, - "learning_rate": 9.202521470575792e-06, - "loss": 0.0357, - "step": 63805 - }, - { - "epoch": 1.6197486990734866, - "grad_norm": 0.4722641706466675, - "learning_rate": 9.20167533951009e-06, - "loss": 0.0262, - "step": 63810 - }, - { - "epoch": 1.6198756187333418, - "grad_norm": 0.5147367715835571, - "learning_rate": 9.20082920844439e-06, - "loss": 0.0406, - "step": 63815 - }, - { - "epoch": 1.620002538393197, - "grad_norm": 0.3285628855228424, - "learning_rate": 9.199983077378688e-06, - "loss": 0.0365, - "step": 63820 - }, - { - "epoch": 1.6201294580530523, - "grad_norm": 0.5340840816497803, - "learning_rate": 9.199136946312984e-06, - "loss": 0.0714, - "step": 63825 - }, - { - "epoch": 1.6202563777129078, - "grad_norm": 0.41982871294021606, - "learning_rate": 9.198290815247283e-06, - "loss": 0.0433, - "step": 63830 - }, - { - "epoch": 1.620383297372763, - "grad_norm": 0.5652270913124084, - "learning_rate": 9.19744468418158e-06, - "loss": 0.0428, - "step": 63835 - }, - { - "epoch": 1.6205102170326184, - "grad_norm": 0.6425602436065674, - "learning_rate": 9.196598553115878e-06, - "loss": 0.0436, - "step": 63840 - }, - { - "epoch": 1.6206371366924737, - "grad_norm": 0.41644319891929626, - "learning_rate": 9.195752422050176e-06, - "loss": 0.0521, - "step": 63845 - }, - { - "epoch": 1.620764056352329, - "grad_norm": 0.5372703671455383, - "learning_rate": 9.194906290984475e-06, - "loss": 0.0428, - "step": 63850 - }, - { - "epoch": 1.6208909760121841, - "grad_norm": 1.3957033157348633, - "learning_rate": 9.194060159918771e-06, - "loss": 0.0405, - "step": 63855 - }, - { - "epoch": 1.6210178956720396, - "grad_norm": 0.47421392798423767, - "learning_rate": 9.19321402885307e-06, - "loss": 0.0324, - "step": 63860 - }, - { - "epoch": 1.621144815331895, - "grad_norm": 0.5943052172660828, - "learning_rate": 9.192367897787368e-06, - "loss": 0.0333, - "step": 63865 - }, - { - "epoch": 1.6212717349917503, - "grad_norm": 0.2728966176509857, - "learning_rate": 9.191521766721667e-06, - "loss": 0.0474, - "step": 63870 - }, - { - "epoch": 1.6213986546516055, - "grad_norm": 0.6633517146110535, - "learning_rate": 9.190675635655963e-06, - "loss": 0.0408, - "step": 63875 - }, - { - "epoch": 1.6215255743114607, - "grad_norm": 0.39599454402923584, - "learning_rate": 9.189829504590262e-06, - "loss": 0.0277, - "step": 63880 - }, - { - "epoch": 1.6216524939713162, - "grad_norm": 0.49275216460227966, - "learning_rate": 9.18898337352456e-06, - "loss": 0.0324, - "step": 63885 - }, - { - "epoch": 1.6217794136311716, - "grad_norm": 0.4989050626754761, - "learning_rate": 9.188137242458858e-06, - "loss": 0.0406, - "step": 63890 - }, - { - "epoch": 1.6219063332910268, - "grad_norm": 0.3927781283855438, - "learning_rate": 9.187291111393155e-06, - "loss": 0.0362, - "step": 63895 - }, - { - "epoch": 1.622033252950882, - "grad_norm": 0.35867375135421753, - "learning_rate": 9.186444980327454e-06, - "loss": 0.0269, - "step": 63900 - }, - { - "epoch": 1.6221601726107373, - "grad_norm": 0.9725212454795837, - "learning_rate": 9.18559884926175e-06, - "loss": 0.0404, - "step": 63905 - }, - { - "epoch": 1.6222870922705928, - "grad_norm": 0.44533005356788635, - "learning_rate": 9.184752718196049e-06, - "loss": 0.0475, - "step": 63910 - }, - { - "epoch": 1.622414011930448, - "grad_norm": 0.442339688539505, - "learning_rate": 9.183906587130347e-06, - "loss": 0.0408, - "step": 63915 - }, - { - "epoch": 1.6225409315903034, - "grad_norm": 0.4648495316505432, - "learning_rate": 9.183060456064645e-06, - "loss": 0.0376, - "step": 63920 - }, - { - "epoch": 1.6226678512501587, - "grad_norm": 0.3896162509918213, - "learning_rate": 9.182214324998942e-06, - "loss": 0.0384, - "step": 63925 - }, - { - "epoch": 1.622794770910014, - "grad_norm": 0.5412706136703491, - "learning_rate": 9.18136819393324e-06, - "loss": 0.0357, - "step": 63930 - }, - { - "epoch": 1.6229216905698691, - "grad_norm": 0.4413628876209259, - "learning_rate": 9.180522062867539e-06, - "loss": 0.0597, - "step": 63935 - }, - { - "epoch": 1.6230486102297246, - "grad_norm": 0.42947766184806824, - "learning_rate": 9.179675931801837e-06, - "loss": 0.0327, - "step": 63940 - }, - { - "epoch": 1.62317552988958, - "grad_norm": 0.3911000192165375, - "learning_rate": 9.178829800736134e-06, - "loss": 0.0423, - "step": 63945 - }, - { - "epoch": 1.6233024495494353, - "grad_norm": 0.5260058641433716, - "learning_rate": 9.177983669670433e-06, - "loss": 0.0422, - "step": 63950 - }, - { - "epoch": 1.6234293692092905, - "grad_norm": 0.4706883132457733, - "learning_rate": 9.177137538604731e-06, - "loss": 0.0241, - "step": 63955 - }, - { - "epoch": 1.6235562888691457, - "grad_norm": 0.4504004418849945, - "learning_rate": 9.17629140753903e-06, - "loss": 0.0595, - "step": 63960 - }, - { - "epoch": 1.6236832085290012, - "grad_norm": 0.4297713339328766, - "learning_rate": 9.175445276473326e-06, - "loss": 0.0411, - "step": 63965 - }, - { - "epoch": 1.6238101281888566, - "grad_norm": 0.6145021915435791, - "learning_rate": 9.174599145407624e-06, - "loss": 0.0543, - "step": 63970 - }, - { - "epoch": 1.6239370478487118, - "grad_norm": 0.6268662810325623, - "learning_rate": 9.173753014341921e-06, - "loss": 0.0305, - "step": 63975 - }, - { - "epoch": 1.624063967508567, - "grad_norm": 0.33848005533218384, - "learning_rate": 9.17290688327622e-06, - "loss": 0.0409, - "step": 63980 - }, - { - "epoch": 1.6241908871684223, - "grad_norm": 0.3482067584991455, - "learning_rate": 9.172060752210518e-06, - "loss": 0.0665, - "step": 63985 - }, - { - "epoch": 1.6243178068282778, - "grad_norm": 0.7196753025054932, - "learning_rate": 9.171214621144816e-06, - "loss": 0.0637, - "step": 63990 - }, - { - "epoch": 1.624444726488133, - "grad_norm": 0.6924344897270203, - "learning_rate": 9.170368490079113e-06, - "loss": 0.0394, - "step": 63995 - }, - { - "epoch": 1.6245716461479884, - "grad_norm": 0.6481807827949524, - "learning_rate": 9.169522359013411e-06, - "loss": 0.0436, - "step": 64000 - }, - { - "epoch": 1.6246985658078437, - "grad_norm": 0.30813145637512207, - "learning_rate": 9.16867622794771e-06, - "loss": 0.0407, - "step": 64005 - }, - { - "epoch": 1.624825485467699, - "grad_norm": 0.5522648692131042, - "learning_rate": 9.167830096882008e-06, - "loss": 0.0566, - "step": 64010 - }, - { - "epoch": 1.6249524051275541, - "grad_norm": 0.9720830917358398, - "learning_rate": 9.166983965816307e-06, - "loss": 0.0522, - "step": 64015 - }, - { - "epoch": 1.6250793247874096, - "grad_norm": 0.21150535345077515, - "learning_rate": 9.166137834750603e-06, - "loss": 0.0455, - "step": 64020 - }, - { - "epoch": 1.625206244447265, - "grad_norm": 0.638489305973053, - "learning_rate": 9.165291703684902e-06, - "loss": 0.0317, - "step": 64025 - }, - { - "epoch": 1.6253331641071203, - "grad_norm": 0.47028714418411255, - "learning_rate": 9.1644455726192e-06, - "loss": 0.0448, - "step": 64030 - }, - { - "epoch": 1.6254600837669755, - "grad_norm": 0.3059314787387848, - "learning_rate": 9.163599441553499e-06, - "loss": 0.0413, - "step": 64035 - }, - { - "epoch": 1.6255870034268307, - "grad_norm": 1.0288840532302856, - "learning_rate": 9.162753310487795e-06, - "loss": 0.0356, - "step": 64040 - }, - { - "epoch": 1.6257139230866862, - "grad_norm": 0.47384369373321533, - "learning_rate": 9.161907179422094e-06, - "loss": 0.0537, - "step": 64045 - }, - { - "epoch": 1.6258408427465414, - "grad_norm": 0.6514115929603577, - "learning_rate": 9.16106104835639e-06, - "loss": 0.0411, - "step": 64050 - }, - { - "epoch": 1.6259677624063968, - "grad_norm": 0.5335904955863953, - "learning_rate": 9.160214917290689e-06, - "loss": 0.045, - "step": 64055 - }, - { - "epoch": 1.626094682066252, - "grad_norm": 0.2410622388124466, - "learning_rate": 9.159368786224987e-06, - "loss": 0.0245, - "step": 64060 - }, - { - "epoch": 1.6262216017261073, - "grad_norm": 0.5280641317367554, - "learning_rate": 9.158522655159286e-06, - "loss": 0.045, - "step": 64065 - }, - { - "epoch": 1.6263485213859625, - "grad_norm": 0.4681042432785034, - "learning_rate": 9.157676524093582e-06, - "loss": 0.0538, - "step": 64070 - }, - { - "epoch": 1.626475441045818, - "grad_norm": 0.6372799277305603, - "learning_rate": 9.15683039302788e-06, - "loss": 0.0428, - "step": 64075 - }, - { - "epoch": 1.6266023607056734, - "grad_norm": 0.8389225602149963, - "learning_rate": 9.155984261962179e-06, - "loss": 0.0319, - "step": 64080 - }, - { - "epoch": 1.6267292803655287, - "grad_norm": 0.4167573153972626, - "learning_rate": 9.155138130896477e-06, - "loss": 0.0583, - "step": 64085 - }, - { - "epoch": 1.626856200025384, - "grad_norm": 0.2793150246143341, - "learning_rate": 9.154291999830774e-06, - "loss": 0.031, - "step": 64090 - }, - { - "epoch": 1.6269831196852391, - "grad_norm": 0.2367010861635208, - "learning_rate": 9.153445868765073e-06, - "loss": 0.0449, - "step": 64095 - }, - { - "epoch": 1.6271100393450946, - "grad_norm": 0.4478657841682434, - "learning_rate": 9.152599737699371e-06, - "loss": 0.0422, - "step": 64100 - }, - { - "epoch": 1.62723695900495, - "grad_norm": 0.4798625111579895, - "learning_rate": 9.15175360663367e-06, - "loss": 0.0372, - "step": 64105 - }, - { - "epoch": 1.6273638786648053, - "grad_norm": 0.4597681760787964, - "learning_rate": 9.150907475567966e-06, - "loss": 0.0286, - "step": 64110 - }, - { - "epoch": 1.6274907983246605, - "grad_norm": 0.4191845953464508, - "learning_rate": 9.150061344502265e-06, - "loss": 0.0336, - "step": 64115 - }, - { - "epoch": 1.6276177179845157, - "grad_norm": 0.3320334255695343, - "learning_rate": 9.149215213436561e-06, - "loss": 0.0452, - "step": 64120 - }, - { - "epoch": 1.6277446376443712, - "grad_norm": 0.5614311695098877, - "learning_rate": 9.14836908237086e-06, - "loss": 0.0339, - "step": 64125 - }, - { - "epoch": 1.6278715573042264, - "grad_norm": 0.46490612626075745, - "learning_rate": 9.147522951305158e-06, - "loss": 0.0367, - "step": 64130 - }, - { - "epoch": 1.6279984769640818, - "grad_norm": 0.603071391582489, - "learning_rate": 9.146676820239456e-06, - "loss": 0.0446, - "step": 64135 - }, - { - "epoch": 1.628125396623937, - "grad_norm": 0.6703506708145142, - "learning_rate": 9.145830689173753e-06, - "loss": 0.0318, - "step": 64140 - }, - { - "epoch": 1.6282523162837923, - "grad_norm": 0.4714650511741638, - "learning_rate": 9.144984558108052e-06, - "loss": 0.0371, - "step": 64145 - }, - { - "epoch": 1.6283792359436475, - "grad_norm": 0.548242449760437, - "learning_rate": 9.14413842704235e-06, - "loss": 0.0452, - "step": 64150 - }, - { - "epoch": 1.628506155603503, - "grad_norm": 0.19964079558849335, - "learning_rate": 9.143292295976648e-06, - "loss": 0.0368, - "step": 64155 - }, - { - "epoch": 1.6286330752633584, - "grad_norm": 0.48019611835479736, - "learning_rate": 9.142446164910945e-06, - "loss": 0.0448, - "step": 64160 - }, - { - "epoch": 1.6287599949232137, - "grad_norm": 0.5065741539001465, - "learning_rate": 9.141600033845243e-06, - "loss": 0.0299, - "step": 64165 - }, - { - "epoch": 1.628886914583069, - "grad_norm": 0.29440590739250183, - "learning_rate": 9.140753902779542e-06, - "loss": 0.0323, - "step": 64170 - }, - { - "epoch": 1.6290138342429241, - "grad_norm": 0.3208966553211212, - "learning_rate": 9.13990777171384e-06, - "loss": 0.047, - "step": 64175 - }, - { - "epoch": 1.6291407539027796, - "grad_norm": 0.6405035853385925, - "learning_rate": 9.139061640648137e-06, - "loss": 0.0567, - "step": 64180 - }, - { - "epoch": 1.6292676735626348, - "grad_norm": 0.46018657088279724, - "learning_rate": 9.138215509582435e-06, - "loss": 0.0453, - "step": 64185 - }, - { - "epoch": 1.6293945932224903, - "grad_norm": 0.6060919761657715, - "learning_rate": 9.137369378516732e-06, - "loss": 0.0367, - "step": 64190 - }, - { - "epoch": 1.6295215128823455, - "grad_norm": 0.4076005518436432, - "learning_rate": 9.13652324745103e-06, - "loss": 0.0292, - "step": 64195 - }, - { - "epoch": 1.6296484325422007, - "grad_norm": 0.4588644206523895, - "learning_rate": 9.135677116385329e-06, - "loss": 0.0536, - "step": 64200 - }, - { - "epoch": 1.629775352202056, - "grad_norm": 2.5951735973358154, - "learning_rate": 9.134830985319627e-06, - "loss": 0.037, - "step": 64205 - }, - { - "epoch": 1.6299022718619114, - "grad_norm": 0.5038602948188782, - "learning_rate": 9.133984854253924e-06, - "loss": 0.0443, - "step": 64210 - }, - { - "epoch": 1.6300291915217668, - "grad_norm": 0.4969112277030945, - "learning_rate": 9.133138723188222e-06, - "loss": 0.045, - "step": 64215 - }, - { - "epoch": 1.630156111181622, - "grad_norm": 0.3243715763092041, - "learning_rate": 9.13229259212252e-06, - "loss": 0.0405, - "step": 64220 - }, - { - "epoch": 1.6302830308414773, - "grad_norm": 0.38331305980682373, - "learning_rate": 9.13144646105682e-06, - "loss": 0.0587, - "step": 64225 - }, - { - "epoch": 1.6304099505013325, - "grad_norm": 0.42282360792160034, - "learning_rate": 9.130600329991116e-06, - "loss": 0.0455, - "step": 64230 - }, - { - "epoch": 1.630536870161188, - "grad_norm": 0.5075732469558716, - "learning_rate": 9.129754198925414e-06, - "loss": 0.038, - "step": 64235 - }, - { - "epoch": 1.6306637898210434, - "grad_norm": 1.441519856452942, - "learning_rate": 9.128908067859713e-06, - "loss": 0.0525, - "step": 64240 - }, - { - "epoch": 1.6307907094808987, - "grad_norm": 0.4947074055671692, - "learning_rate": 9.128061936794011e-06, - "loss": 0.0476, - "step": 64245 - }, - { - "epoch": 1.6309176291407539, - "grad_norm": 0.6208208799362183, - "learning_rate": 9.127215805728308e-06, - "loss": 0.0364, - "step": 64250 - }, - { - "epoch": 1.6310445488006091, - "grad_norm": 0.5617443919181824, - "learning_rate": 9.126369674662606e-06, - "loss": 0.0285, - "step": 64255 - }, - { - "epoch": 1.6311714684604646, - "grad_norm": 0.38992369174957275, - "learning_rate": 9.125523543596903e-06, - "loss": 0.0377, - "step": 64260 - }, - { - "epoch": 1.6312983881203198, - "grad_norm": 0.591162383556366, - "learning_rate": 9.124677412531201e-06, - "loss": 0.0383, - "step": 64265 - }, - { - "epoch": 1.6314253077801752, - "grad_norm": 0.3943580090999603, - "learning_rate": 9.1238312814655e-06, - "loss": 0.026, - "step": 64270 - }, - { - "epoch": 1.6315522274400305, - "grad_norm": 0.6115946173667908, - "learning_rate": 9.122985150399798e-06, - "loss": 0.0385, - "step": 64275 - }, - { - "epoch": 1.6316791470998857, - "grad_norm": 0.4216555953025818, - "learning_rate": 9.122139019334095e-06, - "loss": 0.0467, - "step": 64280 - }, - { - "epoch": 1.631806066759741, - "grad_norm": 0.5023709535598755, - "learning_rate": 9.121292888268393e-06, - "loss": 0.0509, - "step": 64285 - }, - { - "epoch": 1.6319329864195964, - "grad_norm": 0.24627235531806946, - "learning_rate": 9.120446757202692e-06, - "loss": 0.0258, - "step": 64290 - }, - { - "epoch": 1.6320599060794518, - "grad_norm": 0.4677705764770508, - "learning_rate": 9.11960062613699e-06, - "loss": 0.0246, - "step": 64295 - }, - { - "epoch": 1.632186825739307, - "grad_norm": 0.38627541065216064, - "learning_rate": 9.118754495071287e-06, - "loss": 0.0346, - "step": 64300 - }, - { - "epoch": 1.6323137453991623, - "grad_norm": 0.30341580510139465, - "learning_rate": 9.117908364005585e-06, - "loss": 0.0364, - "step": 64305 - }, - { - "epoch": 1.6324406650590175, - "grad_norm": 0.36754441261291504, - "learning_rate": 9.117062232939884e-06, - "loss": 0.042, - "step": 64310 - }, - { - "epoch": 1.632567584718873, - "grad_norm": 0.49889931082725525, - "learning_rate": 9.116216101874182e-06, - "loss": 0.0344, - "step": 64315 - }, - { - "epoch": 1.6326945043787284, - "grad_norm": 0.23548609018325806, - "learning_rate": 9.115369970808479e-06, - "loss": 0.0275, - "step": 64320 - }, - { - "epoch": 1.6328214240385837, - "grad_norm": 0.7161731719970703, - "learning_rate": 9.114523839742777e-06, - "loss": 0.0524, - "step": 64325 - }, - { - "epoch": 1.6329483436984389, - "grad_norm": 0.734479546546936, - "learning_rate": 9.113677708677074e-06, - "loss": 0.0352, - "step": 64330 - }, - { - "epoch": 1.6330752633582941, - "grad_norm": 1.011337161064148, - "learning_rate": 9.112831577611372e-06, - "loss": 0.0439, - "step": 64335 - }, - { - "epoch": 1.6332021830181493, - "grad_norm": 0.43547528982162476, - "learning_rate": 9.11198544654567e-06, - "loss": 0.0464, - "step": 64340 - }, - { - "epoch": 1.6333291026780048, - "grad_norm": 1.9175692796707153, - "learning_rate": 9.111139315479969e-06, - "loss": 0.0399, - "step": 64345 - }, - { - "epoch": 1.6334560223378602, - "grad_norm": 0.5999352931976318, - "learning_rate": 9.110293184414266e-06, - "loss": 0.0568, - "step": 64350 - }, - { - "epoch": 1.6335829419977155, - "grad_norm": 0.45990899205207825, - "learning_rate": 9.109447053348564e-06, - "loss": 0.0581, - "step": 64355 - }, - { - "epoch": 1.6337098616575707, - "grad_norm": 0.42526349425315857, - "learning_rate": 9.108600922282863e-06, - "loss": 0.0423, - "step": 64360 - }, - { - "epoch": 1.633836781317426, - "grad_norm": 0.6034319996833801, - "learning_rate": 9.107754791217161e-06, - "loss": 0.0463, - "step": 64365 - }, - { - "epoch": 1.6339637009772814, - "grad_norm": 0.35851991176605225, - "learning_rate": 9.106908660151458e-06, - "loss": 0.0457, - "step": 64370 - }, - { - "epoch": 1.6340906206371368, - "grad_norm": 0.44103479385375977, - "learning_rate": 9.106062529085756e-06, - "loss": 0.0426, - "step": 64375 - }, - { - "epoch": 1.634217540296992, - "grad_norm": 0.47143620252609253, - "learning_rate": 9.105216398020054e-06, - "loss": 0.0442, - "step": 64380 - }, - { - "epoch": 1.6343444599568473, - "grad_norm": 0.5629554390907288, - "learning_rate": 9.104370266954353e-06, - "loss": 0.0394, - "step": 64385 - }, - { - "epoch": 1.6344713796167025, - "grad_norm": 0.3190833032131195, - "learning_rate": 9.10352413588865e-06, - "loss": 0.0507, - "step": 64390 - }, - { - "epoch": 1.634598299276558, - "grad_norm": 0.5460191965103149, - "learning_rate": 9.102678004822948e-06, - "loss": 0.0333, - "step": 64395 - }, - { - "epoch": 1.6347252189364132, - "grad_norm": 0.497383177280426, - "learning_rate": 9.101831873757245e-06, - "loss": 0.0336, - "step": 64400 - }, - { - "epoch": 1.6348521385962687, - "grad_norm": 0.43851038813591003, - "learning_rate": 9.100985742691543e-06, - "loss": 0.0326, - "step": 64405 - }, - { - "epoch": 1.6349790582561239, - "grad_norm": 0.6462606191635132, - "learning_rate": 9.100139611625841e-06, - "loss": 0.0412, - "step": 64410 - }, - { - "epoch": 1.6351059779159791, - "grad_norm": 0.6334230303764343, - "learning_rate": 9.09929348056014e-06, - "loss": 0.0591, - "step": 64415 - }, - { - "epoch": 1.6352328975758343, - "grad_norm": 0.47148555517196655, - "learning_rate": 9.098447349494437e-06, - "loss": 0.0347, - "step": 64420 - }, - { - "epoch": 1.6353598172356898, - "grad_norm": 0.48526328802108765, - "learning_rate": 9.097601218428735e-06, - "loss": 0.045, - "step": 64425 - }, - { - "epoch": 1.6354867368955452, - "grad_norm": 0.5678483247756958, - "learning_rate": 9.096755087363033e-06, - "loss": 0.0419, - "step": 64430 - }, - { - "epoch": 1.6356136565554005, - "grad_norm": 0.34521743655204773, - "learning_rate": 9.095908956297332e-06, - "loss": 0.0466, - "step": 64435 - }, - { - "epoch": 1.6357405762152557, - "grad_norm": 0.22155451774597168, - "learning_rate": 9.095062825231628e-06, - "loss": 0.0306, - "step": 64440 - }, - { - "epoch": 1.635867495875111, - "grad_norm": 0.5973362326622009, - "learning_rate": 9.094216694165927e-06, - "loss": 0.039, - "step": 64445 - }, - { - "epoch": 1.6359944155349664, - "grad_norm": 0.5300512909889221, - "learning_rate": 9.093370563100225e-06, - "loss": 0.0431, - "step": 64450 - }, - { - "epoch": 1.6361213351948218, - "grad_norm": 0.3708115816116333, - "learning_rate": 9.092524432034524e-06, - "loss": 0.0331, - "step": 64455 - }, - { - "epoch": 1.636248254854677, - "grad_norm": 0.25492945313453674, - "learning_rate": 9.09167830096882e-06, - "loss": 0.0491, - "step": 64460 - }, - { - "epoch": 1.6363751745145323, - "grad_norm": 0.2946164906024933, - "learning_rate": 9.090832169903119e-06, - "loss": 0.0349, - "step": 64465 - }, - { - "epoch": 1.6365020941743875, - "grad_norm": 0.45026201009750366, - "learning_rate": 9.089986038837415e-06, - "loss": 0.0381, - "step": 64470 - }, - { - "epoch": 1.636629013834243, - "grad_norm": 0.35574960708618164, - "learning_rate": 9.089139907771714e-06, - "loss": 0.049, - "step": 64475 - }, - { - "epoch": 1.6367559334940982, - "grad_norm": 0.3953196108341217, - "learning_rate": 9.088293776706012e-06, - "loss": 0.0429, - "step": 64480 - }, - { - "epoch": 1.6368828531539537, - "grad_norm": 0.7226465940475464, - "learning_rate": 9.08744764564031e-06, - "loss": 0.0449, - "step": 64485 - }, - { - "epoch": 1.6370097728138089, - "grad_norm": 0.4070194363594055, - "learning_rate": 9.086601514574607e-06, - "loss": 0.0278, - "step": 64490 - }, - { - "epoch": 1.6371366924736641, - "grad_norm": 0.5000313520431519, - "learning_rate": 9.085755383508906e-06, - "loss": 0.0391, - "step": 64495 - }, - { - "epoch": 1.6372636121335193, - "grad_norm": 0.6408107876777649, - "learning_rate": 9.084909252443204e-06, - "loss": 0.0495, - "step": 64500 - }, - { - "epoch": 1.6373905317933748, - "grad_norm": 0.418018639087677, - "learning_rate": 9.084063121377503e-06, - "loss": 0.0235, - "step": 64505 - }, - { - "epoch": 1.6375174514532302, - "grad_norm": 0.7451960444450378, - "learning_rate": 9.0832169903118e-06, - "loss": 0.0415, - "step": 64510 - }, - { - "epoch": 1.6376443711130855, - "grad_norm": 0.6853523850440979, - "learning_rate": 9.082370859246098e-06, - "loss": 0.0487, - "step": 64515 - }, - { - "epoch": 1.6377712907729407, - "grad_norm": 0.37463679909706116, - "learning_rate": 9.081524728180396e-06, - "loss": 0.0316, - "step": 64520 - }, - { - "epoch": 1.637898210432796, - "grad_norm": 0.7820852994918823, - "learning_rate": 9.080678597114695e-06, - "loss": 0.0344, - "step": 64525 - }, - { - "epoch": 1.6380251300926514, - "grad_norm": 0.32570603489875793, - "learning_rate": 9.079832466048993e-06, - "loss": 0.0376, - "step": 64530 - }, - { - "epoch": 1.6381520497525066, - "grad_norm": 0.3261527121067047, - "learning_rate": 9.07898633498329e-06, - "loss": 0.0358, - "step": 64535 - }, - { - "epoch": 1.638278969412362, - "grad_norm": 0.4507273733615875, - "learning_rate": 9.078140203917588e-06, - "loss": 0.0442, - "step": 64540 - }, - { - "epoch": 1.6384058890722173, - "grad_norm": 0.5383818745613098, - "learning_rate": 9.077294072851885e-06, - "loss": 0.0448, - "step": 64545 - }, - { - "epoch": 1.6385328087320725, - "grad_norm": 0.49342072010040283, - "learning_rate": 9.076447941786183e-06, - "loss": 0.0323, - "step": 64550 - }, - { - "epoch": 1.6386597283919278, - "grad_norm": 0.3386206030845642, - "learning_rate": 9.075601810720482e-06, - "loss": 0.0459, - "step": 64555 - }, - { - "epoch": 1.6387866480517832, - "grad_norm": 1.0043385028839111, - "learning_rate": 9.07475567965478e-06, - "loss": 0.0493, - "step": 64560 - }, - { - "epoch": 1.6389135677116387, - "grad_norm": 0.7200702428817749, - "learning_rate": 9.073909548589077e-06, - "loss": 0.0357, - "step": 64565 - }, - { - "epoch": 1.6390404873714939, - "grad_norm": 0.6868210434913635, - "learning_rate": 9.073063417523375e-06, - "loss": 0.0344, - "step": 64570 - }, - { - "epoch": 1.639167407031349, - "grad_norm": 0.6256726384162903, - "learning_rate": 9.072217286457673e-06, - "loss": 0.055, - "step": 64575 - }, - { - "epoch": 1.6392943266912043, - "grad_norm": 0.3997354507446289, - "learning_rate": 9.071371155391972e-06, - "loss": 0.0417, - "step": 64580 - }, - { - "epoch": 1.6394212463510598, - "grad_norm": 0.23764227330684662, - "learning_rate": 9.070525024326269e-06, - "loss": 0.0292, - "step": 64585 - }, - { - "epoch": 1.6395481660109152, - "grad_norm": 0.6107457280158997, - "learning_rate": 9.069678893260567e-06, - "loss": 0.0343, - "step": 64590 - }, - { - "epoch": 1.6396750856707705, - "grad_norm": 0.662048876285553, - "learning_rate": 9.068832762194865e-06, - "loss": 0.0359, - "step": 64595 - }, - { - "epoch": 1.6398020053306257, - "grad_norm": 0.6067936420440674, - "learning_rate": 9.067986631129164e-06, - "loss": 0.0415, - "step": 64600 - }, - { - "epoch": 1.639928924990481, - "grad_norm": 0.4943082332611084, - "learning_rate": 9.06714050006346e-06, - "loss": 0.0308, - "step": 64605 - }, - { - "epoch": 1.6400558446503364, - "grad_norm": 0.7095955610275269, - "learning_rate": 9.066294368997759e-06, - "loss": 0.0556, - "step": 64610 - }, - { - "epoch": 1.6401827643101916, - "grad_norm": 0.537886381149292, - "learning_rate": 9.065448237932056e-06, - "loss": 0.0373, - "step": 64615 - }, - { - "epoch": 1.640309683970047, - "grad_norm": 0.8942108154296875, - "learning_rate": 9.064602106866354e-06, - "loss": 0.0337, - "step": 64620 - }, - { - "epoch": 1.6404366036299023, - "grad_norm": 0.5603339076042175, - "learning_rate": 9.063755975800652e-06, - "loss": 0.0493, - "step": 64625 - }, - { - "epoch": 1.6405635232897575, - "grad_norm": 0.48070114850997925, - "learning_rate": 9.06290984473495e-06, - "loss": 0.0323, - "step": 64630 - }, - { - "epoch": 1.6406904429496127, - "grad_norm": 0.2241421490907669, - "learning_rate": 9.062063713669248e-06, - "loss": 0.0342, - "step": 64635 - }, - { - "epoch": 1.6408173626094682, - "grad_norm": 0.5175349712371826, - "learning_rate": 9.061217582603546e-06, - "loss": 0.0525, - "step": 64640 - }, - { - "epoch": 1.6409442822693237, - "grad_norm": 0.4282264709472656, - "learning_rate": 9.060371451537844e-06, - "loss": 0.0423, - "step": 64645 - }, - { - "epoch": 1.6410712019291789, - "grad_norm": 0.5412687063217163, - "learning_rate": 9.059525320472143e-06, - "loss": 0.0355, - "step": 64650 - }, - { - "epoch": 1.641198121589034, - "grad_norm": 0.6425674557685852, - "learning_rate": 9.05867918940644e-06, - "loss": 0.0346, - "step": 64655 - }, - { - "epoch": 1.6413250412488893, - "grad_norm": 0.6645756363868713, - "learning_rate": 9.057833058340738e-06, - "loss": 0.0495, - "step": 64660 - }, - { - "epoch": 1.6414519609087448, - "grad_norm": 0.5342534780502319, - "learning_rate": 9.056986927275036e-06, - "loss": 0.0374, - "step": 64665 - }, - { - "epoch": 1.6415788805686002, - "grad_norm": 0.3558924198150635, - "learning_rate": 9.056140796209335e-06, - "loss": 0.0401, - "step": 64670 - }, - { - "epoch": 1.6417058002284555, - "grad_norm": 1.8199994564056396, - "learning_rate": 9.055294665143631e-06, - "loss": 0.0581, - "step": 64675 - }, - { - "epoch": 1.6418327198883107, - "grad_norm": 0.18004541099071503, - "learning_rate": 9.05444853407793e-06, - "loss": 0.0456, - "step": 64680 - }, - { - "epoch": 1.641959639548166, - "grad_norm": 0.5120164752006531, - "learning_rate": 9.053602403012226e-06, - "loss": 0.0435, - "step": 64685 - }, - { - "epoch": 1.6420865592080212, - "grad_norm": 0.5783012509346008, - "learning_rate": 9.052756271946525e-06, - "loss": 0.0409, - "step": 64690 - }, - { - "epoch": 1.6422134788678766, - "grad_norm": 0.663221538066864, - "learning_rate": 9.051910140880823e-06, - "loss": 0.0321, - "step": 64695 - }, - { - "epoch": 1.642340398527732, - "grad_norm": 0.4595798850059509, - "learning_rate": 9.051064009815122e-06, - "loss": 0.0284, - "step": 64700 - }, - { - "epoch": 1.6424673181875873, - "grad_norm": 0.4805099666118622, - "learning_rate": 9.050217878749418e-06, - "loss": 0.0445, - "step": 64705 - }, - { - "epoch": 1.6425942378474425, - "grad_norm": 0.45831379294395447, - "learning_rate": 9.049371747683717e-06, - "loss": 0.0366, - "step": 64710 - }, - { - "epoch": 1.6427211575072977, - "grad_norm": 0.7373628616333008, - "learning_rate": 9.048525616618015e-06, - "loss": 0.039, - "step": 64715 - }, - { - "epoch": 1.6428480771671532, - "grad_norm": 1.4743049144744873, - "learning_rate": 9.047679485552314e-06, - "loss": 0.0255, - "step": 64720 - }, - { - "epoch": 1.6429749968270086, - "grad_norm": 0.5852136015892029, - "learning_rate": 9.04683335448661e-06, - "loss": 0.0393, - "step": 64725 - }, - { - "epoch": 1.6431019164868639, - "grad_norm": 0.4568997025489807, - "learning_rate": 9.045987223420909e-06, - "loss": 0.0417, - "step": 64730 - }, - { - "epoch": 1.643228836146719, - "grad_norm": 0.6939733624458313, - "learning_rate": 9.045141092355207e-06, - "loss": 0.0279, - "step": 64735 - }, - { - "epoch": 1.6433557558065743, - "grad_norm": 0.39523980021476746, - "learning_rate": 9.044294961289505e-06, - "loss": 0.0228, - "step": 64740 - }, - { - "epoch": 1.6434826754664298, - "grad_norm": 0.4145376980304718, - "learning_rate": 9.043448830223802e-06, - "loss": 0.0349, - "step": 64745 - }, - { - "epoch": 1.643609595126285, - "grad_norm": 0.8373459577560425, - "learning_rate": 9.0426026991581e-06, - "loss": 0.0499, - "step": 64750 - }, - { - "epoch": 1.6437365147861405, - "grad_norm": 0.4276578426361084, - "learning_rate": 9.041756568092397e-06, - "loss": 0.0426, - "step": 64755 - }, - { - "epoch": 1.6438634344459957, - "grad_norm": 0.4968733787536621, - "learning_rate": 9.040910437026696e-06, - "loss": 0.0277, - "step": 64760 - }, - { - "epoch": 1.643990354105851, - "grad_norm": 0.5852135419845581, - "learning_rate": 9.040064305960994e-06, - "loss": 0.0351, - "step": 64765 - }, - { - "epoch": 1.6441172737657062, - "grad_norm": 0.6406775116920471, - "learning_rate": 9.039218174895293e-06, - "loss": 0.0339, - "step": 64770 - }, - { - "epoch": 1.6442441934255616, - "grad_norm": 0.5282018184661865, - "learning_rate": 9.03837204382959e-06, - "loss": 0.042, - "step": 64775 - }, - { - "epoch": 1.644371113085417, - "grad_norm": 0.34750673174858093, - "learning_rate": 9.037525912763888e-06, - "loss": 0.0479, - "step": 64780 - }, - { - "epoch": 1.6444980327452723, - "grad_norm": 0.37349244952201843, - "learning_rate": 9.036679781698186e-06, - "loss": 0.0523, - "step": 64785 - }, - { - "epoch": 1.6446249524051275, - "grad_norm": 0.4894627034664154, - "learning_rate": 9.035833650632484e-06, - "loss": 0.0417, - "step": 64790 - }, - { - "epoch": 1.6447518720649827, - "grad_norm": 0.37052634358406067, - "learning_rate": 9.034987519566781e-06, - "loss": 0.0608, - "step": 64795 - }, - { - "epoch": 1.6448787917248382, - "grad_norm": 0.819925844669342, - "learning_rate": 9.03414138850108e-06, - "loss": 0.0415, - "step": 64800 - }, - { - "epoch": 1.6450057113846936, - "grad_norm": 0.3935888707637787, - "learning_rate": 9.033295257435378e-06, - "loss": 0.045, - "step": 64805 - }, - { - "epoch": 1.6451326310445489, - "grad_norm": 0.5721043348312378, - "learning_rate": 9.032449126369676e-06, - "loss": 0.0689, - "step": 64810 - }, - { - "epoch": 1.645259550704404, - "grad_norm": 0.6604814529418945, - "learning_rate": 9.031602995303973e-06, - "loss": 0.032, - "step": 64815 - }, - { - "epoch": 1.6453864703642593, - "grad_norm": 0.3584373891353607, - "learning_rate": 9.030756864238271e-06, - "loss": 0.0374, - "step": 64820 - }, - { - "epoch": 1.6455133900241148, - "grad_norm": 0.5639249086380005, - "learning_rate": 9.029910733172568e-06, - "loss": 0.0348, - "step": 64825 - }, - { - "epoch": 1.64564030968397, - "grad_norm": 0.41663745045661926, - "learning_rate": 9.029064602106867e-06, - "loss": 0.0327, - "step": 64830 - }, - { - "epoch": 1.6457672293438255, - "grad_norm": 0.3377990424633026, - "learning_rate": 9.028218471041165e-06, - "loss": 0.0346, - "step": 64835 - }, - { - "epoch": 1.6458941490036807, - "grad_norm": 0.45964446663856506, - "learning_rate": 9.027372339975463e-06, - "loss": 0.0346, - "step": 64840 - }, - { - "epoch": 1.646021068663536, - "grad_norm": 3.190828800201416, - "learning_rate": 9.02652620890976e-06, - "loss": 0.0365, - "step": 64845 - }, - { - "epoch": 1.6461479883233912, - "grad_norm": 0.3284561336040497, - "learning_rate": 9.025680077844058e-06, - "loss": 0.0631, - "step": 64850 - }, - { - "epoch": 1.6462749079832466, - "grad_norm": 0.4600352942943573, - "learning_rate": 9.024833946778357e-06, - "loss": 0.0361, - "step": 64855 - }, - { - "epoch": 1.646401827643102, - "grad_norm": 0.550199568271637, - "learning_rate": 9.023987815712655e-06, - "loss": 0.0497, - "step": 64860 - }, - { - "epoch": 1.6465287473029573, - "grad_norm": 0.6381253600120544, - "learning_rate": 9.023141684646952e-06, - "loss": 0.0522, - "step": 64865 - }, - { - "epoch": 1.6466556669628125, - "grad_norm": 0.6644925475120544, - "learning_rate": 9.02229555358125e-06, - "loss": 0.057, - "step": 64870 - }, - { - "epoch": 1.6467825866226677, - "grad_norm": 0.4541895091533661, - "learning_rate": 9.021449422515549e-06, - "loss": 0.035, - "step": 64875 - }, - { - "epoch": 1.6469095062825232, - "grad_norm": 0.5211121439933777, - "learning_rate": 9.020603291449847e-06, - "loss": 0.0447, - "step": 64880 - }, - { - "epoch": 1.6470364259423784, - "grad_norm": 0.6463835835456848, - "learning_rate": 9.019757160384144e-06, - "loss": 0.0474, - "step": 64885 - }, - { - "epoch": 1.6471633456022339, - "grad_norm": 0.29644089937210083, - "learning_rate": 9.018911029318442e-06, - "loss": 0.0519, - "step": 64890 - }, - { - "epoch": 1.647290265262089, - "grad_norm": 0.3042180836200714, - "learning_rate": 9.018064898252739e-06, - "loss": 0.0524, - "step": 64895 - }, - { - "epoch": 1.6474171849219443, - "grad_norm": 0.3909907639026642, - "learning_rate": 9.017218767187037e-06, - "loss": 0.0378, - "step": 64900 - }, - { - "epoch": 1.6475441045817996, - "grad_norm": 1.0574438571929932, - "learning_rate": 9.016372636121336e-06, - "loss": 0.0399, - "step": 64905 - }, - { - "epoch": 1.647671024241655, - "grad_norm": 0.7279480695724487, - "learning_rate": 9.015526505055634e-06, - "loss": 0.0324, - "step": 64910 - }, - { - "epoch": 1.6477979439015105, - "grad_norm": 0.6197366118431091, - "learning_rate": 9.014680373989931e-06, - "loss": 0.0369, - "step": 64915 - }, - { - "epoch": 1.6479248635613657, - "grad_norm": 0.311667263507843, - "learning_rate": 9.01383424292423e-06, - "loss": 0.0371, - "step": 64920 - }, - { - "epoch": 1.648051783221221, - "grad_norm": 0.43110620975494385, - "learning_rate": 9.012988111858528e-06, - "loss": 0.0398, - "step": 64925 - }, - { - "epoch": 1.6481787028810762, - "grad_norm": 1.095973253250122, - "learning_rate": 9.012141980792826e-06, - "loss": 0.0401, - "step": 64930 - }, - { - "epoch": 1.6483056225409316, - "grad_norm": 0.6360438466072083, - "learning_rate": 9.011295849727123e-06, - "loss": 0.0308, - "step": 64935 - }, - { - "epoch": 1.648432542200787, - "grad_norm": 0.4072820246219635, - "learning_rate": 9.010449718661421e-06, - "loss": 0.0536, - "step": 64940 - }, - { - "epoch": 1.6485594618606423, - "grad_norm": 0.643193244934082, - "learning_rate": 9.00960358759572e-06, - "loss": 0.0652, - "step": 64945 - }, - { - "epoch": 1.6486863815204975, - "grad_norm": 0.520168125629425, - "learning_rate": 9.008757456530018e-06, - "loss": 0.0447, - "step": 64950 - }, - { - "epoch": 1.6488133011803527, - "grad_norm": 0.40682411193847656, - "learning_rate": 9.007911325464315e-06, - "loss": 0.0439, - "step": 64955 - }, - { - "epoch": 1.6489402208402082, - "grad_norm": 0.5233508348464966, - "learning_rate": 9.007065194398613e-06, - "loss": 0.0499, - "step": 64960 - }, - { - "epoch": 1.6490671405000634, - "grad_norm": 2.627847194671631, - "learning_rate": 9.00621906333291e-06, - "loss": 0.0497, - "step": 64965 - }, - { - "epoch": 1.6491940601599189, - "grad_norm": 0.4716075658798218, - "learning_rate": 9.005372932267208e-06, - "loss": 0.0513, - "step": 64970 - }, - { - "epoch": 1.649320979819774, - "grad_norm": 0.38859617710113525, - "learning_rate": 9.004526801201507e-06, - "loss": 0.0461, - "step": 64975 - }, - { - "epoch": 1.6494478994796293, - "grad_norm": 0.25452059507369995, - "learning_rate": 9.003680670135805e-06, - "loss": 0.0302, - "step": 64980 - }, - { - "epoch": 1.6495748191394846, - "grad_norm": 0.5008302927017212, - "learning_rate": 9.002834539070102e-06, - "loss": 0.0523, - "step": 64985 - }, - { - "epoch": 1.64970173879934, - "grad_norm": 0.2988584041595459, - "learning_rate": 9.0019884080044e-06, - "loss": 0.0578, - "step": 64990 - }, - { - "epoch": 1.6498286584591955, - "grad_norm": 0.5405301451683044, - "learning_rate": 9.001142276938699e-06, - "loss": 0.0434, - "step": 64995 - }, - { - "epoch": 1.6499555781190507, - "grad_norm": 0.3135091960430145, - "learning_rate": 9.000296145872997e-06, - "loss": 0.0338, - "step": 65000 - }, - { - "epoch": 1.650082497778906, - "grad_norm": 0.621650755405426, - "learning_rate": 8.999450014807294e-06, - "loss": 0.0472, - "step": 65005 - }, - { - "epoch": 1.6502094174387612, - "grad_norm": 0.5747529864311218, - "learning_rate": 8.998603883741592e-06, - "loss": 0.0347, - "step": 65010 - }, - { - "epoch": 1.6503363370986166, - "grad_norm": 0.3751327097415924, - "learning_rate": 8.99775775267589e-06, - "loss": 0.0331, - "step": 65015 - }, - { - "epoch": 1.650463256758472, - "grad_norm": 0.7275444865226746, - "learning_rate": 8.996911621610189e-06, - "loss": 0.0453, - "step": 65020 - }, - { - "epoch": 1.6505901764183273, - "grad_norm": 0.45727744698524475, - "learning_rate": 8.996065490544486e-06, - "loss": 0.0381, - "step": 65025 - }, - { - "epoch": 1.6507170960781825, - "grad_norm": 0.27328670024871826, - "learning_rate": 8.995219359478784e-06, - "loss": 0.0466, - "step": 65030 - }, - { - "epoch": 1.6508440157380377, - "grad_norm": 0.3264765739440918, - "learning_rate": 8.994373228413082e-06, - "loss": 0.053, - "step": 65035 - }, - { - "epoch": 1.650970935397893, - "grad_norm": 0.24433475732803345, - "learning_rate": 8.993527097347379e-06, - "loss": 0.0476, - "step": 65040 - }, - { - "epoch": 1.6510978550577484, - "grad_norm": 0.7253446578979492, - "learning_rate": 8.992680966281678e-06, - "loss": 0.0426, - "step": 65045 - }, - { - "epoch": 1.6512247747176039, - "grad_norm": 0.34394681453704834, - "learning_rate": 8.991834835215976e-06, - "loss": 0.0419, - "step": 65050 - }, - { - "epoch": 1.651351694377459, - "grad_norm": 0.37882453203201294, - "learning_rate": 8.990988704150274e-06, - "loss": 0.0365, - "step": 65055 - }, - { - "epoch": 1.6514786140373143, - "grad_norm": 0.3902720510959625, - "learning_rate": 8.990142573084571e-06, - "loss": 0.0415, - "step": 65060 - }, - { - "epoch": 1.6516055336971696, - "grad_norm": 0.5651002526283264, - "learning_rate": 8.98929644201887e-06, - "loss": 0.0409, - "step": 65065 - }, - { - "epoch": 1.651732453357025, - "grad_norm": 0.4605546295642853, - "learning_rate": 8.988450310953168e-06, - "loss": 0.0352, - "step": 65070 - }, - { - "epoch": 1.6518593730168805, - "grad_norm": 0.3283293545246124, - "learning_rate": 8.987604179887466e-06, - "loss": 0.0328, - "step": 65075 - }, - { - "epoch": 1.6519862926767357, - "grad_norm": 0.5749640464782715, - "learning_rate": 8.986758048821763e-06, - "loss": 0.0462, - "step": 65080 - }, - { - "epoch": 1.652113212336591, - "grad_norm": 0.6464762091636658, - "learning_rate": 8.985911917756061e-06, - "loss": 0.0409, - "step": 65085 - }, - { - "epoch": 1.6522401319964461, - "grad_norm": 0.3721090853214264, - "learning_rate": 8.98506578669036e-06, - "loss": 0.0314, - "step": 65090 - }, - { - "epoch": 1.6523670516563016, - "grad_norm": 0.9961184859275818, - "learning_rate": 8.984219655624658e-06, - "loss": 0.0398, - "step": 65095 - }, - { - "epoch": 1.6524939713161568, - "grad_norm": 0.3242759108543396, - "learning_rate": 8.983373524558955e-06, - "loss": 0.0354, - "step": 65100 - }, - { - "epoch": 1.6526208909760123, - "grad_norm": 0.6023969054222107, - "learning_rate": 8.982527393493253e-06, - "loss": 0.0278, - "step": 65105 - }, - { - "epoch": 1.6527478106358675, - "grad_norm": 0.2688922584056854, - "learning_rate": 8.98168126242755e-06, - "loss": 0.0311, - "step": 65110 - }, - { - "epoch": 1.6528747302957227, - "grad_norm": 0.6650024056434631, - "learning_rate": 8.980835131361848e-06, - "loss": 0.0388, - "step": 65115 - }, - { - "epoch": 1.653001649955578, - "grad_norm": 0.33371973037719727, - "learning_rate": 8.979989000296147e-06, - "loss": 0.0627, - "step": 65120 - }, - { - "epoch": 1.6531285696154334, - "grad_norm": 0.7486276626586914, - "learning_rate": 8.979142869230445e-06, - "loss": 0.0438, - "step": 65125 - }, - { - "epoch": 1.6532554892752889, - "grad_norm": 0.5987439751625061, - "learning_rate": 8.978296738164742e-06, - "loss": 0.0482, - "step": 65130 - }, - { - "epoch": 1.653382408935144, - "grad_norm": 0.4204729497432709, - "learning_rate": 8.97745060709904e-06, - "loss": 0.0394, - "step": 65135 - }, - { - "epoch": 1.6535093285949993, - "grad_norm": 0.7958856821060181, - "learning_rate": 8.976604476033339e-06, - "loss": 0.0343, - "step": 65140 - }, - { - "epoch": 1.6536362482548546, - "grad_norm": 0.5209841132164001, - "learning_rate": 8.975758344967637e-06, - "loss": 0.0435, - "step": 65145 - }, - { - "epoch": 1.65376316791471, - "grad_norm": 0.48195716738700867, - "learning_rate": 8.974912213901934e-06, - "loss": 0.0336, - "step": 65150 - }, - { - "epoch": 1.6538900875745655, - "grad_norm": 0.38710111379623413, - "learning_rate": 8.974066082836232e-06, - "loss": 0.0353, - "step": 65155 - }, - { - "epoch": 1.6540170072344207, - "grad_norm": 0.3894824981689453, - "learning_rate": 8.97321995177053e-06, - "loss": 0.0357, - "step": 65160 - }, - { - "epoch": 1.654143926894276, - "grad_norm": 0.3283344507217407, - "learning_rate": 8.972373820704829e-06, - "loss": 0.0312, - "step": 65165 - }, - { - "epoch": 1.6542708465541311, - "grad_norm": 0.45823222398757935, - "learning_rate": 8.971527689639126e-06, - "loss": 0.0325, - "step": 65170 - }, - { - "epoch": 1.6543977662139866, - "grad_norm": 0.36615675687789917, - "learning_rate": 8.970681558573424e-06, - "loss": 0.0264, - "step": 65175 - }, - { - "epoch": 1.6545246858738418, - "grad_norm": 0.603642463684082, - "learning_rate": 8.96983542750772e-06, - "loss": 0.0467, - "step": 65180 - }, - { - "epoch": 1.6546516055336973, - "grad_norm": 0.3421114683151245, - "learning_rate": 8.96898929644202e-06, - "loss": 0.0312, - "step": 65185 - }, - { - "epoch": 1.6547785251935525, - "grad_norm": 0.4449631869792938, - "learning_rate": 8.968143165376318e-06, - "loss": 0.0363, - "step": 65190 - }, - { - "epoch": 1.6549054448534077, - "grad_norm": 1.424605369567871, - "learning_rate": 8.967297034310616e-06, - "loss": 0.0423, - "step": 65195 - }, - { - "epoch": 1.655032364513263, - "grad_norm": 0.6134064197540283, - "learning_rate": 8.966450903244913e-06, - "loss": 0.0461, - "step": 65200 - }, - { - "epoch": 1.6551592841731184, - "grad_norm": 0.5012558698654175, - "learning_rate": 8.965604772179211e-06, - "loss": 0.0365, - "step": 65205 - }, - { - "epoch": 1.6552862038329739, - "grad_norm": 0.3400554955005646, - "learning_rate": 8.96475864111351e-06, - "loss": 0.0457, - "step": 65210 - }, - { - "epoch": 1.655413123492829, - "grad_norm": 0.32065147161483765, - "learning_rate": 8.963912510047808e-06, - "loss": 0.025, - "step": 65215 - }, - { - "epoch": 1.6555400431526843, - "grad_norm": 0.302823543548584, - "learning_rate": 8.963066378982105e-06, - "loss": 0.0309, - "step": 65220 - }, - { - "epoch": 1.6556669628125396, - "grad_norm": 0.6175329089164734, - "learning_rate": 8.962220247916403e-06, - "loss": 0.053, - "step": 65225 - }, - { - "epoch": 1.655793882472395, - "grad_norm": 0.310511976480484, - "learning_rate": 8.961374116850701e-06, - "loss": 0.0359, - "step": 65230 - }, - { - "epoch": 1.6559208021322502, - "grad_norm": 0.30996888875961304, - "learning_rate": 8.960527985785e-06, - "loss": 0.0459, - "step": 65235 - }, - { - "epoch": 1.6560477217921057, - "grad_norm": 0.4682442545890808, - "learning_rate": 8.959681854719297e-06, - "loss": 0.0253, - "step": 65240 - }, - { - "epoch": 1.656174641451961, - "grad_norm": 1.470855474472046, - "learning_rate": 8.958835723653595e-06, - "loss": 0.0565, - "step": 65245 - }, - { - "epoch": 1.6563015611118161, - "grad_norm": 0.41784846782684326, - "learning_rate": 8.957989592587892e-06, - "loss": 0.0332, - "step": 65250 - }, - { - "epoch": 1.6564284807716714, - "grad_norm": 0.34236133098602295, - "learning_rate": 8.95714346152219e-06, - "loss": 0.0309, - "step": 65255 - }, - { - "epoch": 1.6565554004315268, - "grad_norm": 0.3203859031200409, - "learning_rate": 8.956297330456488e-06, - "loss": 0.0486, - "step": 65260 - }, - { - "epoch": 1.6566823200913823, - "grad_norm": 0.2484777867794037, - "learning_rate": 8.955451199390787e-06, - "loss": 0.0251, - "step": 65265 - }, - { - "epoch": 1.6568092397512375, - "grad_norm": 0.4660380184650421, - "learning_rate": 8.954605068325084e-06, - "loss": 0.0391, - "step": 65270 - }, - { - "epoch": 1.6569361594110927, - "grad_norm": 0.36795127391815186, - "learning_rate": 8.953758937259382e-06, - "loss": 0.0319, - "step": 65275 - }, - { - "epoch": 1.657063079070948, - "grad_norm": 0.40406039357185364, - "learning_rate": 8.95291280619368e-06, - "loss": 0.0405, - "step": 65280 - }, - { - "epoch": 1.6571899987308034, - "grad_norm": 0.6806068420410156, - "learning_rate": 8.952066675127979e-06, - "loss": 0.0452, - "step": 65285 - }, - { - "epoch": 1.6573169183906589, - "grad_norm": 0.38200870156288147, - "learning_rate": 8.951220544062275e-06, - "loss": 0.0356, - "step": 65290 - }, - { - "epoch": 1.657443838050514, - "grad_norm": 0.5553832650184631, - "learning_rate": 8.950374412996574e-06, - "loss": 0.0394, - "step": 65295 - }, - { - "epoch": 1.6575707577103693, - "grad_norm": 0.47494634985923767, - "learning_rate": 8.949528281930872e-06, - "loss": 0.0302, - "step": 65300 - }, - { - "epoch": 1.6576976773702246, - "grad_norm": 0.9558948278427124, - "learning_rate": 8.94868215086517e-06, - "loss": 0.0533, - "step": 65305 - }, - { - "epoch": 1.65782459703008, - "grad_norm": 0.5140542387962341, - "learning_rate": 8.947836019799467e-06, - "loss": 0.034, - "step": 65310 - }, - { - "epoch": 1.6579515166899352, - "grad_norm": 0.3224159777164459, - "learning_rate": 8.946989888733766e-06, - "loss": 0.0269, - "step": 65315 - }, - { - "epoch": 1.6580784363497907, - "grad_norm": 0.437104731798172, - "learning_rate": 8.946143757668063e-06, - "loss": 0.0429, - "step": 65320 - }, - { - "epoch": 1.658205356009646, - "grad_norm": 0.43346646428108215, - "learning_rate": 8.945297626602361e-06, - "loss": 0.0362, - "step": 65325 - }, - { - "epoch": 1.6583322756695011, - "grad_norm": 0.7601327896118164, - "learning_rate": 8.94445149553666e-06, - "loss": 0.0368, - "step": 65330 - }, - { - "epoch": 1.6584591953293564, - "grad_norm": 0.6759348511695862, - "learning_rate": 8.943605364470958e-06, - "loss": 0.0313, - "step": 65335 - }, - { - "epoch": 1.6585861149892118, - "grad_norm": 0.39935269951820374, - "learning_rate": 8.942759233405254e-06, - "loss": 0.036, - "step": 65340 - }, - { - "epoch": 1.6587130346490673, - "grad_norm": 0.4410076439380646, - "learning_rate": 8.941913102339553e-06, - "loss": 0.0419, - "step": 65345 - }, - { - "epoch": 1.6588399543089225, - "grad_norm": 0.4200136363506317, - "learning_rate": 8.941066971273851e-06, - "loss": 0.0542, - "step": 65350 - }, - { - "epoch": 1.6589668739687777, - "grad_norm": 0.4646438658237457, - "learning_rate": 8.94022084020815e-06, - "loss": 0.0341, - "step": 65355 - }, - { - "epoch": 1.659093793628633, - "grad_norm": 0.5119356513023376, - "learning_rate": 8.939374709142446e-06, - "loss": 0.0402, - "step": 65360 - }, - { - "epoch": 1.6592207132884884, - "grad_norm": 0.3879612684249878, - "learning_rate": 8.938528578076745e-06, - "loss": 0.0439, - "step": 65365 - }, - { - "epoch": 1.6593476329483439, - "grad_norm": 0.5797907710075378, - "learning_rate": 8.937682447011043e-06, - "loss": 0.045, - "step": 65370 - }, - { - "epoch": 1.659474552608199, - "grad_norm": 0.42097386717796326, - "learning_rate": 8.936836315945342e-06, - "loss": 0.0494, - "step": 65375 - }, - { - "epoch": 1.6596014722680543, - "grad_norm": 0.4611661434173584, - "learning_rate": 8.935990184879638e-06, - "loss": 0.033, - "step": 65380 - }, - { - "epoch": 1.6597283919279096, - "grad_norm": 0.2561880946159363, - "learning_rate": 8.935144053813937e-06, - "loss": 0.032, - "step": 65385 - }, - { - "epoch": 1.6598553115877648, - "grad_norm": 0.461259663105011, - "learning_rate": 8.934297922748233e-06, - "loss": 0.0355, - "step": 65390 - }, - { - "epoch": 1.6599822312476202, - "grad_norm": 0.4437582492828369, - "learning_rate": 8.933451791682532e-06, - "loss": 0.0338, - "step": 65395 - }, - { - "epoch": 1.6601091509074757, - "grad_norm": 0.5059611201286316, - "learning_rate": 8.93260566061683e-06, - "loss": 0.0277, - "step": 65400 - }, - { - "epoch": 1.660236070567331, - "grad_norm": 0.5022279620170593, - "learning_rate": 8.931759529551129e-06, - "loss": 0.0398, - "step": 65405 - }, - { - "epoch": 1.6603629902271861, - "grad_norm": 0.5416222810745239, - "learning_rate": 8.930913398485425e-06, - "loss": 0.0481, - "step": 65410 - }, - { - "epoch": 1.6604899098870414, - "grad_norm": 0.4798409938812256, - "learning_rate": 8.930067267419724e-06, - "loss": 0.0506, - "step": 65415 - }, - { - "epoch": 1.6606168295468968, - "grad_norm": 0.32006117701530457, - "learning_rate": 8.929221136354022e-06, - "loss": 0.0595, - "step": 65420 - }, - { - "epoch": 1.6607437492067523, - "grad_norm": 0.3255143165588379, - "learning_rate": 8.92837500528832e-06, - "loss": 0.0465, - "step": 65425 - }, - { - "epoch": 1.6608706688666075, - "grad_norm": 1.0788993835449219, - "learning_rate": 8.927528874222617e-06, - "loss": 0.0653, - "step": 65430 - }, - { - "epoch": 1.6609975885264627, - "grad_norm": 0.33083873987197876, - "learning_rate": 8.926682743156916e-06, - "loss": 0.0367, - "step": 65435 - }, - { - "epoch": 1.661124508186318, - "grad_norm": 0.4609372317790985, - "learning_rate": 8.925836612091214e-06, - "loss": 0.056, - "step": 65440 - }, - { - "epoch": 1.6612514278461734, - "grad_norm": 0.298467755317688, - "learning_rate": 8.924990481025512e-06, - "loss": 0.0344, - "step": 65445 - }, - { - "epoch": 1.6613783475060286, - "grad_norm": 0.5068796873092651, - "learning_rate": 8.924144349959809e-06, - "loss": 0.0292, - "step": 65450 - }, - { - "epoch": 1.661505267165884, - "grad_norm": 0.8118805289268494, - "learning_rate": 8.923298218894108e-06, - "loss": 0.0328, - "step": 65455 - }, - { - "epoch": 1.6616321868257393, - "grad_norm": 0.2662195563316345, - "learning_rate": 8.922452087828404e-06, - "loss": 0.0481, - "step": 65460 - }, - { - "epoch": 1.6617591064855946, - "grad_norm": 0.380209356546402, - "learning_rate": 8.921605956762703e-06, - "loss": 0.0353, - "step": 65465 - }, - { - "epoch": 1.6618860261454498, - "grad_norm": 0.46479079127311707, - "learning_rate": 8.920759825697001e-06, - "loss": 0.0223, - "step": 65470 - }, - { - "epoch": 1.6620129458053052, - "grad_norm": 0.5821226239204407, - "learning_rate": 8.9199136946313e-06, - "loss": 0.0364, - "step": 65475 - }, - { - "epoch": 1.6621398654651607, - "grad_norm": 0.47177696228027344, - "learning_rate": 8.919067563565596e-06, - "loss": 0.0351, - "step": 65480 - }, - { - "epoch": 1.662266785125016, - "grad_norm": 0.3013375401496887, - "learning_rate": 8.918221432499895e-06, - "loss": 0.0247, - "step": 65485 - }, - { - "epoch": 1.6623937047848711, - "grad_norm": 0.24483278393745422, - "learning_rate": 8.917375301434193e-06, - "loss": 0.0348, - "step": 65490 - }, - { - "epoch": 1.6625206244447264, - "grad_norm": 0.47850891947746277, - "learning_rate": 8.916529170368491e-06, - "loss": 0.0401, - "step": 65495 - }, - { - "epoch": 1.6626475441045818, - "grad_norm": 0.7117896676063538, - "learning_rate": 8.915683039302788e-06, - "loss": 0.0413, - "step": 65500 - }, - { - "epoch": 1.6627744637644373, - "grad_norm": 0.4195505380630493, - "learning_rate": 8.914836908237086e-06, - "loss": 0.0531, - "step": 65505 - }, - { - "epoch": 1.6629013834242925, - "grad_norm": 0.44881853461265564, - "learning_rate": 8.913990777171385e-06, - "loss": 0.0304, - "step": 65510 - }, - { - "epoch": 1.6630283030841477, - "grad_norm": 0.46761614084243774, - "learning_rate": 8.913144646105683e-06, - "loss": 0.0239, - "step": 65515 - }, - { - "epoch": 1.663155222744003, - "grad_norm": 1.070847988128662, - "learning_rate": 8.91229851503998e-06, - "loss": 0.0312, - "step": 65520 - }, - { - "epoch": 1.6632821424038584, - "grad_norm": 0.37590980529785156, - "learning_rate": 8.911452383974278e-06, - "loss": 0.0341, - "step": 65525 - }, - { - "epoch": 1.6634090620637136, - "grad_norm": 0.33409586548805237, - "learning_rate": 8.910606252908575e-06, - "loss": 0.0268, - "step": 65530 - }, - { - "epoch": 1.663535981723569, - "grad_norm": 0.48899519443511963, - "learning_rate": 8.909760121842873e-06, - "loss": 0.0522, - "step": 65535 - }, - { - "epoch": 1.6636629013834243, - "grad_norm": 0.49646005034446716, - "learning_rate": 8.908913990777172e-06, - "loss": 0.0371, - "step": 65540 - }, - { - "epoch": 1.6637898210432795, - "grad_norm": 0.40349963307380676, - "learning_rate": 8.90806785971147e-06, - "loss": 0.0312, - "step": 65545 - }, - { - "epoch": 1.6639167407031348, - "grad_norm": 0.5190420746803284, - "learning_rate": 8.907221728645769e-06, - "loss": 0.0478, - "step": 65550 - }, - { - "epoch": 1.6640436603629902, - "grad_norm": 0.38384461402893066, - "learning_rate": 8.906375597580065e-06, - "loss": 0.0252, - "step": 65555 - }, - { - "epoch": 1.6641705800228457, - "grad_norm": 0.43671801686286926, - "learning_rate": 8.905529466514364e-06, - "loss": 0.0436, - "step": 65560 - }, - { - "epoch": 1.664297499682701, - "grad_norm": 0.6491867303848267, - "learning_rate": 8.904683335448662e-06, - "loss": 0.0455, - "step": 65565 - }, - { - "epoch": 1.6644244193425561, - "grad_norm": 0.3442470133304596, - "learning_rate": 8.90383720438296e-06, - "loss": 0.0466, - "step": 65570 - }, - { - "epoch": 1.6645513390024114, - "grad_norm": 0.5232798457145691, - "learning_rate": 8.902991073317257e-06, - "loss": 0.0348, - "step": 65575 - }, - { - "epoch": 1.6646782586622668, - "grad_norm": 0.5155237913131714, - "learning_rate": 8.902144942251556e-06, - "loss": 0.0436, - "step": 65580 - }, - { - "epoch": 1.664805178322122, - "grad_norm": 0.338069349527359, - "learning_rate": 8.901298811185854e-06, - "loss": 0.041, - "step": 65585 - }, - { - "epoch": 1.6649320979819775, - "grad_norm": 0.491205096244812, - "learning_rate": 8.900452680120153e-06, - "loss": 0.0383, - "step": 65590 - }, - { - "epoch": 1.6650590176418327, - "grad_norm": 0.7130605578422546, - "learning_rate": 8.89960654905445e-06, - "loss": 0.0452, - "step": 65595 - }, - { - "epoch": 1.665185937301688, - "grad_norm": 0.584938645362854, - "learning_rate": 8.898760417988748e-06, - "loss": 0.0537, - "step": 65600 - }, - { - "epoch": 1.6653128569615432, - "grad_norm": 0.5185157060623169, - "learning_rate": 8.897914286923044e-06, - "loss": 0.0419, - "step": 65605 - }, - { - "epoch": 1.6654397766213986, - "grad_norm": 0.5132018327713013, - "learning_rate": 8.897068155857343e-06, - "loss": 0.0408, - "step": 65610 - }, - { - "epoch": 1.665566696281254, - "grad_norm": 0.45525306463241577, - "learning_rate": 8.896222024791641e-06, - "loss": 0.035, - "step": 65615 - }, - { - "epoch": 1.6656936159411093, - "grad_norm": 0.513904333114624, - "learning_rate": 8.89537589372594e-06, - "loss": 0.0318, - "step": 65620 - }, - { - "epoch": 1.6658205356009645, - "grad_norm": 0.45823290944099426, - "learning_rate": 8.894529762660236e-06, - "loss": 0.0421, - "step": 65625 - }, - { - "epoch": 1.6659474552608198, - "grad_norm": 0.42932388186454773, - "learning_rate": 8.893683631594535e-06, - "loss": 0.0398, - "step": 65630 - }, - { - "epoch": 1.6660743749206752, - "grad_norm": 0.40488401055336, - "learning_rate": 8.892837500528833e-06, - "loss": 0.0258, - "step": 65635 - }, - { - "epoch": 1.6662012945805307, - "grad_norm": 0.3210211992263794, - "learning_rate": 8.891991369463131e-06, - "loss": 0.0266, - "step": 65640 - }, - { - "epoch": 1.666328214240386, - "grad_norm": 0.3507245182991028, - "learning_rate": 8.891145238397428e-06, - "loss": 0.0414, - "step": 65645 - }, - { - "epoch": 1.6664551339002411, - "grad_norm": 0.2561591565608978, - "learning_rate": 8.890299107331727e-06, - "loss": 0.0444, - "step": 65650 - }, - { - "epoch": 1.6665820535600964, - "grad_norm": 0.5184684991836548, - "learning_rate": 8.889452976266025e-06, - "loss": 0.0413, - "step": 65655 - }, - { - "epoch": 1.6667089732199518, - "grad_norm": 0.41977182030677795, - "learning_rate": 8.888606845200323e-06, - "loss": 0.0421, - "step": 65660 - }, - { - "epoch": 1.666835892879807, - "grad_norm": 0.5815332531929016, - "learning_rate": 8.88776071413462e-06, - "loss": 0.0302, - "step": 65665 - }, - { - "epoch": 1.6669628125396625, - "grad_norm": 0.6118481159210205, - "learning_rate": 8.886914583068918e-06, - "loss": 0.024, - "step": 65670 - }, - { - "epoch": 1.6670897321995177, - "grad_norm": 0.30551496148109436, - "learning_rate": 8.886068452003215e-06, - "loss": 0.0196, - "step": 65675 - }, - { - "epoch": 1.667216651859373, - "grad_norm": 0.30426421761512756, - "learning_rate": 8.885222320937514e-06, - "loss": 0.0243, - "step": 65680 - }, - { - "epoch": 1.6673435715192282, - "grad_norm": 0.4013094902038574, - "learning_rate": 8.884376189871812e-06, - "loss": 0.0433, - "step": 65685 - }, - { - "epoch": 1.6674704911790836, - "grad_norm": 0.9512150883674622, - "learning_rate": 8.88353005880611e-06, - "loss": 0.0516, - "step": 65690 - }, - { - "epoch": 1.667597410838939, - "grad_norm": 0.3686421811580658, - "learning_rate": 8.882683927740407e-06, - "loss": 0.0294, - "step": 65695 - }, - { - "epoch": 1.6677243304987943, - "grad_norm": 0.5723350644111633, - "learning_rate": 8.881837796674705e-06, - "loss": 0.042, - "step": 65700 - }, - { - "epoch": 1.6678512501586495, - "grad_norm": 0.48495015501976013, - "learning_rate": 8.880991665609004e-06, - "loss": 0.0398, - "step": 65705 - }, - { - "epoch": 1.6679781698185048, - "grad_norm": 0.5551964640617371, - "learning_rate": 8.880145534543302e-06, - "loss": 0.0435, - "step": 65710 - }, - { - "epoch": 1.6681050894783602, - "grad_norm": 0.37346550822257996, - "learning_rate": 8.879299403477599e-06, - "loss": 0.0282, - "step": 65715 - }, - { - "epoch": 1.6682320091382155, - "grad_norm": 0.36427873373031616, - "learning_rate": 8.878453272411897e-06, - "loss": 0.0291, - "step": 65720 - }, - { - "epoch": 1.668358928798071, - "grad_norm": 0.36655494570732117, - "learning_rate": 8.877607141346196e-06, - "loss": 0.0325, - "step": 65725 - }, - { - "epoch": 1.6684858484579261, - "grad_norm": 0.48270854353904724, - "learning_rate": 8.876761010280494e-06, - "loss": 0.0463, - "step": 65730 - }, - { - "epoch": 1.6686127681177814, - "grad_norm": 1.3981819152832031, - "learning_rate": 8.875914879214791e-06, - "loss": 0.0518, - "step": 65735 - }, - { - "epoch": 1.6687396877776366, - "grad_norm": 1.0330018997192383, - "learning_rate": 8.87506874814909e-06, - "loss": 0.0441, - "step": 65740 - }, - { - "epoch": 1.668866607437492, - "grad_norm": 0.600074291229248, - "learning_rate": 8.874222617083386e-06, - "loss": 0.0422, - "step": 65745 - }, - { - "epoch": 1.6689935270973475, - "grad_norm": 0.3690608739852905, - "learning_rate": 8.873376486017684e-06, - "loss": 0.0596, - "step": 65750 - }, - { - "epoch": 1.6691204467572027, - "grad_norm": 0.32155323028564453, - "learning_rate": 8.872530354951983e-06, - "loss": 0.0431, - "step": 65755 - }, - { - "epoch": 1.669247366417058, - "grad_norm": 0.43784135580062866, - "learning_rate": 8.871684223886281e-06, - "loss": 0.0431, - "step": 65760 - }, - { - "epoch": 1.6693742860769132, - "grad_norm": 0.4634493887424469, - "learning_rate": 8.870838092820578e-06, - "loss": 0.0419, - "step": 65765 - }, - { - "epoch": 1.6695012057367686, - "grad_norm": 0.4154454469680786, - "learning_rate": 8.869991961754876e-06, - "loss": 0.0481, - "step": 65770 - }, - { - "epoch": 1.669628125396624, - "grad_norm": 0.40176627039909363, - "learning_rate": 8.869145830689175e-06, - "loss": 0.0289, - "step": 65775 - }, - { - "epoch": 1.6697550450564793, - "grad_norm": 0.39772501587867737, - "learning_rate": 8.868299699623473e-06, - "loss": 0.0539, - "step": 65780 - }, - { - "epoch": 1.6698819647163345, - "grad_norm": 0.3660581409931183, - "learning_rate": 8.86745356855777e-06, - "loss": 0.0403, - "step": 65785 - }, - { - "epoch": 1.6700088843761898, - "grad_norm": 2.4944915771484375, - "learning_rate": 8.866607437492068e-06, - "loss": 0.0372, - "step": 65790 - }, - { - "epoch": 1.6701358040360452, - "grad_norm": 0.5914658308029175, - "learning_rate": 8.865761306426367e-06, - "loss": 0.0451, - "step": 65795 - }, - { - "epoch": 1.6702627236959005, - "grad_norm": 0.44492366909980774, - "learning_rate": 8.864915175360665e-06, - "loss": 0.037, - "step": 65800 - }, - { - "epoch": 1.670389643355756, - "grad_norm": 0.4743790030479431, - "learning_rate": 8.864069044294962e-06, - "loss": 0.0427, - "step": 65805 - }, - { - "epoch": 1.6705165630156111, - "grad_norm": 0.3694162964820862, - "learning_rate": 8.86322291322926e-06, - "loss": 0.0286, - "step": 65810 - }, - { - "epoch": 1.6706434826754664, - "grad_norm": 0.43514424562454224, - "learning_rate": 8.862376782163557e-06, - "loss": 0.0402, - "step": 65815 - }, - { - "epoch": 1.6707704023353216, - "grad_norm": 0.2710176408290863, - "learning_rate": 8.861530651097855e-06, - "loss": 0.0255, - "step": 65820 - }, - { - "epoch": 1.670897321995177, - "grad_norm": 0.8756222724914551, - "learning_rate": 8.860684520032154e-06, - "loss": 0.0382, - "step": 65825 - }, - { - "epoch": 1.6710242416550325, - "grad_norm": 0.46575260162353516, - "learning_rate": 8.859838388966452e-06, - "loss": 0.0369, - "step": 65830 - }, - { - "epoch": 1.6711511613148877, - "grad_norm": 0.5153847336769104, - "learning_rate": 8.858992257900749e-06, - "loss": 0.0398, - "step": 65835 - }, - { - "epoch": 1.671278080974743, - "grad_norm": 0.6150141358375549, - "learning_rate": 8.858146126835047e-06, - "loss": 0.0309, - "step": 65840 - }, - { - "epoch": 1.6714050006345982, - "grad_norm": 0.4426746070384979, - "learning_rate": 8.857299995769346e-06, - "loss": 0.05, - "step": 65845 - }, - { - "epoch": 1.6715319202944536, - "grad_norm": 0.5002992153167725, - "learning_rate": 8.856453864703644e-06, - "loss": 0.0319, - "step": 65850 - }, - { - "epoch": 1.671658839954309, - "grad_norm": 0.850205659866333, - "learning_rate": 8.85560773363794e-06, - "loss": 0.0192, - "step": 65855 - }, - { - "epoch": 1.6717857596141643, - "grad_norm": 0.26524120569229126, - "learning_rate": 8.854761602572239e-06, - "loss": 0.0343, - "step": 65860 - }, - { - "epoch": 1.6719126792740195, - "grad_norm": 0.1921084076166153, - "learning_rate": 8.853915471506538e-06, - "loss": 0.0399, - "step": 65865 - }, - { - "epoch": 1.6720395989338748, - "grad_norm": 0.5905672311782837, - "learning_rate": 8.853069340440836e-06, - "loss": 0.0325, - "step": 65870 - }, - { - "epoch": 1.6721665185937302, - "grad_norm": 0.3053434193134308, - "learning_rate": 8.852223209375133e-06, - "loss": 0.0324, - "step": 65875 - }, - { - "epoch": 1.6722934382535855, - "grad_norm": 0.4531998932361603, - "learning_rate": 8.851377078309431e-06, - "loss": 0.0401, - "step": 65880 - }, - { - "epoch": 1.672420357913441, - "grad_norm": 0.6914463639259338, - "learning_rate": 8.850530947243728e-06, - "loss": 0.0331, - "step": 65885 - }, - { - "epoch": 1.6725472775732961, - "grad_norm": 0.2950552701950073, - "learning_rate": 8.849684816178026e-06, - "loss": 0.0281, - "step": 65890 - }, - { - "epoch": 1.6726741972331514, - "grad_norm": 0.4090563654899597, - "learning_rate": 8.848838685112325e-06, - "loss": 0.0437, - "step": 65895 - }, - { - "epoch": 1.6728011168930066, - "grad_norm": 0.7089372277259827, - "learning_rate": 8.847992554046623e-06, - "loss": 0.0303, - "step": 65900 - }, - { - "epoch": 1.672928036552862, - "grad_norm": 0.43852096796035767, - "learning_rate": 8.84714642298092e-06, - "loss": 0.0517, - "step": 65905 - }, - { - "epoch": 1.6730549562127175, - "grad_norm": 0.4478702247142792, - "learning_rate": 8.846300291915218e-06, - "loss": 0.0483, - "step": 65910 - }, - { - "epoch": 1.6731818758725727, - "grad_norm": 0.20854711532592773, - "learning_rate": 8.845454160849516e-06, - "loss": 0.042, - "step": 65915 - }, - { - "epoch": 1.673308795532428, - "grad_norm": 0.4255557954311371, - "learning_rate": 8.844608029783815e-06, - "loss": 0.0311, - "step": 65920 - }, - { - "epoch": 1.6734357151922832, - "grad_norm": 0.38500937819480896, - "learning_rate": 8.843761898718112e-06, - "loss": 0.0444, - "step": 65925 - }, - { - "epoch": 1.6735626348521386, - "grad_norm": 0.5492475628852844, - "learning_rate": 8.84291576765241e-06, - "loss": 0.0458, - "step": 65930 - }, - { - "epoch": 1.6736895545119939, - "grad_norm": 0.5182275176048279, - "learning_rate": 8.842069636586708e-06, - "loss": 0.0333, - "step": 65935 - }, - { - "epoch": 1.6738164741718493, - "grad_norm": 0.39061209559440613, - "learning_rate": 8.841223505521007e-06, - "loss": 0.0342, - "step": 65940 - }, - { - "epoch": 1.6739433938317045, - "grad_norm": 0.8187559843063354, - "learning_rate": 8.840377374455303e-06, - "loss": 0.0501, - "step": 65945 - }, - { - "epoch": 1.6740703134915598, - "grad_norm": 0.34921470284461975, - "learning_rate": 8.839531243389602e-06, - "loss": 0.0321, - "step": 65950 - }, - { - "epoch": 1.674197233151415, - "grad_norm": 0.29391464591026306, - "learning_rate": 8.838685112323899e-06, - "loss": 0.0302, - "step": 65955 - }, - { - "epoch": 1.6743241528112704, - "grad_norm": 0.6205379366874695, - "learning_rate": 8.837838981258197e-06, - "loss": 0.048, - "step": 65960 - }, - { - "epoch": 1.674451072471126, - "grad_norm": 0.30118992924690247, - "learning_rate": 8.836992850192495e-06, - "loss": 0.0324, - "step": 65965 - }, - { - "epoch": 1.6745779921309811, - "grad_norm": 0.3559834659099579, - "learning_rate": 8.836146719126794e-06, - "loss": 0.0383, - "step": 65970 - }, - { - "epoch": 1.6747049117908364, - "grad_norm": 0.4122340977191925, - "learning_rate": 8.83530058806109e-06, - "loss": 0.054, - "step": 65975 - }, - { - "epoch": 1.6748318314506916, - "grad_norm": 0.3905998170375824, - "learning_rate": 8.834454456995389e-06, - "loss": 0.0315, - "step": 65980 - }, - { - "epoch": 1.674958751110547, - "grad_norm": 0.6218470931053162, - "learning_rate": 8.833608325929687e-06, - "loss": 0.0391, - "step": 65985 - }, - { - "epoch": 1.6750856707704025, - "grad_norm": 0.5476656556129456, - "learning_rate": 8.832762194863986e-06, - "loss": 0.0439, - "step": 65990 - }, - { - "epoch": 1.6752125904302577, - "grad_norm": 0.2571362257003784, - "learning_rate": 8.831916063798282e-06, - "loss": 0.0355, - "step": 65995 - }, - { - "epoch": 1.675339510090113, - "grad_norm": 0.3388216197490692, - "learning_rate": 8.83106993273258e-06, - "loss": 0.0311, - "step": 66000 - }, - { - "epoch": 1.6754664297499682, - "grad_norm": 0.4164949953556061, - "learning_rate": 8.83022380166688e-06, - "loss": 0.0483, - "step": 66005 - }, - { - "epoch": 1.6755933494098236, - "grad_norm": 0.47087544202804565, - "learning_rate": 8.829377670601178e-06, - "loss": 0.041, - "step": 66010 - }, - { - "epoch": 1.6757202690696789, - "grad_norm": 0.2950464189052582, - "learning_rate": 8.828531539535474e-06, - "loss": 0.0514, - "step": 66015 - }, - { - "epoch": 1.6758471887295343, - "grad_norm": 0.5095893740653992, - "learning_rate": 8.827685408469773e-06, - "loss": 0.0257, - "step": 66020 - }, - { - "epoch": 1.6759741083893895, - "grad_norm": 0.7854303121566772, - "learning_rate": 8.82683927740407e-06, - "loss": 0.0375, - "step": 66025 - }, - { - "epoch": 1.6761010280492448, - "grad_norm": 0.9727424383163452, - "learning_rate": 8.825993146338368e-06, - "loss": 0.0452, - "step": 66030 - }, - { - "epoch": 1.6762279477091, - "grad_norm": 0.29191431403160095, - "learning_rate": 8.825147015272666e-06, - "loss": 0.0371, - "step": 66035 - }, - { - "epoch": 1.6763548673689554, - "grad_norm": 0.5080496072769165, - "learning_rate": 8.824300884206965e-06, - "loss": 0.0649, - "step": 66040 - }, - { - "epoch": 1.676481787028811, - "grad_norm": 0.7445560097694397, - "learning_rate": 8.823454753141261e-06, - "loss": 0.0579, - "step": 66045 - }, - { - "epoch": 1.6766087066886661, - "grad_norm": 0.6759259104728699, - "learning_rate": 8.82260862207556e-06, - "loss": 0.036, - "step": 66050 - }, - { - "epoch": 1.6767356263485214, - "grad_norm": 0.39209362864494324, - "learning_rate": 8.821762491009858e-06, - "loss": 0.0427, - "step": 66055 - }, - { - "epoch": 1.6768625460083766, - "grad_norm": 0.38782352209091187, - "learning_rate": 8.820916359944157e-06, - "loss": 0.036, - "step": 66060 - }, - { - "epoch": 1.676989465668232, - "grad_norm": 0.42194050550460815, - "learning_rate": 8.820070228878455e-06, - "loss": 0.0404, - "step": 66065 - }, - { - "epoch": 1.6771163853280873, - "grad_norm": 0.44654712080955505, - "learning_rate": 8.819224097812752e-06, - "loss": 0.0555, - "step": 66070 - }, - { - "epoch": 1.6772433049879427, - "grad_norm": 0.4860520660877228, - "learning_rate": 8.81837796674705e-06, - "loss": 0.0416, - "step": 66075 - }, - { - "epoch": 1.677370224647798, - "grad_norm": 0.2915968894958496, - "learning_rate": 8.817531835681348e-06, - "loss": 0.0287, - "step": 66080 - }, - { - "epoch": 1.6774971443076532, - "grad_norm": 0.7943536043167114, - "learning_rate": 8.816685704615647e-06, - "loss": 0.0296, - "step": 66085 - }, - { - "epoch": 1.6776240639675084, - "grad_norm": 0.4749396741390228, - "learning_rate": 8.815839573549944e-06, - "loss": 0.0472, - "step": 66090 - }, - { - "epoch": 1.6777509836273639, - "grad_norm": 0.3585720956325531, - "learning_rate": 8.814993442484242e-06, - "loss": 0.0349, - "step": 66095 - }, - { - "epoch": 1.6778779032872193, - "grad_norm": 0.6656414270401001, - "learning_rate": 8.814147311418539e-06, - "loss": 0.0288, - "step": 66100 - }, - { - "epoch": 1.6780048229470745, - "grad_norm": 0.5685589909553528, - "learning_rate": 8.813301180352837e-06, - "loss": 0.05, - "step": 66105 - }, - { - "epoch": 1.6781317426069298, - "grad_norm": 0.886485755443573, - "learning_rate": 8.812455049287135e-06, - "loss": 0.0327, - "step": 66110 - }, - { - "epoch": 1.678258662266785, - "grad_norm": 0.11526425182819366, - "learning_rate": 8.811608918221434e-06, - "loss": 0.0487, - "step": 66115 - }, - { - "epoch": 1.6783855819266404, - "grad_norm": 0.7152786254882812, - "learning_rate": 8.81076278715573e-06, - "loss": 0.0303, - "step": 66120 - }, - { - "epoch": 1.678512501586496, - "grad_norm": 0.46811434626579285, - "learning_rate": 8.809916656090029e-06, - "loss": 0.0316, - "step": 66125 - }, - { - "epoch": 1.6786394212463511, - "grad_norm": 0.4331802725791931, - "learning_rate": 8.809070525024327e-06, - "loss": 0.0301, - "step": 66130 - }, - { - "epoch": 1.6787663409062064, - "grad_norm": 0.7324458956718445, - "learning_rate": 8.808224393958626e-06, - "loss": 0.051, - "step": 66135 - }, - { - "epoch": 1.6788932605660616, - "grad_norm": 1.8500239849090576, - "learning_rate": 8.807378262892923e-06, - "loss": 0.0498, - "step": 66140 - }, - { - "epoch": 1.679020180225917, - "grad_norm": 0.4047989547252655, - "learning_rate": 8.806532131827221e-06, - "loss": 0.0322, - "step": 66145 - }, - { - "epoch": 1.6791470998857723, - "grad_norm": 0.6019065380096436, - "learning_rate": 8.80568600076152e-06, - "loss": 0.0479, - "step": 66150 - }, - { - "epoch": 1.6792740195456277, - "grad_norm": 0.4192666709423065, - "learning_rate": 8.804839869695818e-06, - "loss": 0.0578, - "step": 66155 - }, - { - "epoch": 1.679400939205483, - "grad_norm": 0.49737510085105896, - "learning_rate": 8.803993738630114e-06, - "loss": 0.0348, - "step": 66160 - }, - { - "epoch": 1.6795278588653382, - "grad_norm": 0.6024752855300903, - "learning_rate": 8.803147607564413e-06, - "loss": 0.0303, - "step": 66165 - }, - { - "epoch": 1.6796547785251934, - "grad_norm": 0.5140088200569153, - "learning_rate": 8.80230147649871e-06, - "loss": 0.0433, - "step": 66170 - }, - { - "epoch": 1.6797816981850489, - "grad_norm": 0.48640426993370056, - "learning_rate": 8.801455345433008e-06, - "loss": 0.0292, - "step": 66175 - }, - { - "epoch": 1.6799086178449043, - "grad_norm": 0.32676753401756287, - "learning_rate": 8.800609214367306e-06, - "loss": 0.0272, - "step": 66180 - }, - { - "epoch": 1.6800355375047595, - "grad_norm": 0.31193676590919495, - "learning_rate": 8.799763083301605e-06, - "loss": 0.0331, - "step": 66185 - }, - { - "epoch": 1.6801624571646148, - "grad_norm": 0.6638104319572449, - "learning_rate": 8.798916952235901e-06, - "loss": 0.0357, - "step": 66190 - }, - { - "epoch": 1.68028937682447, - "grad_norm": 1.1224009990692139, - "learning_rate": 8.7980708211702e-06, - "loss": 0.0325, - "step": 66195 - }, - { - "epoch": 1.6804162964843254, - "grad_norm": 0.416358083486557, - "learning_rate": 8.797224690104498e-06, - "loss": 0.03, - "step": 66200 - }, - { - "epoch": 1.680543216144181, - "grad_norm": 0.35072270035743713, - "learning_rate": 8.796378559038797e-06, - "loss": 0.0491, - "step": 66205 - }, - { - "epoch": 1.6806701358040361, - "grad_norm": 1.5931833982467651, - "learning_rate": 8.795532427973093e-06, - "loss": 0.039, - "step": 66210 - }, - { - "epoch": 1.6807970554638914, - "grad_norm": 0.3808829188346863, - "learning_rate": 8.794686296907392e-06, - "loss": 0.0673, - "step": 66215 - }, - { - "epoch": 1.6809239751237466, - "grad_norm": 1.33297860622406, - "learning_rate": 8.79384016584169e-06, - "loss": 0.0353, - "step": 66220 - }, - { - "epoch": 1.681050894783602, - "grad_norm": 0.6591484546661377, - "learning_rate": 8.792994034775989e-06, - "loss": 0.0381, - "step": 66225 - }, - { - "epoch": 1.6811778144434573, - "grad_norm": 0.5305606722831726, - "learning_rate": 8.792147903710285e-06, - "loss": 0.0474, - "step": 66230 - }, - { - "epoch": 1.6813047341033127, - "grad_norm": 0.496715784072876, - "learning_rate": 8.791301772644584e-06, - "loss": 0.0316, - "step": 66235 - }, - { - "epoch": 1.681431653763168, - "grad_norm": 0.3776744306087494, - "learning_rate": 8.79045564157888e-06, - "loss": 0.0353, - "step": 66240 - }, - { - "epoch": 1.6815585734230232, - "grad_norm": 0.9866896271705627, - "learning_rate": 8.789609510513179e-06, - "loss": 0.0281, - "step": 66245 - }, - { - "epoch": 1.6816854930828784, - "grad_norm": 0.1885860711336136, - "learning_rate": 8.788763379447477e-06, - "loss": 0.046, - "step": 66250 - }, - { - "epoch": 1.6818124127427339, - "grad_norm": 1.2631597518920898, - "learning_rate": 8.787917248381776e-06, - "loss": 0.0392, - "step": 66255 - }, - { - "epoch": 1.6819393324025893, - "grad_norm": 0.4177285134792328, - "learning_rate": 8.787071117316072e-06, - "loss": 0.0318, - "step": 66260 - }, - { - "epoch": 1.6820662520624445, - "grad_norm": 0.27848321199417114, - "learning_rate": 8.78622498625037e-06, - "loss": 0.0445, - "step": 66265 - }, - { - "epoch": 1.6821931717222998, - "grad_norm": 0.2013528198003769, - "learning_rate": 8.785378855184669e-06, - "loss": 0.0457, - "step": 66270 - }, - { - "epoch": 1.682320091382155, - "grad_norm": 0.6289142370223999, - "learning_rate": 8.784532724118968e-06, - "loss": 0.0512, - "step": 66275 - }, - { - "epoch": 1.6824470110420104, - "grad_norm": 0.4357750415802002, - "learning_rate": 8.783686593053264e-06, - "loss": 0.0657, - "step": 66280 - }, - { - "epoch": 1.6825739307018657, - "grad_norm": 0.6184892058372498, - "learning_rate": 8.782840461987563e-06, - "loss": 0.0519, - "step": 66285 - }, - { - "epoch": 1.6827008503617211, - "grad_norm": 0.40170618891716003, - "learning_rate": 8.781994330921861e-06, - "loss": 0.0442, - "step": 66290 - }, - { - "epoch": 1.6828277700215764, - "grad_norm": 0.4214608073234558, - "learning_rate": 8.78114819985616e-06, - "loss": 0.0491, - "step": 66295 - }, - { - "epoch": 1.6829546896814316, - "grad_norm": 1.061771273612976, - "learning_rate": 8.780302068790456e-06, - "loss": 0.0333, - "step": 66300 - }, - { - "epoch": 1.6830816093412868, - "grad_norm": 1.2863222360610962, - "learning_rate": 8.779455937724755e-06, - "loss": 0.0465, - "step": 66305 - }, - { - "epoch": 1.6832085290011423, - "grad_norm": 2.165456771850586, - "learning_rate": 8.778609806659051e-06, - "loss": 0.0392, - "step": 66310 - }, - { - "epoch": 1.6833354486609977, - "grad_norm": 0.35329729318618774, - "learning_rate": 8.77776367559335e-06, - "loss": 0.0438, - "step": 66315 - }, - { - "epoch": 1.683462368320853, - "grad_norm": 0.5363624095916748, - "learning_rate": 8.776917544527648e-06, - "loss": 0.0325, - "step": 66320 - }, - { - "epoch": 1.6835892879807082, - "grad_norm": 0.4739390015602112, - "learning_rate": 8.776071413461946e-06, - "loss": 0.0412, - "step": 66325 - }, - { - "epoch": 1.6837162076405634, - "grad_norm": 0.7049753069877625, - "learning_rate": 8.775225282396243e-06, - "loss": 0.0272, - "step": 66330 - }, - { - "epoch": 1.6838431273004189, - "grad_norm": 0.40764319896698, - "learning_rate": 8.774379151330542e-06, - "loss": 0.0423, - "step": 66335 - }, - { - "epoch": 1.6839700469602743, - "grad_norm": 3.0402259826660156, - "learning_rate": 8.77353302026484e-06, - "loss": 0.0321, - "step": 66340 - }, - { - "epoch": 1.6840969666201295, - "grad_norm": 0.7285351157188416, - "learning_rate": 8.772686889199138e-06, - "loss": 0.0459, - "step": 66345 - }, - { - "epoch": 1.6842238862799848, - "grad_norm": 0.37957075238227844, - "learning_rate": 8.771840758133435e-06, - "loss": 0.0374, - "step": 66350 - }, - { - "epoch": 1.68435080593984, - "grad_norm": 0.6791254281997681, - "learning_rate": 8.770994627067733e-06, - "loss": 0.0423, - "step": 66355 - }, - { - "epoch": 1.6844777255996954, - "grad_norm": 0.3676002025604248, - "learning_rate": 8.770148496002032e-06, - "loss": 0.0308, - "step": 66360 - }, - { - "epoch": 1.6846046452595507, - "grad_norm": 0.4358386695384979, - "learning_rate": 8.76930236493633e-06, - "loss": 0.0387, - "step": 66365 - }, - { - "epoch": 1.6847315649194061, - "grad_norm": 0.7564849257469177, - "learning_rate": 8.768456233870627e-06, - "loss": 0.032, - "step": 66370 - }, - { - "epoch": 1.6848584845792614, - "grad_norm": 0.6810858249664307, - "learning_rate": 8.767610102804925e-06, - "loss": 0.0377, - "step": 66375 - }, - { - "epoch": 1.6849854042391166, - "grad_norm": 1.196257472038269, - "learning_rate": 8.766763971739222e-06, - "loss": 0.0407, - "step": 66380 - }, - { - "epoch": 1.6851123238989718, - "grad_norm": 0.6888363361358643, - "learning_rate": 8.76591784067352e-06, - "loss": 0.0467, - "step": 66385 - }, - { - "epoch": 1.6852392435588273, - "grad_norm": 0.2957777678966522, - "learning_rate": 8.765071709607819e-06, - "loss": 0.0324, - "step": 66390 - }, - { - "epoch": 1.6853661632186827, - "grad_norm": 0.5124613046646118, - "learning_rate": 8.764225578542117e-06, - "loss": 0.0377, - "step": 66395 - }, - { - "epoch": 1.685493082878538, - "grad_norm": 0.4648364186286926, - "learning_rate": 8.763379447476414e-06, - "loss": 0.0273, - "step": 66400 - }, - { - "epoch": 1.6856200025383932, - "grad_norm": 0.4097599387168884, - "learning_rate": 8.762533316410712e-06, - "loss": 0.0387, - "step": 66405 - }, - { - "epoch": 1.6857469221982484, - "grad_norm": 0.8475550413131714, - "learning_rate": 8.76168718534501e-06, - "loss": 0.0453, - "step": 66410 - }, - { - "epoch": 1.6858738418581038, - "grad_norm": 0.48889002203941345, - "learning_rate": 8.76084105427931e-06, - "loss": 0.0487, - "step": 66415 - }, - { - "epoch": 1.686000761517959, - "grad_norm": 0.641187846660614, - "learning_rate": 8.759994923213606e-06, - "loss": 0.0443, - "step": 66420 - }, - { - "epoch": 1.6861276811778145, - "grad_norm": 0.5176681280136108, - "learning_rate": 8.759148792147904e-06, - "loss": 0.0261, - "step": 66425 - }, - { - "epoch": 1.6862546008376698, - "grad_norm": 0.5726014375686646, - "learning_rate": 8.758302661082203e-06, - "loss": 0.0444, - "step": 66430 - }, - { - "epoch": 1.686381520497525, - "grad_norm": 1.0256377458572388, - "learning_rate": 8.757456530016501e-06, - "loss": 0.0443, - "step": 66435 - }, - { - "epoch": 1.6865084401573802, - "grad_norm": 0.35093653202056885, - "learning_rate": 8.756610398950798e-06, - "loss": 0.057, - "step": 66440 - }, - { - "epoch": 1.6866353598172357, - "grad_norm": 0.5863595008850098, - "learning_rate": 8.755764267885096e-06, - "loss": 0.0402, - "step": 66445 - }, - { - "epoch": 1.6867622794770911, - "grad_norm": 0.33006179332733154, - "learning_rate": 8.754918136819393e-06, - "loss": 0.0432, - "step": 66450 - }, - { - "epoch": 1.6868891991369463, - "grad_norm": 0.424446702003479, - "learning_rate": 8.754072005753691e-06, - "loss": 0.0479, - "step": 66455 - }, - { - "epoch": 1.6870161187968016, - "grad_norm": 0.423319011926651, - "learning_rate": 8.75322587468799e-06, - "loss": 0.0421, - "step": 66460 - }, - { - "epoch": 1.6871430384566568, - "grad_norm": 0.4183114469051361, - "learning_rate": 8.752379743622288e-06, - "loss": 0.0367, - "step": 66465 - }, - { - "epoch": 1.6872699581165123, - "grad_norm": 0.3225723206996918, - "learning_rate": 8.751533612556585e-06, - "loss": 0.0357, - "step": 66470 - }, - { - "epoch": 1.6873968777763677, - "grad_norm": 0.5427568554878235, - "learning_rate": 8.750687481490883e-06, - "loss": 0.0344, - "step": 66475 - }, - { - "epoch": 1.687523797436223, - "grad_norm": 0.43606963753700256, - "learning_rate": 8.749841350425182e-06, - "loss": 0.0284, - "step": 66480 - }, - { - "epoch": 1.6876507170960782, - "grad_norm": 0.9313473105430603, - "learning_rate": 8.74899521935948e-06, - "loss": 0.0619, - "step": 66485 - }, - { - "epoch": 1.6877776367559334, - "grad_norm": 0.7552214860916138, - "learning_rate": 8.748149088293777e-06, - "loss": 0.056, - "step": 66490 - }, - { - "epoch": 1.6879045564157888, - "grad_norm": 0.3736780285835266, - "learning_rate": 8.747302957228075e-06, - "loss": 0.0469, - "step": 66495 - }, - { - "epoch": 1.688031476075644, - "grad_norm": 0.7157770395278931, - "learning_rate": 8.746456826162374e-06, - "loss": 0.053, - "step": 66500 - }, - { - "epoch": 1.6881583957354995, - "grad_norm": 0.39697855710983276, - "learning_rate": 8.745610695096672e-06, - "loss": 0.0478, - "step": 66505 - }, - { - "epoch": 1.6882853153953548, - "grad_norm": 0.4240015745162964, - "learning_rate": 8.744764564030969e-06, - "loss": 0.0307, - "step": 66510 - }, - { - "epoch": 1.68841223505521, - "grad_norm": 0.45520007610321045, - "learning_rate": 8.743918432965267e-06, - "loss": 0.0209, - "step": 66515 - }, - { - "epoch": 1.6885391547150652, - "grad_norm": 0.4305447041988373, - "learning_rate": 8.743072301899564e-06, - "loss": 0.0448, - "step": 66520 - }, - { - "epoch": 1.6886660743749207, - "grad_norm": 0.3651965856552124, - "learning_rate": 8.742226170833862e-06, - "loss": 0.0534, - "step": 66525 - }, - { - "epoch": 1.6887929940347761, - "grad_norm": 0.32513177394866943, - "learning_rate": 8.74138003976816e-06, - "loss": 0.0203, - "step": 66530 - }, - { - "epoch": 1.6889199136946313, - "grad_norm": 0.34952595829963684, - "learning_rate": 8.740533908702459e-06, - "loss": 0.0395, - "step": 66535 - }, - { - "epoch": 1.6890468333544866, - "grad_norm": 0.42108404636383057, - "learning_rate": 8.739687777636756e-06, - "loss": 0.0424, - "step": 66540 - }, - { - "epoch": 1.6891737530143418, - "grad_norm": 0.5283597111701965, - "learning_rate": 8.738841646571054e-06, - "loss": 0.043, - "step": 66545 - }, - { - "epoch": 1.6893006726741973, - "grad_norm": 0.2976240813732147, - "learning_rate": 8.737995515505353e-06, - "loss": 0.0187, - "step": 66550 - }, - { - "epoch": 1.6894275923340527, - "grad_norm": 0.36675575375556946, - "learning_rate": 8.737149384439651e-06, - "loss": 0.0284, - "step": 66555 - }, - { - "epoch": 1.689554511993908, - "grad_norm": 0.4231639802455902, - "learning_rate": 8.736303253373948e-06, - "loss": 0.0251, - "step": 66560 - }, - { - "epoch": 1.6896814316537632, - "grad_norm": 0.2986691892147064, - "learning_rate": 8.735457122308246e-06, - "loss": 0.0395, - "step": 66565 - }, - { - "epoch": 1.6898083513136184, - "grad_norm": 0.47945672273635864, - "learning_rate": 8.734610991242544e-06, - "loss": 0.024, - "step": 66570 - }, - { - "epoch": 1.6899352709734738, - "grad_norm": 0.39667078852653503, - "learning_rate": 8.733764860176843e-06, - "loss": 0.0238, - "step": 66575 - }, - { - "epoch": 1.690062190633329, - "grad_norm": 0.399665892124176, - "learning_rate": 8.732918729111141e-06, - "loss": 0.0367, - "step": 66580 - }, - { - "epoch": 1.6901891102931845, - "grad_norm": 0.4328327476978302, - "learning_rate": 8.732072598045438e-06, - "loss": 0.0287, - "step": 66585 - }, - { - "epoch": 1.6903160299530398, - "grad_norm": 0.48048537969589233, - "learning_rate": 8.731226466979736e-06, - "loss": 0.0368, - "step": 66590 - }, - { - "epoch": 1.690442949612895, - "grad_norm": 1.1321321725845337, - "learning_rate": 8.730380335914033e-06, - "loss": 0.0393, - "step": 66595 - }, - { - "epoch": 1.6905698692727502, - "grad_norm": 0.5709215402603149, - "learning_rate": 8.729534204848331e-06, - "loss": 0.0322, - "step": 66600 - }, - { - "epoch": 1.6906967889326057, - "grad_norm": 0.4793839454650879, - "learning_rate": 8.72868807378263e-06, - "loss": 0.0293, - "step": 66605 - }, - { - "epoch": 1.6908237085924611, - "grad_norm": 0.5599561333656311, - "learning_rate": 8.727841942716928e-06, - "loss": 0.0398, - "step": 66610 - }, - { - "epoch": 1.6909506282523163, - "grad_norm": 0.5616392493247986, - "learning_rate": 8.726995811651225e-06, - "loss": 0.039, - "step": 66615 - }, - { - "epoch": 1.6910775479121716, - "grad_norm": 0.5224575400352478, - "learning_rate": 8.726149680585523e-06, - "loss": 0.0391, - "step": 66620 - }, - { - "epoch": 1.6912044675720268, - "grad_norm": 1.017587661743164, - "learning_rate": 8.725303549519822e-06, - "loss": 0.0357, - "step": 66625 - }, - { - "epoch": 1.6913313872318823, - "grad_norm": 0.38529181480407715, - "learning_rate": 8.72445741845412e-06, - "loss": 0.0399, - "step": 66630 - }, - { - "epoch": 1.6914583068917375, - "grad_norm": 0.31633976101875305, - "learning_rate": 8.723611287388417e-06, - "loss": 0.0361, - "step": 66635 - }, - { - "epoch": 1.691585226551593, - "grad_norm": 0.1807829886674881, - "learning_rate": 8.722765156322715e-06, - "loss": 0.0364, - "step": 66640 - }, - { - "epoch": 1.6917121462114482, - "grad_norm": 0.47134846448898315, - "learning_rate": 8.721919025257014e-06, - "loss": 0.0513, - "step": 66645 - }, - { - "epoch": 1.6918390658713034, - "grad_norm": 0.529129147529602, - "learning_rate": 8.721072894191312e-06, - "loss": 0.0488, - "step": 66650 - }, - { - "epoch": 1.6919659855311586, - "grad_norm": 0.7438637018203735, - "learning_rate": 8.720226763125609e-06, - "loss": 0.0425, - "step": 66655 - }, - { - "epoch": 1.692092905191014, - "grad_norm": 0.7729272246360779, - "learning_rate": 8.719380632059907e-06, - "loss": 0.0325, - "step": 66660 - }, - { - "epoch": 1.6922198248508695, - "grad_norm": 0.21749162673950195, - "learning_rate": 8.718534500994204e-06, - "loss": 0.0362, - "step": 66665 - }, - { - "epoch": 1.6923467445107248, - "grad_norm": 0.40801137685775757, - "learning_rate": 8.717688369928502e-06, - "loss": 0.0492, - "step": 66670 - }, - { - "epoch": 1.69247366417058, - "grad_norm": 0.5005106925964355, - "learning_rate": 8.7168422388628e-06, - "loss": 0.0377, - "step": 66675 - }, - { - "epoch": 1.6926005838304352, - "grad_norm": 0.22861744463443756, - "learning_rate": 8.715996107797099e-06, - "loss": 0.0363, - "step": 66680 - }, - { - "epoch": 1.6927275034902907, - "grad_norm": 0.5332808494567871, - "learning_rate": 8.715149976731396e-06, - "loss": 0.0482, - "step": 66685 - }, - { - "epoch": 1.6928544231501461, - "grad_norm": 0.39794498682022095, - "learning_rate": 8.714303845665694e-06, - "loss": 0.046, - "step": 66690 - }, - { - "epoch": 1.6929813428100013, - "grad_norm": 0.32720890641212463, - "learning_rate": 8.713457714599993e-06, - "loss": 0.0465, - "step": 66695 - }, - { - "epoch": 1.6931082624698566, - "grad_norm": 0.3819579482078552, - "learning_rate": 8.712611583534291e-06, - "loss": 0.0451, - "step": 66700 - }, - { - "epoch": 1.6932351821297118, - "grad_norm": 0.5155336260795593, - "learning_rate": 8.711765452468588e-06, - "loss": 0.0551, - "step": 66705 - }, - { - "epoch": 1.6933621017895673, - "grad_norm": 0.6976820826530457, - "learning_rate": 8.710919321402886e-06, - "loss": 0.0397, - "step": 66710 - }, - { - "epoch": 1.6934890214494225, - "grad_norm": 0.45433974266052246, - "learning_rate": 8.710073190337185e-06, - "loss": 0.0543, - "step": 66715 - }, - { - "epoch": 1.693615941109278, - "grad_norm": 0.5898611545562744, - "learning_rate": 8.709227059271483e-06, - "loss": 0.03, - "step": 66720 - }, - { - "epoch": 1.6937428607691332, - "grad_norm": 1.0682870149612427, - "learning_rate": 8.70838092820578e-06, - "loss": 0.0451, - "step": 66725 - }, - { - "epoch": 1.6938697804289884, - "grad_norm": 0.4572325646877289, - "learning_rate": 8.707534797140078e-06, - "loss": 0.0267, - "step": 66730 - }, - { - "epoch": 1.6939967000888436, - "grad_norm": 0.24679934978485107, - "learning_rate": 8.706688666074375e-06, - "loss": 0.0486, - "step": 66735 - }, - { - "epoch": 1.694123619748699, - "grad_norm": 0.5378508567810059, - "learning_rate": 8.705842535008673e-06, - "loss": 0.0297, - "step": 66740 - }, - { - "epoch": 1.6942505394085545, - "grad_norm": 0.6316224932670593, - "learning_rate": 8.704996403942972e-06, - "loss": 0.0306, - "step": 66745 - }, - { - "epoch": 1.6943774590684098, - "grad_norm": 0.4232020974159241, - "learning_rate": 8.70415027287727e-06, - "loss": 0.0437, - "step": 66750 - }, - { - "epoch": 1.694504378728265, - "grad_norm": 0.5588883757591248, - "learning_rate": 8.703304141811567e-06, - "loss": 0.0369, - "step": 66755 - }, - { - "epoch": 1.6946312983881202, - "grad_norm": 0.5804545879364014, - "learning_rate": 8.702458010745865e-06, - "loss": 0.036, - "step": 66760 - }, - { - "epoch": 1.6947582180479757, - "grad_norm": 0.375618577003479, - "learning_rate": 8.701611879680163e-06, - "loss": 0.0641, - "step": 66765 - }, - { - "epoch": 1.694885137707831, - "grad_norm": 0.5220339894294739, - "learning_rate": 8.700765748614462e-06, - "loss": 0.0371, - "step": 66770 - }, - { - "epoch": 1.6950120573676863, - "grad_norm": 0.3992875814437866, - "learning_rate": 8.699919617548759e-06, - "loss": 0.0501, - "step": 66775 - }, - { - "epoch": 1.6951389770275416, - "grad_norm": 0.2506660521030426, - "learning_rate": 8.699073486483057e-06, - "loss": 0.0342, - "step": 66780 - }, - { - "epoch": 1.6952658966873968, - "grad_norm": 0.44462573528289795, - "learning_rate": 8.698227355417355e-06, - "loss": 0.0408, - "step": 66785 - }, - { - "epoch": 1.695392816347252, - "grad_norm": 0.41986700892448425, - "learning_rate": 8.697381224351654e-06, - "loss": 0.0301, - "step": 66790 - }, - { - "epoch": 1.6955197360071075, - "grad_norm": 0.5056749582290649, - "learning_rate": 8.69653509328595e-06, - "loss": 0.0367, - "step": 66795 - }, - { - "epoch": 1.695646655666963, - "grad_norm": 0.5079525709152222, - "learning_rate": 8.695688962220249e-06, - "loss": 0.0462, - "step": 66800 - }, - { - "epoch": 1.6957735753268182, - "grad_norm": 0.45803892612457275, - "learning_rate": 8.694842831154546e-06, - "loss": 0.0432, - "step": 66805 - }, - { - "epoch": 1.6959004949866734, - "grad_norm": 0.5618603229522705, - "learning_rate": 8.693996700088844e-06, - "loss": 0.0438, - "step": 66810 - }, - { - "epoch": 1.6960274146465286, - "grad_norm": 0.532414436340332, - "learning_rate": 8.693150569023142e-06, - "loss": 0.0411, - "step": 66815 - }, - { - "epoch": 1.696154334306384, - "grad_norm": 0.720840334892273, - "learning_rate": 8.69230443795744e-06, - "loss": 0.0374, - "step": 66820 - }, - { - "epoch": 1.6962812539662395, - "grad_norm": 0.43845832347869873, - "learning_rate": 8.691458306891738e-06, - "loss": 0.0538, - "step": 66825 - }, - { - "epoch": 1.6964081736260948, - "grad_norm": 0.5285247564315796, - "learning_rate": 8.690612175826036e-06, - "loss": 0.036, - "step": 66830 - }, - { - "epoch": 1.69653509328595, - "grad_norm": 1.7410032749176025, - "learning_rate": 8.689766044760334e-06, - "loss": 0.0454, - "step": 66835 - }, - { - "epoch": 1.6966620129458052, - "grad_norm": 0.4127763509750366, - "learning_rate": 8.688919913694633e-06, - "loss": 0.0346, - "step": 66840 - }, - { - "epoch": 1.6967889326056607, - "grad_norm": 0.6610112190246582, - "learning_rate": 8.68807378262893e-06, - "loss": 0.0306, - "step": 66845 - }, - { - "epoch": 1.696915852265516, - "grad_norm": 0.49820899963378906, - "learning_rate": 8.687227651563228e-06, - "loss": 0.0337, - "step": 66850 - }, - { - "epoch": 1.6970427719253713, - "grad_norm": 0.4301708936691284, - "learning_rate": 8.686381520497526e-06, - "loss": 0.0407, - "step": 66855 - }, - { - "epoch": 1.6971696915852266, - "grad_norm": 0.4637308418750763, - "learning_rate": 8.685535389431825e-06, - "loss": 0.0334, - "step": 66860 - }, - { - "epoch": 1.6972966112450818, - "grad_norm": 0.47773435711860657, - "learning_rate": 8.684689258366121e-06, - "loss": 0.0558, - "step": 66865 - }, - { - "epoch": 1.697423530904937, - "grad_norm": 0.640207052230835, - "learning_rate": 8.68384312730042e-06, - "loss": 0.0344, - "step": 66870 - }, - { - "epoch": 1.6975504505647925, - "grad_norm": 0.32884907722473145, - "learning_rate": 8.682996996234716e-06, - "loss": 0.0642, - "step": 66875 - }, - { - "epoch": 1.697677370224648, - "grad_norm": 0.3368505537509918, - "learning_rate": 8.682150865169015e-06, - "loss": 0.0365, - "step": 66880 - }, - { - "epoch": 1.6978042898845032, - "grad_norm": 0.3208058774471283, - "learning_rate": 8.681304734103313e-06, - "loss": 0.0196, - "step": 66885 - }, - { - "epoch": 1.6979312095443584, - "grad_norm": 0.4512471854686737, - "learning_rate": 8.680458603037612e-06, - "loss": 0.0454, - "step": 66890 - }, - { - "epoch": 1.6980581292042136, - "grad_norm": 0.9348480105400085, - "learning_rate": 8.679612471971908e-06, - "loss": 0.039, - "step": 66895 - }, - { - "epoch": 1.698185048864069, - "grad_norm": 0.42008835077285767, - "learning_rate": 8.678766340906207e-06, - "loss": 0.0351, - "step": 66900 - }, - { - "epoch": 1.6983119685239245, - "grad_norm": 0.3385193347930908, - "learning_rate": 8.677920209840505e-06, - "loss": 0.0283, - "step": 66905 - }, - { - "epoch": 1.6984388881837797, - "grad_norm": 0.6068434715270996, - "learning_rate": 8.677074078774804e-06, - "loss": 0.0447, - "step": 66910 - }, - { - "epoch": 1.698565807843635, - "grad_norm": 0.654518187046051, - "learning_rate": 8.6762279477091e-06, - "loss": 0.0374, - "step": 66915 - }, - { - "epoch": 1.6986927275034902, - "grad_norm": 0.34086495637893677, - "learning_rate": 8.675381816643399e-06, - "loss": 0.0482, - "step": 66920 - }, - { - "epoch": 1.6988196471633454, - "grad_norm": 0.6360894441604614, - "learning_rate": 8.674535685577697e-06, - "loss": 0.0509, - "step": 66925 - }, - { - "epoch": 1.6989465668232009, - "grad_norm": 0.37078285217285156, - "learning_rate": 8.673689554511995e-06, - "loss": 0.0299, - "step": 66930 - }, - { - "epoch": 1.6990734864830563, - "grad_norm": 0.49737128615379333, - "learning_rate": 8.672843423446292e-06, - "loss": 0.0461, - "step": 66935 - }, - { - "epoch": 1.6992004061429116, - "grad_norm": 0.6854659914970398, - "learning_rate": 8.67199729238059e-06, - "loss": 0.0489, - "step": 66940 - }, - { - "epoch": 1.6993273258027668, - "grad_norm": 2.074770450592041, - "learning_rate": 8.671151161314887e-06, - "loss": 0.0478, - "step": 66945 - }, - { - "epoch": 1.699454245462622, - "grad_norm": 0.4026559591293335, - "learning_rate": 8.670305030249186e-06, - "loss": 0.0403, - "step": 66950 - }, - { - "epoch": 1.6995811651224775, - "grad_norm": 0.36125630140304565, - "learning_rate": 8.669458899183484e-06, - "loss": 0.041, - "step": 66955 - }, - { - "epoch": 1.699708084782333, - "grad_norm": 0.4974239766597748, - "learning_rate": 8.668612768117783e-06, - "loss": 0.0447, - "step": 66960 - }, - { - "epoch": 1.6998350044421882, - "grad_norm": 0.35104668140411377, - "learning_rate": 8.66776663705208e-06, - "loss": 0.0493, - "step": 66965 - }, - { - "epoch": 1.6999619241020434, - "grad_norm": 0.16519971191883087, - "learning_rate": 8.666920505986378e-06, - "loss": 0.0297, - "step": 66970 - }, - { - "epoch": 1.7000888437618986, - "grad_norm": 0.6799324154853821, - "learning_rate": 8.666074374920676e-06, - "loss": 0.0338, - "step": 66975 - }, - { - "epoch": 1.700215763421754, - "grad_norm": 0.502739667892456, - "learning_rate": 8.665228243854974e-06, - "loss": 0.0479, - "step": 66980 - }, - { - "epoch": 1.7003426830816093, - "grad_norm": 0.3569919466972351, - "learning_rate": 8.664382112789271e-06, - "loss": 0.0498, - "step": 66985 - }, - { - "epoch": 1.7004696027414647, - "grad_norm": 0.5881399512290955, - "learning_rate": 8.66353598172357e-06, - "loss": 0.04, - "step": 66990 - }, - { - "epoch": 1.70059652240132, - "grad_norm": 0.24494007229804993, - "learning_rate": 8.662689850657868e-06, - "loss": 0.0458, - "step": 66995 - }, - { - "epoch": 1.7007234420611752, - "grad_norm": 0.5692609548568726, - "learning_rate": 8.661843719592166e-06, - "loss": 0.0519, - "step": 67000 - }, - { - "epoch": 1.7008503617210304, - "grad_norm": 0.3236960768699646, - "learning_rate": 8.660997588526463e-06, - "loss": 0.0336, - "step": 67005 - }, - { - "epoch": 1.7009772813808859, - "grad_norm": 0.6571763157844543, - "learning_rate": 8.660151457460761e-06, - "loss": 0.0467, - "step": 67010 - }, - { - "epoch": 1.7011042010407413, - "grad_norm": 0.4435391128063202, - "learning_rate": 8.659305326395058e-06, - "loss": 0.027, - "step": 67015 - }, - { - "epoch": 1.7012311207005966, - "grad_norm": 0.28625503182411194, - "learning_rate": 8.658459195329357e-06, - "loss": 0.0376, - "step": 67020 - }, - { - "epoch": 1.7013580403604518, - "grad_norm": 0.9783508777618408, - "learning_rate": 8.657613064263655e-06, - "loss": 0.031, - "step": 67025 - }, - { - "epoch": 1.701484960020307, - "grad_norm": 0.09554558992385864, - "learning_rate": 8.656766933197953e-06, - "loss": 0.0428, - "step": 67030 - }, - { - "epoch": 1.7016118796801625, - "grad_norm": 0.5458802580833435, - "learning_rate": 8.65592080213225e-06, - "loss": 0.0395, - "step": 67035 - }, - { - "epoch": 1.701738799340018, - "grad_norm": 0.6194077134132385, - "learning_rate": 8.655074671066548e-06, - "loss": 0.0574, - "step": 67040 - }, - { - "epoch": 1.7018657189998732, - "grad_norm": 0.5261430144309998, - "learning_rate": 8.654228540000847e-06, - "loss": 0.0399, - "step": 67045 - }, - { - "epoch": 1.7019926386597284, - "grad_norm": 0.9145923852920532, - "learning_rate": 8.653382408935145e-06, - "loss": 0.0432, - "step": 67050 - }, - { - "epoch": 1.7021195583195836, - "grad_norm": 0.4009668529033661, - "learning_rate": 8.652536277869442e-06, - "loss": 0.031, - "step": 67055 - }, - { - "epoch": 1.702246477979439, - "grad_norm": 0.2679159939289093, - "learning_rate": 8.65169014680374e-06, - "loss": 0.0255, - "step": 67060 - }, - { - "epoch": 1.7023733976392943, - "grad_norm": 0.82293701171875, - "learning_rate": 8.650844015738039e-06, - "loss": 0.0393, - "step": 67065 - }, - { - "epoch": 1.7025003172991497, - "grad_norm": 0.3479823172092438, - "learning_rate": 8.649997884672337e-06, - "loss": 0.0387, - "step": 67070 - }, - { - "epoch": 1.702627236959005, - "grad_norm": 0.32543447613716125, - "learning_rate": 8.649151753606634e-06, - "loss": 0.0426, - "step": 67075 - }, - { - "epoch": 1.7027541566188602, - "grad_norm": 0.40509557723999023, - "learning_rate": 8.648305622540932e-06, - "loss": 0.0394, - "step": 67080 - }, - { - "epoch": 1.7028810762787154, - "grad_norm": 0.5838325023651123, - "learning_rate": 8.647459491475229e-06, - "loss": 0.0374, - "step": 67085 - }, - { - "epoch": 1.7030079959385709, - "grad_norm": 0.5330148935317993, - "learning_rate": 8.646613360409527e-06, - "loss": 0.0404, - "step": 67090 - }, - { - "epoch": 1.7031349155984263, - "grad_norm": 2.0086278915405273, - "learning_rate": 8.645767229343826e-06, - "loss": 0.0282, - "step": 67095 - }, - { - "epoch": 1.7032618352582816, - "grad_norm": 0.2905839681625366, - "learning_rate": 8.644921098278124e-06, - "loss": 0.0329, - "step": 67100 - }, - { - "epoch": 1.7033887549181368, - "grad_norm": 0.35004523396492004, - "learning_rate": 8.644074967212423e-06, - "loss": 0.0501, - "step": 67105 - }, - { - "epoch": 1.703515674577992, - "grad_norm": 0.26358285546302795, - "learning_rate": 8.64322883614672e-06, - "loss": 0.0509, - "step": 67110 - }, - { - "epoch": 1.7036425942378475, - "grad_norm": 0.4906579852104187, - "learning_rate": 8.642382705081018e-06, - "loss": 0.0398, - "step": 67115 - }, - { - "epoch": 1.7037695138977027, - "grad_norm": 0.32191628217697144, - "learning_rate": 8.641536574015316e-06, - "loss": 0.0284, - "step": 67120 - }, - { - "epoch": 1.7038964335575582, - "grad_norm": 0.47358715534210205, - "learning_rate": 8.640690442949615e-06, - "loss": 0.0397, - "step": 67125 - }, - { - "epoch": 1.7040233532174134, - "grad_norm": 0.4499819278717041, - "learning_rate": 8.639844311883911e-06, - "loss": 0.0283, - "step": 67130 - }, - { - "epoch": 1.7041502728772686, - "grad_norm": 0.5054529309272766, - "learning_rate": 8.63899818081821e-06, - "loss": 0.0525, - "step": 67135 - }, - { - "epoch": 1.7042771925371238, - "grad_norm": 0.2551015615463257, - "learning_rate": 8.638152049752508e-06, - "loss": 0.0393, - "step": 67140 - }, - { - "epoch": 1.7044041121969793, - "grad_norm": 0.5458147525787354, - "learning_rate": 8.637305918686806e-06, - "loss": 0.0434, - "step": 67145 - }, - { - "epoch": 1.7045310318568347, - "grad_norm": 0.36381474137306213, - "learning_rate": 8.636459787621103e-06, - "loss": 0.0399, - "step": 67150 - }, - { - "epoch": 1.70465795151669, - "grad_norm": 0.6682787537574768, - "learning_rate": 8.635613656555402e-06, - "loss": 0.0482, - "step": 67155 - }, - { - "epoch": 1.7047848711765452, - "grad_norm": 0.6743947863578796, - "learning_rate": 8.634767525489698e-06, - "loss": 0.0411, - "step": 67160 - }, - { - "epoch": 1.7049117908364004, - "grad_norm": 0.5750877857208252, - "learning_rate": 8.633921394423997e-06, - "loss": 0.0486, - "step": 67165 - }, - { - "epoch": 1.7050387104962559, - "grad_norm": 0.3522535562515259, - "learning_rate": 8.633075263358295e-06, - "loss": 0.0424, - "step": 67170 - }, - { - "epoch": 1.7051656301561113, - "grad_norm": 0.38821011781692505, - "learning_rate": 8.632229132292593e-06, - "loss": 0.0385, - "step": 67175 - }, - { - "epoch": 1.7052925498159666, - "grad_norm": 0.2804947793483734, - "learning_rate": 8.63138300122689e-06, - "loss": 0.0392, - "step": 67180 - }, - { - "epoch": 1.7054194694758218, - "grad_norm": 0.5914779901504517, - "learning_rate": 8.630536870161189e-06, - "loss": 0.0324, - "step": 67185 - }, - { - "epoch": 1.705546389135677, - "grad_norm": 0.5702779293060303, - "learning_rate": 8.629690739095487e-06, - "loss": 0.0557, - "step": 67190 - }, - { - "epoch": 1.7056733087955325, - "grad_norm": 0.4389922022819519, - "learning_rate": 8.628844608029785e-06, - "loss": 0.0363, - "step": 67195 - }, - { - "epoch": 1.7058002284553877, - "grad_norm": 0.5129729509353638, - "learning_rate": 8.627998476964082e-06, - "loss": 0.039, - "step": 67200 - }, - { - "epoch": 1.7059271481152432, - "grad_norm": 0.39568811655044556, - "learning_rate": 8.62715234589838e-06, - "loss": 0.038, - "step": 67205 - }, - { - "epoch": 1.7060540677750984, - "grad_norm": 0.27893438935279846, - "learning_rate": 8.626306214832679e-06, - "loss": 0.0333, - "step": 67210 - }, - { - "epoch": 1.7061809874349536, - "grad_norm": 0.22289088368415833, - "learning_rate": 8.625460083766977e-06, - "loss": 0.038, - "step": 67215 - }, - { - "epoch": 1.7063079070948088, - "grad_norm": 0.6347943544387817, - "learning_rate": 8.624613952701274e-06, - "loss": 0.0422, - "step": 67220 - }, - { - "epoch": 1.7064348267546643, - "grad_norm": 0.5310726761817932, - "learning_rate": 8.623767821635572e-06, - "loss": 0.0359, - "step": 67225 - }, - { - "epoch": 1.7065617464145197, - "grad_norm": 0.4519781768321991, - "learning_rate": 8.622921690569869e-06, - "loss": 0.0268, - "step": 67230 - }, - { - "epoch": 1.706688666074375, - "grad_norm": 0.34778520464897156, - "learning_rate": 8.622075559504168e-06, - "loss": 0.0437, - "step": 67235 - }, - { - "epoch": 1.7068155857342302, - "grad_norm": 0.35048574209213257, - "learning_rate": 8.621229428438466e-06, - "loss": 0.0216, - "step": 67240 - }, - { - "epoch": 1.7069425053940854, - "grad_norm": 0.3596162796020508, - "learning_rate": 8.620383297372764e-06, - "loss": 0.04, - "step": 67245 - }, - { - "epoch": 1.7070694250539409, - "grad_norm": 0.6325759291648865, - "learning_rate": 8.619537166307061e-06, - "loss": 0.0417, - "step": 67250 - }, - { - "epoch": 1.7071963447137963, - "grad_norm": 0.3925301134586334, - "learning_rate": 8.61869103524136e-06, - "loss": 0.0371, - "step": 67255 - }, - { - "epoch": 1.7073232643736516, - "grad_norm": 0.4464530348777771, - "learning_rate": 8.617844904175658e-06, - "loss": 0.0515, - "step": 67260 - }, - { - "epoch": 1.7074501840335068, - "grad_norm": 0.29672253131866455, - "learning_rate": 8.616998773109956e-06, - "loss": 0.0253, - "step": 67265 - }, - { - "epoch": 1.707577103693362, - "grad_norm": 0.521115779876709, - "learning_rate": 8.616152642044253e-06, - "loss": 0.0399, - "step": 67270 - }, - { - "epoch": 1.7077040233532172, - "grad_norm": 0.45761939883232117, - "learning_rate": 8.615306510978551e-06, - "loss": 0.0421, - "step": 67275 - }, - { - "epoch": 1.7078309430130727, - "grad_norm": 0.686006486415863, - "learning_rate": 8.61446037991285e-06, - "loss": 0.0406, - "step": 67280 - }, - { - "epoch": 1.7079578626729282, - "grad_norm": 0.31226980686187744, - "learning_rate": 8.613614248847148e-06, - "loss": 0.0295, - "step": 67285 - }, - { - "epoch": 1.7080847823327834, - "grad_norm": 0.8534274697303772, - "learning_rate": 8.612768117781445e-06, - "loss": 0.0351, - "step": 67290 - }, - { - "epoch": 1.7082117019926386, - "grad_norm": 0.36923375725746155, - "learning_rate": 8.611921986715743e-06, - "loss": 0.0401, - "step": 67295 - }, - { - "epoch": 1.7083386216524938, - "grad_norm": 0.6766054034233093, - "learning_rate": 8.61107585565004e-06, - "loss": 0.0439, - "step": 67300 - }, - { - "epoch": 1.7084655413123493, - "grad_norm": 0.29809677600860596, - "learning_rate": 8.610229724584338e-06, - "loss": 0.0379, - "step": 67305 - }, - { - "epoch": 1.7085924609722047, - "grad_norm": 0.3006998300552368, - "learning_rate": 8.609383593518637e-06, - "loss": 0.033, - "step": 67310 - }, - { - "epoch": 1.70871938063206, - "grad_norm": 0.390445739030838, - "learning_rate": 8.608537462452935e-06, - "loss": 0.0286, - "step": 67315 - }, - { - "epoch": 1.7088463002919152, - "grad_norm": 0.6080141067504883, - "learning_rate": 8.607691331387232e-06, - "loss": 0.0444, - "step": 67320 - }, - { - "epoch": 1.7089732199517704, - "grad_norm": 0.7185773849487305, - "learning_rate": 8.60684520032153e-06, - "loss": 0.0467, - "step": 67325 - }, - { - "epoch": 1.7091001396116259, - "grad_norm": 0.42156165838241577, - "learning_rate": 8.605999069255829e-06, - "loss": 0.0358, - "step": 67330 - }, - { - "epoch": 1.709227059271481, - "grad_norm": 0.21136999130249023, - "learning_rate": 8.605152938190127e-06, - "loss": 0.042, - "step": 67335 - }, - { - "epoch": 1.7093539789313366, - "grad_norm": 0.6101731657981873, - "learning_rate": 8.604306807124424e-06, - "loss": 0.0417, - "step": 67340 - }, - { - "epoch": 1.7094808985911918, - "grad_norm": 0.44185465574264526, - "learning_rate": 8.603460676058722e-06, - "loss": 0.0274, - "step": 67345 - }, - { - "epoch": 1.709607818251047, - "grad_norm": 0.4041770398616791, - "learning_rate": 8.60261454499302e-06, - "loss": 0.0394, - "step": 67350 - }, - { - "epoch": 1.7097347379109022, - "grad_norm": 0.2922844886779785, - "learning_rate": 8.601768413927319e-06, - "loss": 0.025, - "step": 67355 - }, - { - "epoch": 1.7098616575707577, - "grad_norm": 0.44869354367256165, - "learning_rate": 8.600922282861616e-06, - "loss": 0.0332, - "step": 67360 - }, - { - "epoch": 1.7099885772306131, - "grad_norm": 0.48084068298339844, - "learning_rate": 8.600076151795914e-06, - "loss": 0.0303, - "step": 67365 - }, - { - "epoch": 1.7101154968904684, - "grad_norm": 0.4333471357822418, - "learning_rate": 8.59923002073021e-06, - "loss": 0.0507, - "step": 67370 - }, - { - "epoch": 1.7102424165503236, - "grad_norm": 0.5578636527061462, - "learning_rate": 8.59838388966451e-06, - "loss": 0.0478, - "step": 67375 - }, - { - "epoch": 1.7103693362101788, - "grad_norm": 0.32347333431243896, - "learning_rate": 8.597537758598808e-06, - "loss": 0.0324, - "step": 67380 - }, - { - "epoch": 1.7104962558700343, - "grad_norm": 0.8106073141098022, - "learning_rate": 8.596691627533106e-06, - "loss": 0.0458, - "step": 67385 - }, - { - "epoch": 1.7106231755298897, - "grad_norm": 0.30845651030540466, - "learning_rate": 8.595845496467403e-06, - "loss": 0.015, - "step": 67390 - }, - { - "epoch": 1.710750095189745, - "grad_norm": 0.55452960729599, - "learning_rate": 8.594999365401701e-06, - "loss": 0.0395, - "step": 67395 - }, - { - "epoch": 1.7108770148496002, - "grad_norm": 0.45646896958351135, - "learning_rate": 8.594153234336e-06, - "loss": 0.0465, - "step": 67400 - }, - { - "epoch": 1.7110039345094554, - "grad_norm": 0.41648226976394653, - "learning_rate": 8.593307103270298e-06, - "loss": 0.0487, - "step": 67405 - }, - { - "epoch": 1.7111308541693109, - "grad_norm": 0.36613428592681885, - "learning_rate": 8.592460972204595e-06, - "loss": 0.0457, - "step": 67410 - }, - { - "epoch": 1.711257773829166, - "grad_norm": 0.4015010893344879, - "learning_rate": 8.591614841138893e-06, - "loss": 0.0303, - "step": 67415 - }, - { - "epoch": 1.7113846934890216, - "grad_norm": 0.38568079471588135, - "learning_rate": 8.590768710073191e-06, - "loss": 0.0401, - "step": 67420 - }, - { - "epoch": 1.7115116131488768, - "grad_norm": 0.536830484867096, - "learning_rate": 8.58992257900749e-06, - "loss": 0.0248, - "step": 67425 - }, - { - "epoch": 1.711638532808732, - "grad_norm": 0.49493691325187683, - "learning_rate": 8.589076447941787e-06, - "loss": 0.0345, - "step": 67430 - }, - { - "epoch": 1.7117654524685872, - "grad_norm": 0.7382254004478455, - "learning_rate": 8.588230316876085e-06, - "loss": 0.0498, - "step": 67435 - }, - { - "epoch": 1.7118923721284427, - "grad_norm": 0.5080394148826599, - "learning_rate": 8.587384185810382e-06, - "loss": 0.0414, - "step": 67440 - }, - { - "epoch": 1.7120192917882981, - "grad_norm": 0.30429157614707947, - "learning_rate": 8.58653805474468e-06, - "loss": 0.052, - "step": 67445 - }, - { - "epoch": 1.7121462114481534, - "grad_norm": 0.5189821720123291, - "learning_rate": 8.585691923678978e-06, - "loss": 0.0385, - "step": 67450 - }, - { - "epoch": 1.7122731311080086, - "grad_norm": 0.31469812989234924, - "learning_rate": 8.584845792613277e-06, - "loss": 0.0328, - "step": 67455 - }, - { - "epoch": 1.7124000507678638, - "grad_norm": 0.2964833378791809, - "learning_rate": 8.583999661547574e-06, - "loss": 0.0306, - "step": 67460 - }, - { - "epoch": 1.7125269704277193, - "grad_norm": 0.7222344279289246, - "learning_rate": 8.583153530481872e-06, - "loss": 0.0377, - "step": 67465 - }, - { - "epoch": 1.7126538900875745, - "grad_norm": 0.6067143678665161, - "learning_rate": 8.58230739941617e-06, - "loss": 0.0478, - "step": 67470 - }, - { - "epoch": 1.71278080974743, - "grad_norm": 0.5495322346687317, - "learning_rate": 8.581461268350469e-06, - "loss": 0.0385, - "step": 67475 - }, - { - "epoch": 1.7129077294072852, - "grad_norm": 0.5362567901611328, - "learning_rate": 8.580615137284766e-06, - "loss": 0.0441, - "step": 67480 - }, - { - "epoch": 1.7130346490671404, - "grad_norm": 0.45274531841278076, - "learning_rate": 8.579769006219064e-06, - "loss": 0.0398, - "step": 67485 - }, - { - "epoch": 1.7131615687269957, - "grad_norm": 0.5115793347358704, - "learning_rate": 8.578922875153362e-06, - "loss": 0.0371, - "step": 67490 - }, - { - "epoch": 1.713288488386851, - "grad_norm": 1.0301432609558105, - "learning_rate": 8.57807674408766e-06, - "loss": 0.0374, - "step": 67495 - }, - { - "epoch": 1.7134154080467066, - "grad_norm": 0.6196853518486023, - "learning_rate": 8.577230613021957e-06, - "loss": 0.0527, - "step": 67500 - }, - { - "epoch": 1.7135423277065618, - "grad_norm": 0.34638485312461853, - "learning_rate": 8.576384481956256e-06, - "loss": 0.0482, - "step": 67505 - }, - { - "epoch": 1.713669247366417, - "grad_norm": 0.37146419286727905, - "learning_rate": 8.575538350890553e-06, - "loss": 0.0318, - "step": 67510 - }, - { - "epoch": 1.7137961670262722, - "grad_norm": 0.4116324484348297, - "learning_rate": 8.574692219824851e-06, - "loss": 0.0445, - "step": 67515 - }, - { - "epoch": 1.7139230866861277, - "grad_norm": 0.7748345732688904, - "learning_rate": 8.57384608875915e-06, - "loss": 0.0469, - "step": 67520 - }, - { - "epoch": 1.7140500063459831, - "grad_norm": 0.43085777759552, - "learning_rate": 8.572999957693448e-06, - "loss": 0.0465, - "step": 67525 - }, - { - "epoch": 1.7141769260058384, - "grad_norm": 0.3745441138744354, - "learning_rate": 8.572153826627744e-06, - "loss": 0.0219, - "step": 67530 - }, - { - "epoch": 1.7143038456656936, - "grad_norm": 0.5869783163070679, - "learning_rate": 8.571307695562043e-06, - "loss": 0.0467, - "step": 67535 - }, - { - "epoch": 1.7144307653255488, - "grad_norm": 0.18497814238071442, - "learning_rate": 8.570461564496341e-06, - "loss": 0.034, - "step": 67540 - }, - { - "epoch": 1.7145576849854043, - "grad_norm": 0.4053872227668762, - "learning_rate": 8.56961543343064e-06, - "loss": 0.0386, - "step": 67545 - }, - { - "epoch": 1.7146846046452595, - "grad_norm": 0.326185405254364, - "learning_rate": 8.568769302364936e-06, - "loss": 0.0221, - "step": 67550 - }, - { - "epoch": 1.714811524305115, - "grad_norm": 0.3964194357395172, - "learning_rate": 8.567923171299235e-06, - "loss": 0.0393, - "step": 67555 - }, - { - "epoch": 1.7149384439649702, - "grad_norm": 0.5181395411491394, - "learning_rate": 8.567077040233533e-06, - "loss": 0.0364, - "step": 67560 - }, - { - "epoch": 1.7150653636248254, - "grad_norm": 0.578138530254364, - "learning_rate": 8.566230909167832e-06, - "loss": 0.0411, - "step": 67565 - }, - { - "epoch": 1.7151922832846807, - "grad_norm": 0.7108726501464844, - "learning_rate": 8.565384778102128e-06, - "loss": 0.0345, - "step": 67570 - }, - { - "epoch": 1.715319202944536, - "grad_norm": 0.6025850772857666, - "learning_rate": 8.564538647036427e-06, - "loss": 0.0483, - "step": 67575 - }, - { - "epoch": 1.7154461226043916, - "grad_norm": 0.5780872106552124, - "learning_rate": 8.563692515970723e-06, - "loss": 0.0355, - "step": 67580 - }, - { - "epoch": 1.7155730422642468, - "grad_norm": 0.5985276699066162, - "learning_rate": 8.562846384905022e-06, - "loss": 0.0252, - "step": 67585 - }, - { - "epoch": 1.715699961924102, - "grad_norm": 0.3801349103450775, - "learning_rate": 8.56200025383932e-06, - "loss": 0.0404, - "step": 67590 - }, - { - "epoch": 1.7158268815839572, - "grad_norm": 0.660167932510376, - "learning_rate": 8.561154122773619e-06, - "loss": 0.039, - "step": 67595 - }, - { - "epoch": 1.7159538012438127, - "grad_norm": 0.6507192254066467, - "learning_rate": 8.560307991707915e-06, - "loss": 0.0389, - "step": 67600 - }, - { - "epoch": 1.7160807209036681, - "grad_norm": 0.25171199440956116, - "learning_rate": 8.559461860642214e-06, - "loss": 0.037, - "step": 67605 - }, - { - "epoch": 1.7162076405635234, - "grad_norm": 0.47747665643692017, - "learning_rate": 8.558615729576512e-06, - "loss": 0.0381, - "step": 67610 - }, - { - "epoch": 1.7163345602233786, - "grad_norm": 0.34853214025497437, - "learning_rate": 8.55776959851081e-06, - "loss": 0.0356, - "step": 67615 - }, - { - "epoch": 1.7164614798832338, - "grad_norm": 0.296224445104599, - "learning_rate": 8.556923467445109e-06, - "loss": 0.0471, - "step": 67620 - }, - { - "epoch": 1.716588399543089, - "grad_norm": 0.7140027284622192, - "learning_rate": 8.556077336379406e-06, - "loss": 0.0438, - "step": 67625 - }, - { - "epoch": 1.7167153192029445, - "grad_norm": 1.1645307540893555, - "learning_rate": 8.555231205313704e-06, - "loss": 0.0623, - "step": 67630 - }, - { - "epoch": 1.7168422388628, - "grad_norm": 0.5276801586151123, - "learning_rate": 8.554385074248002e-06, - "loss": 0.0448, - "step": 67635 - }, - { - "epoch": 1.7169691585226552, - "grad_norm": 0.29253995418548584, - "learning_rate": 8.5535389431823e-06, - "loss": 0.0381, - "step": 67640 - }, - { - "epoch": 1.7170960781825104, - "grad_norm": 0.49541762471199036, - "learning_rate": 8.552692812116598e-06, - "loss": 0.0376, - "step": 67645 - }, - { - "epoch": 1.7172229978423657, - "grad_norm": 0.682847261428833, - "learning_rate": 8.551846681050896e-06, - "loss": 0.0453, - "step": 67650 - }, - { - "epoch": 1.717349917502221, - "grad_norm": 0.5222645998001099, - "learning_rate": 8.551000549985193e-06, - "loss": 0.0517, - "step": 67655 - }, - { - "epoch": 1.7174768371620766, - "grad_norm": 0.5596795082092285, - "learning_rate": 8.550154418919491e-06, - "loss": 0.0295, - "step": 67660 - }, - { - "epoch": 1.7176037568219318, - "grad_norm": 0.2622252106666565, - "learning_rate": 8.54930828785379e-06, - "loss": 0.0443, - "step": 67665 - }, - { - "epoch": 1.717730676481787, - "grad_norm": 0.4284403324127197, - "learning_rate": 8.548462156788088e-06, - "loss": 0.0441, - "step": 67670 - }, - { - "epoch": 1.7178575961416422, - "grad_norm": 0.8025849461555481, - "learning_rate": 8.547616025722385e-06, - "loss": 0.0289, - "step": 67675 - }, - { - "epoch": 1.7179845158014977, - "grad_norm": 0.4579409956932068, - "learning_rate": 8.546769894656683e-06, - "loss": 0.0409, - "step": 67680 - }, - { - "epoch": 1.718111435461353, - "grad_norm": 0.36855411529541016, - "learning_rate": 8.545923763590981e-06, - "loss": 0.0433, - "step": 67685 - }, - { - "epoch": 1.7182383551212084, - "grad_norm": 0.406935453414917, - "learning_rate": 8.54507763252528e-06, - "loss": 0.0357, - "step": 67690 - }, - { - "epoch": 1.7183652747810636, - "grad_norm": 0.26523929834365845, - "learning_rate": 8.544231501459576e-06, - "loss": 0.0443, - "step": 67695 - }, - { - "epoch": 1.7184921944409188, - "grad_norm": 0.33878371119499207, - "learning_rate": 8.543385370393875e-06, - "loss": 0.0514, - "step": 67700 - }, - { - "epoch": 1.718619114100774, - "grad_norm": 0.824368417263031, - "learning_rate": 8.542539239328173e-06, - "loss": 0.0501, - "step": 67705 - }, - { - "epoch": 1.7187460337606295, - "grad_norm": 0.41628915071487427, - "learning_rate": 8.541693108262472e-06, - "loss": 0.0416, - "step": 67710 - }, - { - "epoch": 1.718872953420485, - "grad_norm": 1.1856333017349243, - "learning_rate": 8.540846977196768e-06, - "loss": 0.0372, - "step": 67715 - }, - { - "epoch": 1.7189998730803402, - "grad_norm": 0.5154406428337097, - "learning_rate": 8.540000846131067e-06, - "loss": 0.0404, - "step": 67720 - }, - { - "epoch": 1.7191267927401954, - "grad_norm": 0.45439645648002625, - "learning_rate": 8.539154715065363e-06, - "loss": 0.0294, - "step": 67725 - }, - { - "epoch": 1.7192537124000506, - "grad_norm": 0.4539130628108978, - "learning_rate": 8.538308583999662e-06, - "loss": 0.0349, - "step": 67730 - }, - { - "epoch": 1.719380632059906, - "grad_norm": 0.471727579832077, - "learning_rate": 8.53746245293396e-06, - "loss": 0.0593, - "step": 67735 - }, - { - "epoch": 1.7195075517197616, - "grad_norm": 0.25487491488456726, - "learning_rate": 8.536616321868259e-06, - "loss": 0.0338, - "step": 67740 - }, - { - "epoch": 1.7196344713796168, - "grad_norm": 0.9195664525032043, - "learning_rate": 8.535770190802555e-06, - "loss": 0.0293, - "step": 67745 - }, - { - "epoch": 1.719761391039472, - "grad_norm": 0.3924844264984131, - "learning_rate": 8.534924059736854e-06, - "loss": 0.0288, - "step": 67750 - }, - { - "epoch": 1.7198883106993272, - "grad_norm": 0.3879323899745941, - "learning_rate": 8.534077928671152e-06, - "loss": 0.0441, - "step": 67755 - }, - { - "epoch": 1.7200152303591827, - "grad_norm": 0.32251453399658203, - "learning_rate": 8.53323179760545e-06, - "loss": 0.043, - "step": 67760 - }, - { - "epoch": 1.720142150019038, - "grad_norm": 0.5954524278640747, - "learning_rate": 8.532385666539747e-06, - "loss": 0.0427, - "step": 67765 - }, - { - "epoch": 1.7202690696788934, - "grad_norm": 0.7214831709861755, - "learning_rate": 8.531539535474046e-06, - "loss": 0.0381, - "step": 67770 - }, - { - "epoch": 1.7203959893387486, - "grad_norm": 0.3493395447731018, - "learning_rate": 8.530693404408344e-06, - "loss": 0.0458, - "step": 67775 - }, - { - "epoch": 1.7205229089986038, - "grad_norm": 0.359416663646698, - "learning_rate": 8.529847273342643e-06, - "loss": 0.0445, - "step": 67780 - }, - { - "epoch": 1.720649828658459, - "grad_norm": 0.35993653535842896, - "learning_rate": 8.52900114227694e-06, - "loss": 0.028, - "step": 67785 - }, - { - "epoch": 1.7207767483183145, - "grad_norm": 0.7941647171974182, - "learning_rate": 8.528155011211238e-06, - "loss": 0.051, - "step": 67790 - }, - { - "epoch": 1.72090366797817, - "grad_norm": 0.2200041115283966, - "learning_rate": 8.527308880145534e-06, - "loss": 0.0313, - "step": 67795 - }, - { - "epoch": 1.7210305876380252, - "grad_norm": 0.41012272238731384, - "learning_rate": 8.526462749079833e-06, - "loss": 0.0338, - "step": 67800 - }, - { - "epoch": 1.7211575072978804, - "grad_norm": 1.2153871059417725, - "learning_rate": 8.525616618014131e-06, - "loss": 0.05, - "step": 67805 - }, - { - "epoch": 1.7212844269577356, - "grad_norm": 0.2832372188568115, - "learning_rate": 8.52477048694843e-06, - "loss": 0.0401, - "step": 67810 - }, - { - "epoch": 1.721411346617591, - "grad_norm": 0.5399957895278931, - "learning_rate": 8.523924355882726e-06, - "loss": 0.0516, - "step": 67815 - }, - { - "epoch": 1.7215382662774463, - "grad_norm": 0.5871930122375488, - "learning_rate": 8.523078224817025e-06, - "loss": 0.0256, - "step": 67820 - }, - { - "epoch": 1.7216651859373018, - "grad_norm": 1.1294498443603516, - "learning_rate": 8.522232093751323e-06, - "loss": 0.0244, - "step": 67825 - }, - { - "epoch": 1.721792105597157, - "grad_norm": 0.48491570353507996, - "learning_rate": 8.521385962685621e-06, - "loss": 0.0527, - "step": 67830 - }, - { - "epoch": 1.7219190252570122, - "grad_norm": 0.6031864285469055, - "learning_rate": 8.520539831619918e-06, - "loss": 0.0357, - "step": 67835 - }, - { - "epoch": 1.7220459449168675, - "grad_norm": 0.31897714734077454, - "learning_rate": 8.519693700554217e-06, - "loss": 0.0369, - "step": 67840 - }, - { - "epoch": 1.722172864576723, - "grad_norm": 0.5244006514549255, - "learning_rate": 8.518847569488515e-06, - "loss": 0.0428, - "step": 67845 - }, - { - "epoch": 1.7222997842365784, - "grad_norm": 0.6056610941886902, - "learning_rate": 8.518001438422813e-06, - "loss": 0.0391, - "step": 67850 - }, - { - "epoch": 1.7224267038964336, - "grad_norm": 0.5294071435928345, - "learning_rate": 8.51715530735711e-06, - "loss": 0.0326, - "step": 67855 - }, - { - "epoch": 1.7225536235562888, - "grad_norm": 0.45350033044815063, - "learning_rate": 8.516309176291408e-06, - "loss": 0.0371, - "step": 67860 - }, - { - "epoch": 1.722680543216144, - "grad_norm": 0.2912462055683136, - "learning_rate": 8.515463045225705e-06, - "loss": 0.0325, - "step": 67865 - }, - { - "epoch": 1.7228074628759995, - "grad_norm": 0.43256106972694397, - "learning_rate": 8.514616914160004e-06, - "loss": 0.0365, - "step": 67870 - }, - { - "epoch": 1.722934382535855, - "grad_norm": 0.25266945362091064, - "learning_rate": 8.513770783094302e-06, - "loss": 0.0401, - "step": 67875 - }, - { - "epoch": 1.7230613021957102, - "grad_norm": 0.5185664296150208, - "learning_rate": 8.5129246520286e-06, - "loss": 0.0735, - "step": 67880 - }, - { - "epoch": 1.7231882218555654, - "grad_norm": 0.4055810570716858, - "learning_rate": 8.512078520962897e-06, - "loss": 0.0354, - "step": 67885 - }, - { - "epoch": 1.7233151415154206, - "grad_norm": 0.45509862899780273, - "learning_rate": 8.511232389897196e-06, - "loss": 0.0281, - "step": 67890 - }, - { - "epoch": 1.723442061175276, - "grad_norm": 0.4680424928665161, - "learning_rate": 8.510386258831494e-06, - "loss": 0.0303, - "step": 67895 - }, - { - "epoch": 1.7235689808351313, - "grad_norm": 0.5148192644119263, - "learning_rate": 8.509540127765792e-06, - "loss": 0.0403, - "step": 67900 - }, - { - "epoch": 1.7236959004949868, - "grad_norm": 0.7315452694892883, - "learning_rate": 8.508693996700089e-06, - "loss": 0.0473, - "step": 67905 - }, - { - "epoch": 1.723822820154842, - "grad_norm": 0.5790120959281921, - "learning_rate": 8.507847865634387e-06, - "loss": 0.0301, - "step": 67910 - }, - { - "epoch": 1.7239497398146972, - "grad_norm": 0.4143039882183075, - "learning_rate": 8.507001734568686e-06, - "loss": 0.0322, - "step": 67915 - }, - { - "epoch": 1.7240766594745525, - "grad_norm": 0.37762102484703064, - "learning_rate": 8.506155603502984e-06, - "loss": 0.0563, - "step": 67920 - }, - { - "epoch": 1.724203579134408, - "grad_norm": 0.3399312496185303, - "learning_rate": 8.505309472437281e-06, - "loss": 0.0431, - "step": 67925 - }, - { - "epoch": 1.7243304987942634, - "grad_norm": 0.43181049823760986, - "learning_rate": 8.50446334137158e-06, - "loss": 0.0435, - "step": 67930 - }, - { - "epoch": 1.7244574184541186, - "grad_norm": 1.3777843713760376, - "learning_rate": 8.503617210305876e-06, - "loss": 0.0372, - "step": 67935 - }, - { - "epoch": 1.7245843381139738, - "grad_norm": 0.5571632981300354, - "learning_rate": 8.502771079240174e-06, - "loss": 0.0343, - "step": 67940 - }, - { - "epoch": 1.724711257773829, - "grad_norm": 0.5424931049346924, - "learning_rate": 8.501924948174473e-06, - "loss": 0.042, - "step": 67945 - }, - { - "epoch": 1.7248381774336845, - "grad_norm": 0.5995396971702576, - "learning_rate": 8.501078817108771e-06, - "loss": 0.0414, - "step": 67950 - }, - { - "epoch": 1.7249650970935397, - "grad_norm": 0.39566877484321594, - "learning_rate": 8.500232686043068e-06, - "loss": 0.0504, - "step": 67955 - }, - { - "epoch": 1.7250920167533952, - "grad_norm": 0.7952625155448914, - "learning_rate": 8.499386554977366e-06, - "loss": 0.0384, - "step": 67960 - }, - { - "epoch": 1.7252189364132504, - "grad_norm": 0.3598606586456299, - "learning_rate": 8.498540423911665e-06, - "loss": 0.0333, - "step": 67965 - }, - { - "epoch": 1.7253458560731056, - "grad_norm": 0.4696366786956787, - "learning_rate": 8.497694292845963e-06, - "loss": 0.0428, - "step": 67970 - }, - { - "epoch": 1.7254727757329609, - "grad_norm": 0.5419458746910095, - "learning_rate": 8.49684816178026e-06, - "loss": 0.0503, - "step": 67975 - }, - { - "epoch": 1.7255996953928163, - "grad_norm": 0.6534791588783264, - "learning_rate": 8.496002030714558e-06, - "loss": 0.047, - "step": 67980 - }, - { - "epoch": 1.7257266150526718, - "grad_norm": 0.41164153814315796, - "learning_rate": 8.495155899648857e-06, - "loss": 0.0367, - "step": 67985 - }, - { - "epoch": 1.725853534712527, - "grad_norm": 0.5120255947113037, - "learning_rate": 8.494309768583155e-06, - "loss": 0.0316, - "step": 67990 - }, - { - "epoch": 1.7259804543723822, - "grad_norm": 0.684813380241394, - "learning_rate": 8.493463637517452e-06, - "loss": 0.0506, - "step": 67995 - }, - { - "epoch": 1.7261073740322375, - "grad_norm": 0.3994351327419281, - "learning_rate": 8.49261750645175e-06, - "loss": 0.0353, - "step": 68000 - }, - { - "epoch": 1.726234293692093, - "grad_norm": 0.3653869032859802, - "learning_rate": 8.491771375386047e-06, - "loss": 0.0316, - "step": 68005 - }, - { - "epoch": 1.7263612133519484, - "grad_norm": 0.4506258964538574, - "learning_rate": 8.490925244320345e-06, - "loss": 0.049, - "step": 68010 - }, - { - "epoch": 1.7264881330118036, - "grad_norm": 0.4713050425052643, - "learning_rate": 8.490079113254644e-06, - "loss": 0.0512, - "step": 68015 - }, - { - "epoch": 1.7266150526716588, - "grad_norm": 0.23228339850902557, - "learning_rate": 8.489232982188942e-06, - "loss": 0.0214, - "step": 68020 - }, - { - "epoch": 1.726741972331514, - "grad_norm": 0.4281570017337799, - "learning_rate": 8.488386851123239e-06, - "loss": 0.0426, - "step": 68025 - }, - { - "epoch": 1.7268688919913695, - "grad_norm": 0.5282936096191406, - "learning_rate": 8.487540720057537e-06, - "loss": 0.0568, - "step": 68030 - }, - { - "epoch": 1.7269958116512247, - "grad_norm": 0.7237597107887268, - "learning_rate": 8.486694588991836e-06, - "loss": 0.0607, - "step": 68035 - }, - { - "epoch": 1.7271227313110802, - "grad_norm": 0.4920574128627777, - "learning_rate": 8.485848457926134e-06, - "loss": 0.0296, - "step": 68040 - }, - { - "epoch": 1.7272496509709354, - "grad_norm": 0.5437068939208984, - "learning_rate": 8.48500232686043e-06, - "loss": 0.0353, - "step": 68045 - }, - { - "epoch": 1.7273765706307906, - "grad_norm": 0.5915569067001343, - "learning_rate": 8.484156195794729e-06, - "loss": 0.0433, - "step": 68050 - }, - { - "epoch": 1.7275034902906459, - "grad_norm": 0.42011868953704834, - "learning_rate": 8.483310064729028e-06, - "loss": 0.0323, - "step": 68055 - }, - { - "epoch": 1.7276304099505013, - "grad_norm": 0.46975022554397583, - "learning_rate": 8.482463933663326e-06, - "loss": 0.026, - "step": 68060 - }, - { - "epoch": 1.7277573296103568, - "grad_norm": 0.41961225867271423, - "learning_rate": 8.481617802597623e-06, - "loss": 0.0335, - "step": 68065 - }, - { - "epoch": 1.727884249270212, - "grad_norm": 0.5691941380500793, - "learning_rate": 8.480771671531921e-06, - "loss": 0.034, - "step": 68070 - }, - { - "epoch": 1.7280111689300672, - "grad_norm": 0.3563283085823059, - "learning_rate": 8.479925540466218e-06, - "loss": 0.0384, - "step": 68075 - }, - { - "epoch": 1.7281380885899225, - "grad_norm": 0.4017190635204315, - "learning_rate": 8.479079409400516e-06, - "loss": 0.0337, - "step": 68080 - }, - { - "epoch": 1.728265008249778, - "grad_norm": 0.5997766256332397, - "learning_rate": 8.478233278334815e-06, - "loss": 0.0468, - "step": 68085 - }, - { - "epoch": 1.7283919279096334, - "grad_norm": 0.3664191663265228, - "learning_rate": 8.477387147269113e-06, - "loss": 0.0308, - "step": 68090 - }, - { - "epoch": 1.7285188475694886, - "grad_norm": 0.364961177110672, - "learning_rate": 8.47654101620341e-06, - "loss": 0.0596, - "step": 68095 - }, - { - "epoch": 1.7286457672293438, - "grad_norm": 0.37378716468811035, - "learning_rate": 8.475694885137708e-06, - "loss": 0.0465, - "step": 68100 - }, - { - "epoch": 1.728772686889199, - "grad_norm": 0.5306050181388855, - "learning_rate": 8.474848754072006e-06, - "loss": 0.0401, - "step": 68105 - }, - { - "epoch": 1.7288996065490545, - "grad_norm": 0.44223684072494507, - "learning_rate": 8.474002623006305e-06, - "loss": 0.0349, - "step": 68110 - }, - { - "epoch": 1.7290265262089097, - "grad_norm": 0.45365190505981445, - "learning_rate": 8.473156491940602e-06, - "loss": 0.0333, - "step": 68115 - }, - { - "epoch": 1.7291534458687652, - "grad_norm": 0.22559651732444763, - "learning_rate": 8.4723103608749e-06, - "loss": 0.0412, - "step": 68120 - }, - { - "epoch": 1.7292803655286204, - "grad_norm": 0.36491331458091736, - "learning_rate": 8.471464229809198e-06, - "loss": 0.05, - "step": 68125 - }, - { - "epoch": 1.7294072851884756, - "grad_norm": 0.9012489318847656, - "learning_rate": 8.470618098743497e-06, - "loss": 0.0359, - "step": 68130 - }, - { - "epoch": 1.7295342048483309, - "grad_norm": 0.31874895095825195, - "learning_rate": 8.469771967677795e-06, - "loss": 0.0438, - "step": 68135 - }, - { - "epoch": 1.7296611245081863, - "grad_norm": 0.4668743312358856, - "learning_rate": 8.468925836612092e-06, - "loss": 0.0364, - "step": 68140 - }, - { - "epoch": 1.7297880441680418, - "grad_norm": 0.30074343085289, - "learning_rate": 8.46807970554639e-06, - "loss": 0.0274, - "step": 68145 - }, - { - "epoch": 1.729914963827897, - "grad_norm": 0.2546868622303009, - "learning_rate": 8.467233574480687e-06, - "loss": 0.0288, - "step": 68150 - }, - { - "epoch": 1.7300418834877522, - "grad_norm": 0.7424425482749939, - "learning_rate": 8.466387443414985e-06, - "loss": 0.0503, - "step": 68155 - }, - { - "epoch": 1.7301688031476075, - "grad_norm": 0.32464298605918884, - "learning_rate": 8.465541312349284e-06, - "loss": 0.052, - "step": 68160 - }, - { - "epoch": 1.730295722807463, - "grad_norm": 0.4954010844230652, - "learning_rate": 8.464695181283582e-06, - "loss": 0.0482, - "step": 68165 - }, - { - "epoch": 1.7304226424673181, - "grad_norm": 0.5778056979179382, - "learning_rate": 8.463849050217879e-06, - "loss": 0.0454, - "step": 68170 - }, - { - "epoch": 1.7305495621271736, - "grad_norm": 0.33394336700439453, - "learning_rate": 8.463002919152177e-06, - "loss": 0.0274, - "step": 68175 - }, - { - "epoch": 1.7306764817870288, - "grad_norm": 0.46325281262397766, - "learning_rate": 8.462156788086476e-06, - "loss": 0.0347, - "step": 68180 - }, - { - "epoch": 1.730803401446884, - "grad_norm": 0.28865039348602295, - "learning_rate": 8.461310657020774e-06, - "loss": 0.0324, - "step": 68185 - }, - { - "epoch": 1.7309303211067393, - "grad_norm": 0.4371521770954132, - "learning_rate": 8.46046452595507e-06, - "loss": 0.0283, - "step": 68190 - }, - { - "epoch": 1.7310572407665947, - "grad_norm": 0.3003612160682678, - "learning_rate": 8.45961839488937e-06, - "loss": 0.0304, - "step": 68195 - }, - { - "epoch": 1.7311841604264502, - "grad_norm": 0.5357656478881836, - "learning_rate": 8.458772263823668e-06, - "loss": 0.0499, - "step": 68200 - }, - { - "epoch": 1.7313110800863054, - "grad_norm": 0.5955318212509155, - "learning_rate": 8.457926132757966e-06, - "loss": 0.0448, - "step": 68205 - }, - { - "epoch": 1.7314379997461606, - "grad_norm": 0.4316476285457611, - "learning_rate": 8.457080001692263e-06, - "loss": 0.0328, - "step": 68210 - }, - { - "epoch": 1.7315649194060159, - "grad_norm": 0.48114335536956787, - "learning_rate": 8.456233870626561e-06, - "loss": 0.0276, - "step": 68215 - }, - { - "epoch": 1.7316918390658713, - "grad_norm": 0.634631335735321, - "learning_rate": 8.455387739560858e-06, - "loss": 0.0312, - "step": 68220 - }, - { - "epoch": 1.7318187587257268, - "grad_norm": 0.24319472908973694, - "learning_rate": 8.454541608495156e-06, - "loss": 0.0335, - "step": 68225 - }, - { - "epoch": 1.731945678385582, - "grad_norm": 0.6105319261550903, - "learning_rate": 8.453695477429455e-06, - "loss": 0.0485, - "step": 68230 - }, - { - "epoch": 1.7320725980454372, - "grad_norm": 0.47716689109802246, - "learning_rate": 8.452849346363753e-06, - "loss": 0.0326, - "step": 68235 - }, - { - "epoch": 1.7321995177052925, - "grad_norm": 0.7041749954223633, - "learning_rate": 8.45200321529805e-06, - "loss": 0.0668, - "step": 68240 - }, - { - "epoch": 1.732326437365148, - "grad_norm": 0.5032219886779785, - "learning_rate": 8.451157084232348e-06, - "loss": 0.0452, - "step": 68245 - }, - { - "epoch": 1.7324533570250031, - "grad_norm": 0.42988914251327515, - "learning_rate": 8.450310953166647e-06, - "loss": 0.0472, - "step": 68250 - }, - { - "epoch": 1.7325802766848586, - "grad_norm": 0.7280640006065369, - "learning_rate": 8.449464822100945e-06, - "loss": 0.0433, - "step": 68255 - }, - { - "epoch": 1.7327071963447138, - "grad_norm": 0.3609022796154022, - "learning_rate": 8.448618691035242e-06, - "loss": 0.0411, - "step": 68260 - }, - { - "epoch": 1.732834116004569, - "grad_norm": 1.1818292140960693, - "learning_rate": 8.44777255996954e-06, - "loss": 0.042, - "step": 68265 - }, - { - "epoch": 1.7329610356644243, - "grad_norm": 0.5411110520362854, - "learning_rate": 8.446926428903838e-06, - "loss": 0.0294, - "step": 68270 - }, - { - "epoch": 1.7330879553242797, - "grad_norm": 0.5180643796920776, - "learning_rate": 8.446080297838137e-06, - "loss": 0.0304, - "step": 68275 - }, - { - "epoch": 1.7332148749841352, - "grad_norm": 0.6199233531951904, - "learning_rate": 8.445234166772434e-06, - "loss": 0.0424, - "step": 68280 - }, - { - "epoch": 1.7333417946439904, - "grad_norm": 0.4339648187160492, - "learning_rate": 8.444388035706732e-06, - "loss": 0.0373, - "step": 68285 - }, - { - "epoch": 1.7334687143038456, - "grad_norm": 0.4621620178222656, - "learning_rate": 8.443541904641029e-06, - "loss": 0.032, - "step": 68290 - }, - { - "epoch": 1.7335956339637009, - "grad_norm": 0.5191864967346191, - "learning_rate": 8.442695773575327e-06, - "loss": 0.0296, - "step": 68295 - }, - { - "epoch": 1.7337225536235563, - "grad_norm": 0.3998408317565918, - "learning_rate": 8.441849642509626e-06, - "loss": 0.0343, - "step": 68300 - }, - { - "epoch": 1.7338494732834115, - "grad_norm": 0.3299643397331238, - "learning_rate": 8.441003511443924e-06, - "loss": 0.0421, - "step": 68305 - }, - { - "epoch": 1.733976392943267, - "grad_norm": 0.6636019945144653, - "learning_rate": 8.44015738037822e-06, - "loss": 0.034, - "step": 68310 - }, - { - "epoch": 1.7341033126031222, - "grad_norm": 0.611341655254364, - "learning_rate": 8.439311249312519e-06, - "loss": 0.0544, - "step": 68315 - }, - { - "epoch": 1.7342302322629775, - "grad_norm": 0.5985356569290161, - "learning_rate": 8.438465118246817e-06, - "loss": 0.0333, - "step": 68320 - }, - { - "epoch": 1.7343571519228327, - "grad_norm": 0.23227740824222565, - "learning_rate": 8.437618987181116e-06, - "loss": 0.0426, - "step": 68325 - }, - { - "epoch": 1.7344840715826881, - "grad_norm": 0.30849406123161316, - "learning_rate": 8.436772856115413e-06, - "loss": 0.0546, - "step": 68330 - }, - { - "epoch": 1.7346109912425436, - "grad_norm": 0.4798223376274109, - "learning_rate": 8.435926725049711e-06, - "loss": 0.0374, - "step": 68335 - }, - { - "epoch": 1.7347379109023988, - "grad_norm": 0.2564360201358795, - "learning_rate": 8.43508059398401e-06, - "loss": 0.0364, - "step": 68340 - }, - { - "epoch": 1.734864830562254, - "grad_norm": 1.1794285774230957, - "learning_rate": 8.434234462918308e-06, - "loss": 0.0419, - "step": 68345 - }, - { - "epoch": 1.7349917502221093, - "grad_norm": 0.3949192762374878, - "learning_rate": 8.433388331852604e-06, - "loss": 0.0218, - "step": 68350 - }, - { - "epoch": 1.7351186698819647, - "grad_norm": 0.30904191732406616, - "learning_rate": 8.432542200786903e-06, - "loss": 0.0288, - "step": 68355 - }, - { - "epoch": 1.7352455895418202, - "grad_norm": 0.6105614304542542, - "learning_rate": 8.4316960697212e-06, - "loss": 0.0351, - "step": 68360 - }, - { - "epoch": 1.7353725092016754, - "grad_norm": 0.4355478286743164, - "learning_rate": 8.430849938655498e-06, - "loss": 0.0453, - "step": 68365 - }, - { - "epoch": 1.7354994288615306, - "grad_norm": 0.9690837860107422, - "learning_rate": 8.430003807589796e-06, - "loss": 0.0412, - "step": 68370 - }, - { - "epoch": 1.7356263485213859, - "grad_norm": 0.5276809334754944, - "learning_rate": 8.429157676524095e-06, - "loss": 0.0529, - "step": 68375 - }, - { - "epoch": 1.7357532681812413, - "grad_norm": 0.33886203169822693, - "learning_rate": 8.428311545458391e-06, - "loss": 0.0275, - "step": 68380 - }, - { - "epoch": 1.7358801878410965, - "grad_norm": 0.3879072368144989, - "learning_rate": 8.42746541439269e-06, - "loss": 0.0431, - "step": 68385 - }, - { - "epoch": 1.736007107500952, - "grad_norm": 1.090622901916504, - "learning_rate": 8.426619283326988e-06, - "loss": 0.0435, - "step": 68390 - }, - { - "epoch": 1.7361340271608072, - "grad_norm": 0.8468914031982422, - "learning_rate": 8.425773152261287e-06, - "loss": 0.0415, - "step": 68395 - }, - { - "epoch": 1.7362609468206625, - "grad_norm": 0.43383657932281494, - "learning_rate": 8.424927021195583e-06, - "loss": 0.038, - "step": 68400 - }, - { - "epoch": 1.7363878664805177, - "grad_norm": 0.5358710885047913, - "learning_rate": 8.424080890129882e-06, - "loss": 0.0409, - "step": 68405 - }, - { - "epoch": 1.7365147861403731, - "grad_norm": 0.4110832214355469, - "learning_rate": 8.42323475906418e-06, - "loss": 0.0469, - "step": 68410 - }, - { - "epoch": 1.7366417058002286, - "grad_norm": 0.13843408226966858, - "learning_rate": 8.422388627998479e-06, - "loss": 0.0266, - "step": 68415 - }, - { - "epoch": 1.7367686254600838, - "grad_norm": 0.3087815046310425, - "learning_rate": 8.421542496932775e-06, - "loss": 0.0386, - "step": 68420 - }, - { - "epoch": 1.736895545119939, - "grad_norm": 0.48518261313438416, - "learning_rate": 8.420696365867074e-06, - "loss": 0.0523, - "step": 68425 - }, - { - "epoch": 1.7370224647797943, - "grad_norm": 0.3507404327392578, - "learning_rate": 8.41985023480137e-06, - "loss": 0.0409, - "step": 68430 - }, - { - "epoch": 1.7371493844396497, - "grad_norm": 0.27166932821273804, - "learning_rate": 8.419004103735669e-06, - "loss": 0.0355, - "step": 68435 - }, - { - "epoch": 1.7372763040995052, - "grad_norm": 0.36301085352897644, - "learning_rate": 8.418157972669967e-06, - "loss": 0.0488, - "step": 68440 - }, - { - "epoch": 1.7374032237593604, - "grad_norm": 0.5055424571037292, - "learning_rate": 8.417311841604266e-06, - "loss": 0.0359, - "step": 68445 - }, - { - "epoch": 1.7375301434192156, - "grad_norm": 2.0279250144958496, - "learning_rate": 8.416465710538562e-06, - "loss": 0.0339, - "step": 68450 - }, - { - "epoch": 1.7376570630790709, - "grad_norm": 0.3960818350315094, - "learning_rate": 8.41561957947286e-06, - "loss": 0.0265, - "step": 68455 - }, - { - "epoch": 1.7377839827389263, - "grad_norm": 0.31333842873573303, - "learning_rate": 8.414773448407159e-06, - "loss": 0.0485, - "step": 68460 - }, - { - "epoch": 1.7379109023987815, - "grad_norm": 0.5592023730278015, - "learning_rate": 8.413927317341458e-06, - "loss": 0.0352, - "step": 68465 - }, - { - "epoch": 1.738037822058637, - "grad_norm": 0.3875660300254822, - "learning_rate": 8.413081186275754e-06, - "loss": 0.0482, - "step": 68470 - }, - { - "epoch": 1.7381647417184922, - "grad_norm": 0.42073798179626465, - "learning_rate": 8.412235055210053e-06, - "loss": 0.0434, - "step": 68475 - }, - { - "epoch": 1.7382916613783475, - "grad_norm": 0.671370267868042, - "learning_rate": 8.411388924144351e-06, - "loss": 0.0413, - "step": 68480 - }, - { - "epoch": 1.7384185810382027, - "grad_norm": 0.7118996381759644, - "learning_rate": 8.41054279307865e-06, - "loss": 0.0348, - "step": 68485 - }, - { - "epoch": 1.7385455006980581, - "grad_norm": 0.6869513988494873, - "learning_rate": 8.409696662012946e-06, - "loss": 0.0298, - "step": 68490 - }, - { - "epoch": 1.7386724203579136, - "grad_norm": 0.609968900680542, - "learning_rate": 8.408850530947245e-06, - "loss": 0.0489, - "step": 68495 - }, - { - "epoch": 1.7387993400177688, - "grad_norm": 0.24830467998981476, - "learning_rate": 8.408004399881541e-06, - "loss": 0.0289, - "step": 68500 - }, - { - "epoch": 1.738926259677624, - "grad_norm": 0.4855244755744934, - "learning_rate": 8.40715826881584e-06, - "loss": 0.0374, - "step": 68505 - }, - { - "epoch": 1.7390531793374793, - "grad_norm": 0.5385072827339172, - "learning_rate": 8.406312137750138e-06, - "loss": 0.0376, - "step": 68510 - }, - { - "epoch": 1.7391800989973347, - "grad_norm": 0.38083982467651367, - "learning_rate": 8.405466006684436e-06, - "loss": 0.0363, - "step": 68515 - }, - { - "epoch": 1.73930701865719, - "grad_norm": 0.31461238861083984, - "learning_rate": 8.404619875618733e-06, - "loss": 0.0262, - "step": 68520 - }, - { - "epoch": 1.7394339383170454, - "grad_norm": 0.6142295598983765, - "learning_rate": 8.403773744553032e-06, - "loss": 0.0491, - "step": 68525 - }, - { - "epoch": 1.7395608579769006, - "grad_norm": 0.3473377823829651, - "learning_rate": 8.40292761348733e-06, - "loss": 0.0427, - "step": 68530 - }, - { - "epoch": 1.7396877776367559, - "grad_norm": 0.3119000792503357, - "learning_rate": 8.402081482421628e-06, - "loss": 0.0289, - "step": 68535 - }, - { - "epoch": 1.739814697296611, - "grad_norm": 0.30449166893959045, - "learning_rate": 8.401235351355925e-06, - "loss": 0.0429, - "step": 68540 - }, - { - "epoch": 1.7399416169564665, - "grad_norm": 0.3596707582473755, - "learning_rate": 8.400389220290223e-06, - "loss": 0.0287, - "step": 68545 - }, - { - "epoch": 1.740068536616322, - "grad_norm": 0.3111491799354553, - "learning_rate": 8.399543089224522e-06, - "loss": 0.0252, - "step": 68550 - }, - { - "epoch": 1.7401954562761772, - "grad_norm": 0.221689373254776, - "learning_rate": 8.39869695815882e-06, - "loss": 0.0253, - "step": 68555 - }, - { - "epoch": 1.7403223759360325, - "grad_norm": 0.5450825691223145, - "learning_rate": 8.397850827093117e-06, - "loss": 0.0418, - "step": 68560 - }, - { - "epoch": 1.7404492955958877, - "grad_norm": 0.3254716694355011, - "learning_rate": 8.397004696027415e-06, - "loss": 0.0234, - "step": 68565 - }, - { - "epoch": 1.7405762152557431, - "grad_norm": 0.5349908471107483, - "learning_rate": 8.396158564961712e-06, - "loss": 0.0503, - "step": 68570 - }, - { - "epoch": 1.7407031349155986, - "grad_norm": 0.44219663739204407, - "learning_rate": 8.39531243389601e-06, - "loss": 0.0309, - "step": 68575 - }, - { - "epoch": 1.7408300545754538, - "grad_norm": 0.46228668093681335, - "learning_rate": 8.394466302830309e-06, - "loss": 0.0559, - "step": 68580 - }, - { - "epoch": 1.740956974235309, - "grad_norm": 0.33131080865859985, - "learning_rate": 8.393620171764607e-06, - "loss": 0.0305, - "step": 68585 - }, - { - "epoch": 1.7410838938951643, - "grad_norm": 0.3210771381855011, - "learning_rate": 8.392774040698904e-06, - "loss": 0.0235, - "step": 68590 - }, - { - "epoch": 1.7412108135550197, - "grad_norm": 0.5356435179710388, - "learning_rate": 8.391927909633202e-06, - "loss": 0.0416, - "step": 68595 - }, - { - "epoch": 1.741337733214875, - "grad_norm": 0.6881387233734131, - "learning_rate": 8.3910817785675e-06, - "loss": 0.0411, - "step": 68600 - }, - { - "epoch": 1.7414646528747304, - "grad_norm": 1.120003342628479, - "learning_rate": 8.3902356475018e-06, - "loss": 0.04, - "step": 68605 - }, - { - "epoch": 1.7415915725345856, - "grad_norm": 0.23786862194538116, - "learning_rate": 8.389389516436096e-06, - "loss": 0.0436, - "step": 68610 - }, - { - "epoch": 1.7417184921944409, - "grad_norm": 0.31591102480888367, - "learning_rate": 8.388543385370394e-06, - "loss": 0.0319, - "step": 68615 - }, - { - "epoch": 1.741845411854296, - "grad_norm": 0.5919362306594849, - "learning_rate": 8.387697254304693e-06, - "loss": 0.0529, - "step": 68620 - }, - { - "epoch": 1.7419723315141515, - "grad_norm": 0.3900323212146759, - "learning_rate": 8.386851123238991e-06, - "loss": 0.0325, - "step": 68625 - }, - { - "epoch": 1.742099251174007, - "grad_norm": 0.3584057688713074, - "learning_rate": 8.386004992173288e-06, - "loss": 0.0329, - "step": 68630 - }, - { - "epoch": 1.7422261708338622, - "grad_norm": 0.6079078912734985, - "learning_rate": 8.385158861107586e-06, - "loss": 0.0536, - "step": 68635 - }, - { - "epoch": 1.7423530904937174, - "grad_norm": 0.3902316689491272, - "learning_rate": 8.384312730041883e-06, - "loss": 0.0438, - "step": 68640 - }, - { - "epoch": 1.7424800101535727, - "grad_norm": 0.44563421607017517, - "learning_rate": 8.383466598976181e-06, - "loss": 0.0252, - "step": 68645 - }, - { - "epoch": 1.7426069298134281, - "grad_norm": 0.5778771042823792, - "learning_rate": 8.38262046791048e-06, - "loss": 0.0557, - "step": 68650 - }, - { - "epoch": 1.7427338494732834, - "grad_norm": 0.7531105279922485, - "learning_rate": 8.381774336844778e-06, - "loss": 0.0347, - "step": 68655 - }, - { - "epoch": 1.7428607691331388, - "grad_norm": 0.6797422766685486, - "learning_rate": 8.380928205779077e-06, - "loss": 0.0351, - "step": 68660 - }, - { - "epoch": 1.742987688792994, - "grad_norm": 0.3558158576488495, - "learning_rate": 8.380082074713373e-06, - "loss": 0.0419, - "step": 68665 - }, - { - "epoch": 1.7431146084528493, - "grad_norm": 0.7564252018928528, - "learning_rate": 8.379235943647672e-06, - "loss": 0.0416, - "step": 68670 - }, - { - "epoch": 1.7432415281127045, - "grad_norm": 0.2419213354587555, - "learning_rate": 8.37838981258197e-06, - "loss": 0.0317, - "step": 68675 - }, - { - "epoch": 1.74336844777256, - "grad_norm": 0.5880879759788513, - "learning_rate": 8.377543681516268e-06, - "loss": 0.0482, - "step": 68680 - }, - { - "epoch": 1.7434953674324154, - "grad_norm": 0.3300993740558624, - "learning_rate": 8.376697550450565e-06, - "loss": 0.041, - "step": 68685 - }, - { - "epoch": 1.7436222870922706, - "grad_norm": 0.5849665999412537, - "learning_rate": 8.375851419384864e-06, - "loss": 0.0445, - "step": 68690 - }, - { - "epoch": 1.7437492067521259, - "grad_norm": 0.8823477029800415, - "learning_rate": 8.375005288319162e-06, - "loss": 0.0471, - "step": 68695 - }, - { - "epoch": 1.743876126411981, - "grad_norm": 0.21982872486114502, - "learning_rate": 8.37415915725346e-06, - "loss": 0.038, - "step": 68700 - }, - { - "epoch": 1.7440030460718365, - "grad_norm": 0.3507234454154968, - "learning_rate": 8.373313026187757e-06, - "loss": 0.0319, - "step": 68705 - }, - { - "epoch": 1.744129965731692, - "grad_norm": 0.32062485814094543, - "learning_rate": 8.372466895122056e-06, - "loss": 0.0314, - "step": 68710 - }, - { - "epoch": 1.7442568853915472, - "grad_norm": 0.26143550872802734, - "learning_rate": 8.371620764056352e-06, - "loss": 0.0301, - "step": 68715 - }, - { - "epoch": 1.7443838050514024, - "grad_norm": 0.3002767264842987, - "learning_rate": 8.37077463299065e-06, - "loss": 0.0548, - "step": 68720 - }, - { - "epoch": 1.7445107247112577, - "grad_norm": 0.30976101756095886, - "learning_rate": 8.369928501924949e-06, - "loss": 0.0377, - "step": 68725 - }, - { - "epoch": 1.7446376443711131, - "grad_norm": 0.47084322571754456, - "learning_rate": 8.369082370859247e-06, - "loss": 0.025, - "step": 68730 - }, - { - "epoch": 1.7447645640309684, - "grad_norm": 0.44489529728889465, - "learning_rate": 8.368236239793544e-06, - "loss": 0.0331, - "step": 68735 - }, - { - "epoch": 1.7448914836908238, - "grad_norm": 0.4078613817691803, - "learning_rate": 8.367390108727843e-06, - "loss": 0.0305, - "step": 68740 - }, - { - "epoch": 1.745018403350679, - "grad_norm": 0.4264031946659088, - "learning_rate": 8.366543977662141e-06, - "loss": 0.0382, - "step": 68745 - }, - { - "epoch": 1.7451453230105343, - "grad_norm": 0.642102837562561, - "learning_rate": 8.36569784659644e-06, - "loss": 0.0329, - "step": 68750 - }, - { - "epoch": 1.7452722426703895, - "grad_norm": 0.5744174718856812, - "learning_rate": 8.364851715530736e-06, - "loss": 0.046, - "step": 68755 - }, - { - "epoch": 1.745399162330245, - "grad_norm": 0.38677647709846497, - "learning_rate": 8.364005584465034e-06, - "loss": 0.0398, - "step": 68760 - }, - { - "epoch": 1.7455260819901004, - "grad_norm": 0.3872421681880951, - "learning_rate": 8.363159453399333e-06, - "loss": 0.0401, - "step": 68765 - }, - { - "epoch": 1.7456530016499556, - "grad_norm": 0.3776572644710541, - "learning_rate": 8.362313322333631e-06, - "loss": 0.0471, - "step": 68770 - }, - { - "epoch": 1.7457799213098109, - "grad_norm": 0.7864038348197937, - "learning_rate": 8.361467191267928e-06, - "loss": 0.0567, - "step": 68775 - }, - { - "epoch": 1.745906840969666, - "grad_norm": 0.6484741568565369, - "learning_rate": 8.360621060202226e-06, - "loss": 0.0351, - "step": 68780 - }, - { - "epoch": 1.7460337606295215, - "grad_norm": 0.3104114234447479, - "learning_rate": 8.359774929136523e-06, - "loss": 0.034, - "step": 68785 - }, - { - "epoch": 1.746160680289377, - "grad_norm": 0.5490842461585999, - "learning_rate": 8.358928798070821e-06, - "loss": 0.0433, - "step": 68790 - }, - { - "epoch": 1.7462875999492322, - "grad_norm": 0.2991618514060974, - "learning_rate": 8.35808266700512e-06, - "loss": 0.0285, - "step": 68795 - }, - { - "epoch": 1.7464145196090874, - "grad_norm": 0.8376309871673584, - "learning_rate": 8.357236535939418e-06, - "loss": 0.0444, - "step": 68800 - }, - { - "epoch": 1.7465414392689427, - "grad_norm": 0.6593576669692993, - "learning_rate": 8.356390404873715e-06, - "loss": 0.052, - "step": 68805 - }, - { - "epoch": 1.7466683589287981, - "grad_norm": 0.43215084075927734, - "learning_rate": 8.355544273808013e-06, - "loss": 0.0378, - "step": 68810 - }, - { - "epoch": 1.7467952785886534, - "grad_norm": 0.7036052346229553, - "learning_rate": 8.354698142742312e-06, - "loss": 0.0365, - "step": 68815 - }, - { - "epoch": 1.7469221982485088, - "grad_norm": 0.41841375827789307, - "learning_rate": 8.35385201167661e-06, - "loss": 0.0458, - "step": 68820 - }, - { - "epoch": 1.747049117908364, - "grad_norm": 0.49730977416038513, - "learning_rate": 8.353005880610907e-06, - "loss": 0.0542, - "step": 68825 - }, - { - "epoch": 1.7471760375682193, - "grad_norm": 0.4872910678386688, - "learning_rate": 8.352159749545205e-06, - "loss": 0.0318, - "step": 68830 - }, - { - "epoch": 1.7473029572280745, - "grad_norm": 0.6466866135597229, - "learning_rate": 8.351313618479504e-06, - "loss": 0.0393, - "step": 68835 - }, - { - "epoch": 1.74742987688793, - "grad_norm": 0.3928212523460388, - "learning_rate": 8.350467487413802e-06, - "loss": 0.0358, - "step": 68840 - }, - { - "epoch": 1.7475567965477854, - "grad_norm": 0.6219093203544617, - "learning_rate": 8.349621356348099e-06, - "loss": 0.0444, - "step": 68845 - }, - { - "epoch": 1.7476837162076406, - "grad_norm": 0.8324838876724243, - "learning_rate": 8.348775225282397e-06, - "loss": 0.0397, - "step": 68850 - }, - { - "epoch": 1.7478106358674959, - "grad_norm": 0.5140394568443298, - "learning_rate": 8.347929094216694e-06, - "loss": 0.0555, - "step": 68855 - }, - { - "epoch": 1.747937555527351, - "grad_norm": 0.39236149191856384, - "learning_rate": 8.347082963150992e-06, - "loss": 0.0386, - "step": 68860 - }, - { - "epoch": 1.7480644751872065, - "grad_norm": 0.2722585201263428, - "learning_rate": 8.34623683208529e-06, - "loss": 0.034, - "step": 68865 - }, - { - "epoch": 1.7481913948470618, - "grad_norm": 0.366500586271286, - "learning_rate": 8.345390701019589e-06, - "loss": 0.0275, - "step": 68870 - }, - { - "epoch": 1.7483183145069172, - "grad_norm": 0.40456897020339966, - "learning_rate": 8.344544569953886e-06, - "loss": 0.0491, - "step": 68875 - }, - { - "epoch": 1.7484452341667724, - "grad_norm": 0.9731058478355408, - "learning_rate": 8.343698438888184e-06, - "loss": 0.0294, - "step": 68880 - }, - { - "epoch": 1.7485721538266277, - "grad_norm": 0.4320574104785919, - "learning_rate": 8.342852307822483e-06, - "loss": 0.043, - "step": 68885 - }, - { - "epoch": 1.748699073486483, - "grad_norm": 0.36212366819381714, - "learning_rate": 8.342006176756781e-06, - "loss": 0.0461, - "step": 68890 - }, - { - "epoch": 1.7488259931463384, - "grad_norm": 0.4030331075191498, - "learning_rate": 8.341160045691078e-06, - "loss": 0.0421, - "step": 68895 - }, - { - "epoch": 1.7489529128061938, - "grad_norm": 0.21215017139911652, - "learning_rate": 8.340313914625376e-06, - "loss": 0.0508, - "step": 68900 - }, - { - "epoch": 1.749079832466049, - "grad_norm": 0.6541277170181274, - "learning_rate": 8.339467783559675e-06, - "loss": 0.0465, - "step": 68905 - }, - { - "epoch": 1.7492067521259043, - "grad_norm": 0.4677080512046814, - "learning_rate": 8.338621652493973e-06, - "loss": 0.0289, - "step": 68910 - }, - { - "epoch": 1.7493336717857595, - "grad_norm": 0.5264366269111633, - "learning_rate": 8.33777552142827e-06, - "loss": 0.0277, - "step": 68915 - }, - { - "epoch": 1.749460591445615, - "grad_norm": 0.6588384509086609, - "learning_rate": 8.336929390362568e-06, - "loss": 0.0286, - "step": 68920 - }, - { - "epoch": 1.7495875111054704, - "grad_norm": 0.4117472767829895, - "learning_rate": 8.336083259296865e-06, - "loss": 0.0457, - "step": 68925 - }, - { - "epoch": 1.7497144307653256, - "grad_norm": 0.39143237471580505, - "learning_rate": 8.335237128231163e-06, - "loss": 0.0367, - "step": 68930 - }, - { - "epoch": 1.7498413504251809, - "grad_norm": 0.5064342021942139, - "learning_rate": 8.334390997165462e-06, - "loss": 0.0432, - "step": 68935 - }, - { - "epoch": 1.749968270085036, - "grad_norm": 0.4622403085231781, - "learning_rate": 8.33354486609976e-06, - "loss": 0.0384, - "step": 68940 - }, - { - "epoch": 1.7500951897448915, - "grad_norm": 1.198572039604187, - "learning_rate": 8.332698735034057e-06, - "loss": 0.0351, - "step": 68945 - }, - { - "epoch": 1.7502221094047468, - "grad_norm": 0.4125652611255646, - "learning_rate": 8.331852603968355e-06, - "loss": 0.0295, - "step": 68950 - }, - { - "epoch": 1.7503490290646022, - "grad_norm": 0.587165892124176, - "learning_rate": 8.331006472902653e-06, - "loss": 0.0261, - "step": 68955 - }, - { - "epoch": 1.7504759487244574, - "grad_norm": 0.5144180655479431, - "learning_rate": 8.330160341836952e-06, - "loss": 0.0428, - "step": 68960 - }, - { - "epoch": 1.7506028683843127, - "grad_norm": 0.6816078424453735, - "learning_rate": 8.329314210771249e-06, - "loss": 0.0423, - "step": 68965 - }, - { - "epoch": 1.750729788044168, - "grad_norm": 0.49700701236724854, - "learning_rate": 8.328468079705547e-06, - "loss": 0.0386, - "step": 68970 - }, - { - "epoch": 1.7508567077040234, - "grad_norm": 0.24700608849525452, - "learning_rate": 8.327621948639845e-06, - "loss": 0.0212, - "step": 68975 - }, - { - "epoch": 1.7509836273638788, - "grad_norm": 0.3755348026752472, - "learning_rate": 8.326775817574144e-06, - "loss": 0.0346, - "step": 68980 - }, - { - "epoch": 1.751110547023734, - "grad_norm": 0.3874184787273407, - "learning_rate": 8.32592968650844e-06, - "loss": 0.0403, - "step": 68985 - }, - { - "epoch": 1.7512374666835893, - "grad_norm": 0.5340925455093384, - "learning_rate": 8.325083555442739e-06, - "loss": 0.0407, - "step": 68990 - }, - { - "epoch": 1.7513643863434445, - "grad_norm": 0.37937453389167786, - "learning_rate": 8.324237424377036e-06, - "loss": 0.0398, - "step": 68995 - }, - { - "epoch": 1.7514913060033, - "grad_norm": 1.1553341150283813, - "learning_rate": 8.323391293311334e-06, - "loss": 0.0317, - "step": 69000 - }, - { - "epoch": 1.7516182256631552, - "grad_norm": 0.240932896733284, - "learning_rate": 8.322545162245632e-06, - "loss": 0.0394, - "step": 69005 - }, - { - "epoch": 1.7517451453230106, - "grad_norm": 0.6994168162345886, - "learning_rate": 8.32169903117993e-06, - "loss": 0.0444, - "step": 69010 - }, - { - "epoch": 1.7518720649828659, - "grad_norm": 0.3535151779651642, - "learning_rate": 8.320852900114228e-06, - "loss": 0.0356, - "step": 69015 - }, - { - "epoch": 1.751998984642721, - "grad_norm": 0.5617672801017761, - "learning_rate": 8.320006769048526e-06, - "loss": 0.0503, - "step": 69020 - }, - { - "epoch": 1.7521259043025763, - "grad_norm": 0.37788844108581543, - "learning_rate": 8.319160637982824e-06, - "loss": 0.0683, - "step": 69025 - }, - { - "epoch": 1.7522528239624318, - "grad_norm": 0.3941234052181244, - "learning_rate": 8.318314506917123e-06, - "loss": 0.0306, - "step": 69030 - }, - { - "epoch": 1.7523797436222872, - "grad_norm": 0.2726956009864807, - "learning_rate": 8.31746837585142e-06, - "loss": 0.0382, - "step": 69035 - }, - { - "epoch": 1.7525066632821424, - "grad_norm": 0.37414783239364624, - "learning_rate": 8.316622244785718e-06, - "loss": 0.0338, - "step": 69040 - }, - { - "epoch": 1.7526335829419977, - "grad_norm": 0.3044266104698181, - "learning_rate": 8.315776113720016e-06, - "loss": 0.0221, - "step": 69045 - }, - { - "epoch": 1.752760502601853, - "grad_norm": 0.3589276969432831, - "learning_rate": 8.314929982654315e-06, - "loss": 0.0386, - "step": 69050 - }, - { - "epoch": 1.7528874222617084, - "grad_norm": 0.4048697352409363, - "learning_rate": 8.314083851588611e-06, - "loss": 0.0355, - "step": 69055 - }, - { - "epoch": 1.7530143419215638, - "grad_norm": 0.35674893856048584, - "learning_rate": 8.31323772052291e-06, - "loss": 0.0463, - "step": 69060 - }, - { - "epoch": 1.753141261581419, - "grad_norm": 0.4069412350654602, - "learning_rate": 8.312391589457206e-06, - "loss": 0.0401, - "step": 69065 - }, - { - "epoch": 1.7532681812412743, - "grad_norm": 0.5125916004180908, - "learning_rate": 8.311545458391505e-06, - "loss": 0.0314, - "step": 69070 - }, - { - "epoch": 1.7533951009011295, - "grad_norm": 0.5154129266738892, - "learning_rate": 8.310699327325803e-06, - "loss": 0.0645, - "step": 69075 - }, - { - "epoch": 1.753522020560985, - "grad_norm": 0.3028571605682373, - "learning_rate": 8.309853196260102e-06, - "loss": 0.0422, - "step": 69080 - }, - { - "epoch": 1.7536489402208402, - "grad_norm": 0.9238010048866272, - "learning_rate": 8.309007065194398e-06, - "loss": 0.0696, - "step": 69085 - }, - { - "epoch": 1.7537758598806956, - "grad_norm": 0.29181063175201416, - "learning_rate": 8.308160934128697e-06, - "loss": 0.0383, - "step": 69090 - }, - { - "epoch": 1.7539027795405508, - "grad_norm": 0.7038292288780212, - "learning_rate": 8.307314803062995e-06, - "loss": 0.0541, - "step": 69095 - }, - { - "epoch": 1.754029699200406, - "grad_norm": 0.2706953287124634, - "learning_rate": 8.306468671997294e-06, - "loss": 0.0289, - "step": 69100 - }, - { - "epoch": 1.7541566188602613, - "grad_norm": 0.3438837230205536, - "learning_rate": 8.30562254093159e-06, - "loss": 0.0253, - "step": 69105 - }, - { - "epoch": 1.7542835385201168, - "grad_norm": 0.3112671971321106, - "learning_rate": 8.304776409865889e-06, - "loss": 0.0385, - "step": 69110 - }, - { - "epoch": 1.7544104581799722, - "grad_norm": 0.39236462116241455, - "learning_rate": 8.303930278800187e-06, - "loss": 0.0534, - "step": 69115 - }, - { - "epoch": 1.7545373778398274, - "grad_norm": 0.5588448643684387, - "learning_rate": 8.303084147734486e-06, - "loss": 0.0478, - "step": 69120 - }, - { - "epoch": 1.7546642974996827, - "grad_norm": 0.34803351759910583, - "learning_rate": 8.302238016668782e-06, - "loss": 0.061, - "step": 69125 - }, - { - "epoch": 1.754791217159538, - "grad_norm": 0.3202275037765503, - "learning_rate": 8.30139188560308e-06, - "loss": 0.0342, - "step": 69130 - }, - { - "epoch": 1.7549181368193933, - "grad_norm": 0.36695560812950134, - "learning_rate": 8.300545754537377e-06, - "loss": 0.0416, - "step": 69135 - }, - { - "epoch": 1.7550450564792488, - "grad_norm": 0.3306376338005066, - "learning_rate": 8.299699623471676e-06, - "loss": 0.042, - "step": 69140 - }, - { - "epoch": 1.755171976139104, - "grad_norm": 0.26811686158180237, - "learning_rate": 8.298853492405974e-06, - "loss": 0.0286, - "step": 69145 - }, - { - "epoch": 1.7552988957989593, - "grad_norm": 0.47243690490722656, - "learning_rate": 8.298007361340273e-06, - "loss": 0.0387, - "step": 69150 - }, - { - "epoch": 1.7554258154588145, - "grad_norm": 0.4231316149234772, - "learning_rate": 8.29716123027457e-06, - "loss": 0.0455, - "step": 69155 - }, - { - "epoch": 1.7555527351186697, - "grad_norm": 0.2906438112258911, - "learning_rate": 8.296315099208868e-06, - "loss": 0.0386, - "step": 69160 - }, - { - "epoch": 1.7556796547785252, - "grad_norm": 0.26042407751083374, - "learning_rate": 8.295468968143166e-06, - "loss": 0.0214, - "step": 69165 - }, - { - "epoch": 1.7558065744383806, - "grad_norm": 0.3839174807071686, - "learning_rate": 8.294622837077464e-06, - "loss": 0.0487, - "step": 69170 - }, - { - "epoch": 1.7559334940982358, - "grad_norm": 0.5281766653060913, - "learning_rate": 8.293776706011763e-06, - "loss": 0.0375, - "step": 69175 - }, - { - "epoch": 1.756060413758091, - "grad_norm": 0.23316040635108948, - "learning_rate": 8.29293057494606e-06, - "loss": 0.0315, - "step": 69180 - }, - { - "epoch": 1.7561873334179463, - "grad_norm": 0.37299856543540955, - "learning_rate": 8.292084443880358e-06, - "loss": 0.0343, - "step": 69185 - }, - { - "epoch": 1.7563142530778018, - "grad_norm": 0.5602080225944519, - "learning_rate": 8.291238312814656e-06, - "loss": 0.037, - "step": 69190 - }, - { - "epoch": 1.7564411727376572, - "grad_norm": 0.5453559160232544, - "learning_rate": 8.290392181748955e-06, - "loss": 0.0334, - "step": 69195 - }, - { - "epoch": 1.7565680923975124, - "grad_norm": 0.7340668439865112, - "learning_rate": 8.289546050683251e-06, - "loss": 0.0428, - "step": 69200 - }, - { - "epoch": 1.7566950120573677, - "grad_norm": 0.48727715015411377, - "learning_rate": 8.28869991961755e-06, - "loss": 0.0324, - "step": 69205 - }, - { - "epoch": 1.756821931717223, - "grad_norm": 0.7232717871665955, - "learning_rate": 8.287853788551847e-06, - "loss": 0.0601, - "step": 69210 - }, - { - "epoch": 1.7569488513770783, - "grad_norm": 0.4972955286502838, - "learning_rate": 8.287007657486145e-06, - "loss": 0.0478, - "step": 69215 - }, - { - "epoch": 1.7570757710369336, - "grad_norm": 0.5276387929916382, - "learning_rate": 8.286161526420443e-06, - "loss": 0.0487, - "step": 69220 - }, - { - "epoch": 1.757202690696789, - "grad_norm": 0.4990506172180176, - "learning_rate": 8.285315395354742e-06, - "loss": 0.0292, - "step": 69225 - }, - { - "epoch": 1.7573296103566443, - "grad_norm": 0.30739474296569824, - "learning_rate": 8.284469264289038e-06, - "loss": 0.0349, - "step": 69230 - }, - { - "epoch": 1.7574565300164995, - "grad_norm": 0.270055890083313, - "learning_rate": 8.283623133223337e-06, - "loss": 0.0241, - "step": 69235 - }, - { - "epoch": 1.7575834496763547, - "grad_norm": 0.3540394604206085, - "learning_rate": 8.282777002157635e-06, - "loss": 0.0478, - "step": 69240 - }, - { - "epoch": 1.7577103693362102, - "grad_norm": 0.4330662786960602, - "learning_rate": 8.281930871091934e-06, - "loss": 0.0514, - "step": 69245 - }, - { - "epoch": 1.7578372889960656, - "grad_norm": 0.4851292371749878, - "learning_rate": 8.28108474002623e-06, - "loss": 0.0542, - "step": 69250 - }, - { - "epoch": 1.7579642086559208, - "grad_norm": 0.5582454204559326, - "learning_rate": 8.280238608960529e-06, - "loss": 0.0526, - "step": 69255 - }, - { - "epoch": 1.758091128315776, - "grad_norm": 0.3525870740413666, - "learning_rate": 8.279392477894827e-06, - "loss": 0.0469, - "step": 69260 - }, - { - "epoch": 1.7582180479756313, - "grad_norm": 0.2986149191856384, - "learning_rate": 8.278546346829126e-06, - "loss": 0.0328, - "step": 69265 - }, - { - "epoch": 1.7583449676354868, - "grad_norm": 0.5536965131759644, - "learning_rate": 8.277700215763422e-06, - "loss": 0.0325, - "step": 69270 - }, - { - "epoch": 1.7584718872953422, - "grad_norm": 0.305147260427475, - "learning_rate": 8.27685408469772e-06, - "loss": 0.029, - "step": 69275 - }, - { - "epoch": 1.7585988069551974, - "grad_norm": 0.3833218514919281, - "learning_rate": 8.276007953632017e-06, - "loss": 0.0404, - "step": 69280 - }, - { - "epoch": 1.7587257266150527, - "grad_norm": 0.4693339765071869, - "learning_rate": 8.275161822566316e-06, - "loss": 0.0266, - "step": 69285 - }, - { - "epoch": 1.758852646274908, - "grad_norm": 0.658373236656189, - "learning_rate": 8.274315691500614e-06, - "loss": 0.0398, - "step": 69290 - }, - { - "epoch": 1.7589795659347633, - "grad_norm": 0.3469555377960205, - "learning_rate": 8.273469560434913e-06, - "loss": 0.039, - "step": 69295 - }, - { - "epoch": 1.7591064855946186, - "grad_norm": 0.5092095732688904, - "learning_rate": 8.27262342936921e-06, - "loss": 0.0441, - "step": 69300 - }, - { - "epoch": 1.759233405254474, - "grad_norm": 0.3068762719631195, - "learning_rate": 8.271777298303508e-06, - "loss": 0.0252, - "step": 69305 - }, - { - "epoch": 1.7593603249143293, - "grad_norm": 0.4438513517379761, - "learning_rate": 8.270931167237806e-06, - "loss": 0.0357, - "step": 69310 - }, - { - "epoch": 1.7594872445741845, - "grad_norm": 0.6382645964622498, - "learning_rate": 8.270085036172105e-06, - "loss": 0.0456, - "step": 69315 - }, - { - "epoch": 1.7596141642340397, - "grad_norm": 0.3316819667816162, - "learning_rate": 8.269238905106401e-06, - "loss": 0.0389, - "step": 69320 - }, - { - "epoch": 1.7597410838938952, - "grad_norm": 0.6160081028938293, - "learning_rate": 8.2683927740407e-06, - "loss": 0.0621, - "step": 69325 - }, - { - "epoch": 1.7598680035537506, - "grad_norm": 0.4836024343967438, - "learning_rate": 8.267546642974998e-06, - "loss": 0.0396, - "step": 69330 - }, - { - "epoch": 1.7599949232136058, - "grad_norm": 0.716277539730072, - "learning_rate": 8.266700511909296e-06, - "loss": 0.0452, - "step": 69335 - }, - { - "epoch": 1.760121842873461, - "grad_norm": 0.518458366394043, - "learning_rate": 8.265854380843593e-06, - "loss": 0.0271, - "step": 69340 - }, - { - "epoch": 1.7602487625333163, - "grad_norm": 0.3817666471004486, - "learning_rate": 8.265008249777892e-06, - "loss": 0.046, - "step": 69345 - }, - { - "epoch": 1.7603756821931718, - "grad_norm": 0.4785386919975281, - "learning_rate": 8.264162118712188e-06, - "loss": 0.0326, - "step": 69350 - }, - { - "epoch": 1.760502601853027, - "grad_norm": 0.4483630955219269, - "learning_rate": 8.263315987646487e-06, - "loss": 0.0474, - "step": 69355 - }, - { - "epoch": 1.7606295215128824, - "grad_norm": 0.3891487419605255, - "learning_rate": 8.262469856580785e-06, - "loss": 0.0526, - "step": 69360 - }, - { - "epoch": 1.7607564411727377, - "grad_norm": 0.5106373429298401, - "learning_rate": 8.261623725515083e-06, - "loss": 0.0523, - "step": 69365 - }, - { - "epoch": 1.760883360832593, - "grad_norm": 0.40804094076156616, - "learning_rate": 8.26077759444938e-06, - "loss": 0.0237, - "step": 69370 - }, - { - "epoch": 1.7610102804924481, - "grad_norm": 0.41930529475212097, - "learning_rate": 8.259931463383679e-06, - "loss": 0.0269, - "step": 69375 - }, - { - "epoch": 1.7611372001523036, - "grad_norm": 0.35108762979507446, - "learning_rate": 8.259085332317977e-06, - "loss": 0.0406, - "step": 69380 - }, - { - "epoch": 1.761264119812159, - "grad_norm": 0.3144512474536896, - "learning_rate": 8.258239201252275e-06, - "loss": 0.0294, - "step": 69385 - }, - { - "epoch": 1.7613910394720143, - "grad_norm": 0.27526867389678955, - "learning_rate": 8.257393070186572e-06, - "loss": 0.0342, - "step": 69390 - }, - { - "epoch": 1.7615179591318695, - "grad_norm": 0.5940231084823608, - "learning_rate": 8.25654693912087e-06, - "loss": 0.0373, - "step": 69395 - }, - { - "epoch": 1.7616448787917247, - "grad_norm": 0.26531514525413513, - "learning_rate": 8.255700808055169e-06, - "loss": 0.0313, - "step": 69400 - }, - { - "epoch": 1.7617717984515802, - "grad_norm": 0.37138256430625916, - "learning_rate": 8.254854676989467e-06, - "loss": 0.0653, - "step": 69405 - }, - { - "epoch": 1.7618987181114356, - "grad_norm": 0.37102460861206055, - "learning_rate": 8.254008545923764e-06, - "loss": 0.0336, - "step": 69410 - }, - { - "epoch": 1.7620256377712908, - "grad_norm": 0.422712117433548, - "learning_rate": 8.253162414858062e-06, - "loss": 0.0484, - "step": 69415 - }, - { - "epoch": 1.762152557431146, - "grad_norm": 0.4739077091217041, - "learning_rate": 8.252316283792359e-06, - "loss": 0.0395, - "step": 69420 - }, - { - "epoch": 1.7622794770910013, - "grad_norm": 0.4275156557559967, - "learning_rate": 8.251470152726658e-06, - "loss": 0.0287, - "step": 69425 - }, - { - "epoch": 1.7624063967508568, - "grad_norm": 0.3092404007911682, - "learning_rate": 8.250624021660956e-06, - "loss": 0.031, - "step": 69430 - }, - { - "epoch": 1.762533316410712, - "grad_norm": 2.227567195892334, - "learning_rate": 8.249777890595254e-06, - "loss": 0.032, - "step": 69435 - }, - { - "epoch": 1.7626602360705674, - "grad_norm": 0.2977258265018463, - "learning_rate": 8.248931759529551e-06, - "loss": 0.024, - "step": 69440 - }, - { - "epoch": 1.7627871557304227, - "grad_norm": 0.4248685836791992, - "learning_rate": 8.24808562846385e-06, - "loss": 0.0369, - "step": 69445 - }, - { - "epoch": 1.762914075390278, - "grad_norm": 0.42066138982772827, - "learning_rate": 8.247239497398148e-06, - "loss": 0.0341, - "step": 69450 - }, - { - "epoch": 1.7630409950501331, - "grad_norm": 0.7095091342926025, - "learning_rate": 8.246393366332446e-06, - "loss": 0.0334, - "step": 69455 - }, - { - "epoch": 1.7631679147099886, - "grad_norm": 0.3925437331199646, - "learning_rate": 8.245547235266743e-06, - "loss": 0.0434, - "step": 69460 - }, - { - "epoch": 1.763294834369844, - "grad_norm": 0.5082677602767944, - "learning_rate": 8.244701104201041e-06, - "loss": 0.0406, - "step": 69465 - }, - { - "epoch": 1.7634217540296993, - "grad_norm": 0.4444241225719452, - "learning_rate": 8.24385497313534e-06, - "loss": 0.0354, - "step": 69470 - }, - { - "epoch": 1.7635486736895545, - "grad_norm": 0.4715871214866638, - "learning_rate": 8.243008842069638e-06, - "loss": 0.0371, - "step": 69475 - }, - { - "epoch": 1.7636755933494097, - "grad_norm": 0.292479544878006, - "learning_rate": 8.242162711003935e-06, - "loss": 0.0237, - "step": 69480 - }, - { - "epoch": 1.7638025130092652, - "grad_norm": 0.3765997886657715, - "learning_rate": 8.241316579938233e-06, - "loss": 0.0338, - "step": 69485 - }, - { - "epoch": 1.7639294326691206, - "grad_norm": 0.39980557560920715, - "learning_rate": 8.24047044887253e-06, - "loss": 0.0392, - "step": 69490 - }, - { - "epoch": 1.7640563523289758, - "grad_norm": 0.35628628730773926, - "learning_rate": 8.239624317806828e-06, - "loss": 0.044, - "step": 69495 - }, - { - "epoch": 1.764183271988831, - "grad_norm": 0.7192484140396118, - "learning_rate": 8.238778186741127e-06, - "loss": 0.0375, - "step": 69500 - }, - { - "epoch": 1.7643101916486863, - "grad_norm": 0.6455184817314148, - "learning_rate": 8.237932055675425e-06, - "loss": 0.0316, - "step": 69505 - }, - { - "epoch": 1.7644371113085415, - "grad_norm": 0.416820764541626, - "learning_rate": 8.237085924609722e-06, - "loss": 0.0337, - "step": 69510 - }, - { - "epoch": 1.764564030968397, - "grad_norm": 0.3320479094982147, - "learning_rate": 8.23623979354402e-06, - "loss": 0.0352, - "step": 69515 - }, - { - "epoch": 1.7646909506282524, - "grad_norm": 1.140215516090393, - "learning_rate": 8.235393662478319e-06, - "loss": 0.0428, - "step": 69520 - }, - { - "epoch": 1.7648178702881077, - "grad_norm": 0.34476444125175476, - "learning_rate": 8.234547531412617e-06, - "loss": 0.0344, - "step": 69525 - }, - { - "epoch": 1.7649447899479629, - "grad_norm": 0.3632010221481323, - "learning_rate": 8.233701400346914e-06, - "loss": 0.0343, - "step": 69530 - }, - { - "epoch": 1.7650717096078181, - "grad_norm": 0.4061678647994995, - "learning_rate": 8.232855269281212e-06, - "loss": 0.0448, - "step": 69535 - }, - { - "epoch": 1.7651986292676736, - "grad_norm": 0.8105491399765015, - "learning_rate": 8.23200913821551e-06, - "loss": 0.0296, - "step": 69540 - }, - { - "epoch": 1.765325548927529, - "grad_norm": 0.49731746315956116, - "learning_rate": 8.231163007149809e-06, - "loss": 0.0501, - "step": 69545 - }, - { - "epoch": 1.7654524685873842, - "grad_norm": 0.26151928305625916, - "learning_rate": 8.230316876084106e-06, - "loss": 0.0337, - "step": 69550 - }, - { - "epoch": 1.7655793882472395, - "grad_norm": 1.3502308130264282, - "learning_rate": 8.229470745018404e-06, - "loss": 0.0311, - "step": 69555 - }, - { - "epoch": 1.7657063079070947, - "grad_norm": 0.7211322784423828, - "learning_rate": 8.2286246139527e-06, - "loss": 0.0426, - "step": 69560 - }, - { - "epoch": 1.7658332275669502, - "grad_norm": 0.570098876953125, - "learning_rate": 8.227778482887e-06, - "loss": 0.0291, - "step": 69565 - }, - { - "epoch": 1.7659601472268054, - "grad_norm": 0.6292459964752197, - "learning_rate": 8.226932351821298e-06, - "loss": 0.0427, - "step": 69570 - }, - { - "epoch": 1.7660870668866608, - "grad_norm": 0.6615784764289856, - "learning_rate": 8.226086220755596e-06, - "loss": 0.0549, - "step": 69575 - }, - { - "epoch": 1.766213986546516, - "grad_norm": 0.42173153162002563, - "learning_rate": 8.225240089689893e-06, - "loss": 0.0388, - "step": 69580 - }, - { - "epoch": 1.7663409062063713, - "grad_norm": 0.27700358629226685, - "learning_rate": 8.224393958624191e-06, - "loss": 0.0325, - "step": 69585 - }, - { - "epoch": 1.7664678258662265, - "grad_norm": 0.3031492531299591, - "learning_rate": 8.22354782755849e-06, - "loss": 0.0368, - "step": 69590 - }, - { - "epoch": 1.766594745526082, - "grad_norm": 0.4325595200061798, - "learning_rate": 8.222701696492788e-06, - "loss": 0.0352, - "step": 69595 - }, - { - "epoch": 1.7667216651859374, - "grad_norm": 0.3297279477119446, - "learning_rate": 8.221855565427085e-06, - "loss": 0.0343, - "step": 69600 - }, - { - "epoch": 1.7668485848457927, - "grad_norm": 0.7050110697746277, - "learning_rate": 8.221009434361383e-06, - "loss": 0.0416, - "step": 69605 - }, - { - "epoch": 1.7669755045056479, - "grad_norm": 0.6347429752349854, - "learning_rate": 8.220163303295681e-06, - "loss": 0.0298, - "step": 69610 - }, - { - "epoch": 1.7671024241655031, - "grad_norm": 0.5708776712417603, - "learning_rate": 8.21931717222998e-06, - "loss": 0.0365, - "step": 69615 - }, - { - "epoch": 1.7672293438253586, - "grad_norm": 0.49624764919281006, - "learning_rate": 8.218471041164277e-06, - "loss": 0.0551, - "step": 69620 - }, - { - "epoch": 1.767356263485214, - "grad_norm": 0.4786580502986908, - "learning_rate": 8.217624910098575e-06, - "loss": 0.0472, - "step": 69625 - }, - { - "epoch": 1.7674831831450692, - "grad_norm": 0.46171995997428894, - "learning_rate": 8.216778779032872e-06, - "loss": 0.0288, - "step": 69630 - }, - { - "epoch": 1.7676101028049245, - "grad_norm": 0.3463570475578308, - "learning_rate": 8.21593264796717e-06, - "loss": 0.0268, - "step": 69635 - }, - { - "epoch": 1.7677370224647797, - "grad_norm": 0.3689161241054535, - "learning_rate": 8.215086516901468e-06, - "loss": 0.0382, - "step": 69640 - }, - { - "epoch": 1.7678639421246352, - "grad_norm": 1.0751612186431885, - "learning_rate": 8.214240385835767e-06, - "loss": 0.0265, - "step": 69645 - }, - { - "epoch": 1.7679908617844904, - "grad_norm": 0.5610299110412598, - "learning_rate": 8.213394254770064e-06, - "loss": 0.0437, - "step": 69650 - }, - { - "epoch": 1.7681177814443458, - "grad_norm": 0.5853758454322815, - "learning_rate": 8.212548123704362e-06, - "loss": 0.0348, - "step": 69655 - }, - { - "epoch": 1.768244701104201, - "grad_norm": 1.4965475797653198, - "learning_rate": 8.21170199263866e-06, - "loss": 0.0425, - "step": 69660 - }, - { - "epoch": 1.7683716207640563, - "grad_norm": 0.6118372082710266, - "learning_rate": 8.210855861572959e-06, - "loss": 0.0455, - "step": 69665 - }, - { - "epoch": 1.7684985404239115, - "grad_norm": 0.4745662808418274, - "learning_rate": 8.210009730507256e-06, - "loss": 0.0285, - "step": 69670 - }, - { - "epoch": 1.768625460083767, - "grad_norm": 0.3280279040336609, - "learning_rate": 8.209163599441554e-06, - "loss": 0.0349, - "step": 69675 - }, - { - "epoch": 1.7687523797436224, - "grad_norm": 0.42674720287323, - "learning_rate": 8.208317468375852e-06, - "loss": 0.0393, - "step": 69680 - }, - { - "epoch": 1.7688792994034777, - "grad_norm": 0.7291773557662964, - "learning_rate": 8.20747133731015e-06, - "loss": 0.0527, - "step": 69685 - }, - { - "epoch": 1.7690062190633329, - "grad_norm": 0.34540945291519165, - "learning_rate": 8.206625206244449e-06, - "loss": 0.0407, - "step": 69690 - }, - { - "epoch": 1.7691331387231881, - "grad_norm": 0.4546838402748108, - "learning_rate": 8.205779075178746e-06, - "loss": 0.0418, - "step": 69695 - }, - { - "epoch": 1.7692600583830436, - "grad_norm": 0.2727998197078705, - "learning_rate": 8.204932944113044e-06, - "loss": 0.0419, - "step": 69700 - }, - { - "epoch": 1.7693869780428988, - "grad_norm": 0.9456978440284729, - "learning_rate": 8.204086813047341e-06, - "loss": 0.031, - "step": 69705 - }, - { - "epoch": 1.7695138977027542, - "grad_norm": 0.22618837654590607, - "learning_rate": 8.20324068198164e-06, - "loss": 0.0273, - "step": 69710 - }, - { - "epoch": 1.7696408173626095, - "grad_norm": 0.4109944999217987, - "learning_rate": 8.202394550915938e-06, - "loss": 0.0379, - "step": 69715 - }, - { - "epoch": 1.7697677370224647, - "grad_norm": 0.9685548543930054, - "learning_rate": 8.201548419850236e-06, - "loss": 0.0394, - "step": 69720 - }, - { - "epoch": 1.76989465668232, - "grad_norm": 0.46867072582244873, - "learning_rate": 8.200702288784533e-06, - "loss": 0.0451, - "step": 69725 - }, - { - "epoch": 1.7700215763421754, - "grad_norm": 0.4498618245124817, - "learning_rate": 8.199856157718831e-06, - "loss": 0.0283, - "step": 69730 - }, - { - "epoch": 1.7701484960020308, - "grad_norm": 0.6627779603004456, - "learning_rate": 8.19901002665313e-06, - "loss": 0.0427, - "step": 69735 - }, - { - "epoch": 1.770275415661886, - "grad_norm": 0.5675529837608337, - "learning_rate": 8.198163895587428e-06, - "loss": 0.0316, - "step": 69740 - }, - { - "epoch": 1.7704023353217413, - "grad_norm": 0.4448276460170746, - "learning_rate": 8.197317764521725e-06, - "loss": 0.025, - "step": 69745 - }, - { - "epoch": 1.7705292549815965, - "grad_norm": 0.33613961935043335, - "learning_rate": 8.196471633456023e-06, - "loss": 0.0286, - "step": 69750 - }, - { - "epoch": 1.770656174641452, - "grad_norm": 0.36363568902015686, - "learning_rate": 8.195625502390322e-06, - "loss": 0.0478, - "step": 69755 - }, - { - "epoch": 1.7707830943013074, - "grad_norm": 0.45522433519363403, - "learning_rate": 8.19477937132462e-06, - "loss": 0.0456, - "step": 69760 - }, - { - "epoch": 1.7709100139611627, - "grad_norm": 0.4297570288181305, - "learning_rate": 8.193933240258917e-06, - "loss": 0.0441, - "step": 69765 - }, - { - "epoch": 1.7710369336210179, - "grad_norm": 0.5973749160766602, - "learning_rate": 8.193087109193215e-06, - "loss": 0.0393, - "step": 69770 - }, - { - "epoch": 1.7711638532808731, - "grad_norm": 0.20306281745433807, - "learning_rate": 8.192240978127512e-06, - "loss": 0.0294, - "step": 69775 - }, - { - "epoch": 1.7712907729407286, - "grad_norm": 0.39405056834220886, - "learning_rate": 8.19139484706181e-06, - "loss": 0.0308, - "step": 69780 - }, - { - "epoch": 1.7714176926005838, - "grad_norm": 0.41200125217437744, - "learning_rate": 8.190548715996109e-06, - "loss": 0.0499, - "step": 69785 - }, - { - "epoch": 1.7715446122604392, - "grad_norm": 0.9105291366577148, - "learning_rate": 8.189702584930407e-06, - "loss": 0.0569, - "step": 69790 - }, - { - "epoch": 1.7716715319202945, - "grad_norm": 0.5348845720291138, - "learning_rate": 8.188856453864704e-06, - "loss": 0.0307, - "step": 69795 - }, - { - "epoch": 1.7717984515801497, - "grad_norm": 0.25718992948532104, - "learning_rate": 8.188010322799002e-06, - "loss": 0.0346, - "step": 69800 - }, - { - "epoch": 1.771925371240005, - "grad_norm": 0.5095730423927307, - "learning_rate": 8.1871641917333e-06, - "loss": 0.0456, - "step": 69805 - }, - { - "epoch": 1.7720522908998604, - "grad_norm": 0.34774741530418396, - "learning_rate": 8.186318060667599e-06, - "loss": 0.0422, - "step": 69810 - }, - { - "epoch": 1.7721792105597158, - "grad_norm": 0.520911693572998, - "learning_rate": 8.185471929601896e-06, - "loss": 0.0346, - "step": 69815 - }, - { - "epoch": 1.772306130219571, - "grad_norm": 0.34353986382484436, - "learning_rate": 8.184625798536194e-06, - "loss": 0.0393, - "step": 69820 - }, - { - "epoch": 1.7724330498794263, - "grad_norm": 0.27931085228919983, - "learning_rate": 8.183779667470492e-06, - "loss": 0.0447, - "step": 69825 - }, - { - "epoch": 1.7725599695392815, - "grad_norm": 0.4485604166984558, - "learning_rate": 8.18293353640479e-06, - "loss": 0.038, - "step": 69830 - }, - { - "epoch": 1.772686889199137, - "grad_norm": 0.513897716999054, - "learning_rate": 8.182087405339088e-06, - "loss": 0.0271, - "step": 69835 - }, - { - "epoch": 1.7728138088589924, - "grad_norm": 0.5931534171104431, - "learning_rate": 8.181241274273386e-06, - "loss": 0.0248, - "step": 69840 - }, - { - "epoch": 1.7729407285188477, - "grad_norm": 0.38315194845199585, - "learning_rate": 8.180395143207683e-06, - "loss": 0.0272, - "step": 69845 - }, - { - "epoch": 1.7730676481787029, - "grad_norm": 0.24745292961597443, - "learning_rate": 8.179549012141981e-06, - "loss": 0.0356, - "step": 69850 - }, - { - "epoch": 1.7731945678385581, - "grad_norm": 0.3309295177459717, - "learning_rate": 8.17870288107628e-06, - "loss": 0.0258, - "step": 69855 - }, - { - "epoch": 1.7733214874984133, - "grad_norm": 0.5221357941627502, - "learning_rate": 8.177856750010578e-06, - "loss": 0.0345, - "step": 69860 - }, - { - "epoch": 1.7734484071582688, - "grad_norm": 0.6552788019180298, - "learning_rate": 8.177010618944875e-06, - "loss": 0.0493, - "step": 69865 - }, - { - "epoch": 1.7735753268181242, - "grad_norm": 0.48274633288383484, - "learning_rate": 8.176164487879173e-06, - "loss": 0.0248, - "step": 69870 - }, - { - "epoch": 1.7737022464779795, - "grad_norm": 0.5915929675102234, - "learning_rate": 8.175318356813471e-06, - "loss": 0.0237, - "step": 69875 - }, - { - "epoch": 1.7738291661378347, - "grad_norm": 0.5933741927146912, - "learning_rate": 8.17447222574777e-06, - "loss": 0.0425, - "step": 69880 - }, - { - "epoch": 1.77395608579769, - "grad_norm": 0.607947587966919, - "learning_rate": 8.173626094682066e-06, - "loss": 0.0279, - "step": 69885 - }, - { - "epoch": 1.7740830054575454, - "grad_norm": 0.5264747142791748, - "learning_rate": 8.172779963616365e-06, - "loss": 0.0357, - "step": 69890 - }, - { - "epoch": 1.7742099251174008, - "grad_norm": 0.3661070764064789, - "learning_rate": 8.171933832550663e-06, - "loss": 0.0449, - "step": 69895 - }, - { - "epoch": 1.774336844777256, - "grad_norm": 0.7438878417015076, - "learning_rate": 8.171087701484962e-06, - "loss": 0.0382, - "step": 69900 - }, - { - "epoch": 1.7744637644371113, - "grad_norm": 0.36169829964637756, - "learning_rate": 8.170241570419258e-06, - "loss": 0.0431, - "step": 69905 - }, - { - "epoch": 1.7745906840969665, - "grad_norm": 0.5047697424888611, - "learning_rate": 8.169395439353557e-06, - "loss": 0.0291, - "step": 69910 - }, - { - "epoch": 1.774717603756822, - "grad_norm": 0.4185803234577179, - "learning_rate": 8.168549308287854e-06, - "loss": 0.0303, - "step": 69915 - }, - { - "epoch": 1.7748445234166772, - "grad_norm": 0.9711556434631348, - "learning_rate": 8.167703177222152e-06, - "loss": 0.0444, - "step": 69920 - }, - { - "epoch": 1.7749714430765327, - "grad_norm": 0.47670280933380127, - "learning_rate": 8.16685704615645e-06, - "loss": 0.0319, - "step": 69925 - }, - { - "epoch": 1.7750983627363879, - "grad_norm": 0.2874036133289337, - "learning_rate": 8.166010915090749e-06, - "loss": 0.0285, - "step": 69930 - }, - { - "epoch": 1.775225282396243, - "grad_norm": 0.5589892268180847, - "learning_rate": 8.165164784025045e-06, - "loss": 0.0276, - "step": 69935 - }, - { - "epoch": 1.7753522020560983, - "grad_norm": 0.4623180329799652, - "learning_rate": 8.164318652959344e-06, - "loss": 0.0448, - "step": 69940 - }, - { - "epoch": 1.7754791217159538, - "grad_norm": 0.3583493232727051, - "learning_rate": 8.163472521893642e-06, - "loss": 0.0464, - "step": 69945 - }, - { - "epoch": 1.7756060413758092, - "grad_norm": 0.2713697552680969, - "learning_rate": 8.16262639082794e-06, - "loss": 0.0336, - "step": 69950 - }, - { - "epoch": 1.7757329610356645, - "grad_norm": 0.47280585765838623, - "learning_rate": 8.161780259762237e-06, - "loss": 0.0621, - "step": 69955 - }, - { - "epoch": 1.7758598806955197, - "grad_norm": 0.3393082618713379, - "learning_rate": 8.160934128696536e-06, - "loss": 0.0394, - "step": 69960 - }, - { - "epoch": 1.775986800355375, - "grad_norm": 0.3621320128440857, - "learning_rate": 8.160087997630834e-06, - "loss": 0.047, - "step": 69965 - }, - { - "epoch": 1.7761137200152304, - "grad_norm": 0.38512855768203735, - "learning_rate": 8.159241866565133e-06, - "loss": 0.0353, - "step": 69970 - }, - { - "epoch": 1.7762406396750858, - "grad_norm": 0.4747893810272217, - "learning_rate": 8.15839573549943e-06, - "loss": 0.0544, - "step": 69975 - }, - { - "epoch": 1.776367559334941, - "grad_norm": 0.5185352563858032, - "learning_rate": 8.157549604433728e-06, - "loss": 0.0374, - "step": 69980 - }, - { - "epoch": 1.7764944789947963, - "grad_norm": 1.337363362312317, - "learning_rate": 8.156703473368024e-06, - "loss": 0.0729, - "step": 69985 - }, - { - "epoch": 1.7766213986546515, - "grad_norm": 0.34946364164352417, - "learning_rate": 8.155857342302323e-06, - "loss": 0.0298, - "step": 69990 - }, - { - "epoch": 1.776748318314507, - "grad_norm": 0.39580631256103516, - "learning_rate": 8.155011211236621e-06, - "loss": 0.0252, - "step": 69995 - }, - { - "epoch": 1.7768752379743622, - "grad_norm": 0.5099133849143982, - "learning_rate": 8.15416508017092e-06, - "loss": 0.0621, - "step": 70000 - }, - { - "epoch": 1.7770021576342176, - "grad_norm": 0.3238312900066376, - "learning_rate": 8.153318949105216e-06, - "loss": 0.027, - "step": 70005 - }, - { - "epoch": 1.7771290772940729, - "grad_norm": 0.5126914978027344, - "learning_rate": 8.152472818039515e-06, - "loss": 0.0487, - "step": 70010 - }, - { - "epoch": 1.777255996953928, - "grad_norm": 0.268119752407074, - "learning_rate": 8.151626686973813e-06, - "loss": 0.0265, - "step": 70015 - }, - { - "epoch": 1.7773829166137833, - "grad_norm": 0.37613558769226074, - "learning_rate": 8.150780555908111e-06, - "loss": 0.0452, - "step": 70020 - }, - { - "epoch": 1.7775098362736388, - "grad_norm": 0.4861273765563965, - "learning_rate": 8.149934424842408e-06, - "loss": 0.0451, - "step": 70025 - }, - { - "epoch": 1.7776367559334942, - "grad_norm": 0.5824633836746216, - "learning_rate": 8.149088293776707e-06, - "loss": 0.0517, - "step": 70030 - }, - { - "epoch": 1.7777636755933495, - "grad_norm": 2.4078550338745117, - "learning_rate": 8.148242162711005e-06, - "loss": 0.0769, - "step": 70035 - }, - { - "epoch": 1.7778905952532047, - "grad_norm": 0.438077837228775, - "learning_rate": 8.147396031645303e-06, - "loss": 0.0361, - "step": 70040 - }, - { - "epoch": 1.77801751491306, - "grad_norm": 0.3750423491001129, - "learning_rate": 8.1465499005796e-06, - "loss": 0.0378, - "step": 70045 - }, - { - "epoch": 1.7781444345729154, - "grad_norm": 0.5699222087860107, - "learning_rate": 8.145703769513898e-06, - "loss": 0.0289, - "step": 70050 - }, - { - "epoch": 1.7782713542327706, - "grad_norm": 0.3518058657646179, - "learning_rate": 8.144857638448195e-06, - "loss": 0.036, - "step": 70055 - }, - { - "epoch": 1.778398273892626, - "grad_norm": 0.42587560415267944, - "learning_rate": 8.144011507382494e-06, - "loss": 0.0433, - "step": 70060 - }, - { - "epoch": 1.7785251935524813, - "grad_norm": 0.8676615357398987, - "learning_rate": 8.143165376316792e-06, - "loss": 0.0448, - "step": 70065 - }, - { - "epoch": 1.7786521132123365, - "grad_norm": 0.25668153166770935, - "learning_rate": 8.14231924525109e-06, - "loss": 0.0215, - "step": 70070 - }, - { - "epoch": 1.7787790328721917, - "grad_norm": 0.3112512230873108, - "learning_rate": 8.141473114185387e-06, - "loss": 0.0456, - "step": 70075 - }, - { - "epoch": 1.7789059525320472, - "grad_norm": 0.501594603061676, - "learning_rate": 8.140626983119686e-06, - "loss": 0.0502, - "step": 70080 - }, - { - "epoch": 1.7790328721919026, - "grad_norm": 0.250700443983078, - "learning_rate": 8.139780852053984e-06, - "loss": 0.0235, - "step": 70085 - }, - { - "epoch": 1.7791597918517579, - "grad_norm": 0.28784486651420593, - "learning_rate": 8.138934720988282e-06, - "loss": 0.0279, - "step": 70090 - }, - { - "epoch": 1.779286711511613, - "grad_norm": 0.20001453161239624, - "learning_rate": 8.138088589922579e-06, - "loss": 0.036, - "step": 70095 - }, - { - "epoch": 1.7794136311714683, - "grad_norm": 0.5380121469497681, - "learning_rate": 8.137242458856877e-06, - "loss": 0.0377, - "step": 70100 - }, - { - "epoch": 1.7795405508313238, - "grad_norm": 0.34635505080223083, - "learning_rate": 8.136396327791176e-06, - "loss": 0.0284, - "step": 70105 - }, - { - "epoch": 1.7796674704911792, - "grad_norm": 0.5090821385383606, - "learning_rate": 8.135550196725474e-06, - "loss": 0.0288, - "step": 70110 - }, - { - "epoch": 1.7797943901510345, - "grad_norm": 0.40860262513160706, - "learning_rate": 8.134704065659771e-06, - "loss": 0.0208, - "step": 70115 - }, - { - "epoch": 1.7799213098108897, - "grad_norm": 0.41193878650665283, - "learning_rate": 8.13385793459407e-06, - "loss": 0.0266, - "step": 70120 - }, - { - "epoch": 1.780048229470745, - "grad_norm": 0.570427656173706, - "learning_rate": 8.133011803528366e-06, - "loss": 0.0384, - "step": 70125 - }, - { - "epoch": 1.7801751491306004, - "grad_norm": 0.8358748555183411, - "learning_rate": 8.132165672462664e-06, - "loss": 0.0319, - "step": 70130 - }, - { - "epoch": 1.7803020687904556, - "grad_norm": 0.5354873538017273, - "learning_rate": 8.131319541396963e-06, - "loss": 0.0616, - "step": 70135 - }, - { - "epoch": 1.780428988450311, - "grad_norm": 0.3037182092666626, - "learning_rate": 8.130473410331261e-06, - "loss": 0.0319, - "step": 70140 - }, - { - "epoch": 1.7805559081101663, - "grad_norm": 0.4913147985935211, - "learning_rate": 8.129627279265558e-06, - "loss": 0.0582, - "step": 70145 - }, - { - "epoch": 1.7806828277700215, - "grad_norm": 0.31541526317596436, - "learning_rate": 8.128781148199856e-06, - "loss": 0.0457, - "step": 70150 - }, - { - "epoch": 1.7808097474298767, - "grad_norm": 0.8901238441467285, - "learning_rate": 8.127935017134155e-06, - "loss": 0.0329, - "step": 70155 - }, - { - "epoch": 1.7809366670897322, - "grad_norm": 0.8764209747314453, - "learning_rate": 8.127088886068453e-06, - "loss": 0.0391, - "step": 70160 - }, - { - "epoch": 1.7810635867495876, - "grad_norm": 0.6074627041816711, - "learning_rate": 8.12624275500275e-06, - "loss": 0.0455, - "step": 70165 - }, - { - "epoch": 1.7811905064094429, - "grad_norm": 0.40889331698417664, - "learning_rate": 8.125396623937048e-06, - "loss": 0.0276, - "step": 70170 - }, - { - "epoch": 1.781317426069298, - "grad_norm": 0.5502198934555054, - "learning_rate": 8.124550492871347e-06, - "loss": 0.0398, - "step": 70175 - }, - { - "epoch": 1.7814443457291533, - "grad_norm": 0.7143648266792297, - "learning_rate": 8.123704361805645e-06, - "loss": 0.0318, - "step": 70180 - }, - { - "epoch": 1.7815712653890088, - "grad_norm": 0.39474961161613464, - "learning_rate": 8.122858230739942e-06, - "loss": 0.0194, - "step": 70185 - }, - { - "epoch": 1.781698185048864, - "grad_norm": 0.49188658595085144, - "learning_rate": 8.12201209967424e-06, - "loss": 0.0406, - "step": 70190 - }, - { - "epoch": 1.7818251047087195, - "grad_norm": 0.3187478482723236, - "learning_rate": 8.121165968608539e-06, - "loss": 0.0264, - "step": 70195 - }, - { - "epoch": 1.7819520243685747, - "grad_norm": 0.5273995995521545, - "learning_rate": 8.120319837542835e-06, - "loss": 0.0482, - "step": 70200 - }, - { - "epoch": 1.78207894402843, - "grad_norm": 0.47301122546195984, - "learning_rate": 8.119473706477134e-06, - "loss": 0.041, - "step": 70205 - }, - { - "epoch": 1.7822058636882852, - "grad_norm": 0.3014694154262543, - "learning_rate": 8.118627575411432e-06, - "loss": 0.0435, - "step": 70210 - }, - { - "epoch": 1.7823327833481406, - "grad_norm": 0.5048933625221252, - "learning_rate": 8.11778144434573e-06, - "loss": 0.0353, - "step": 70215 - }, - { - "epoch": 1.782459703007996, - "grad_norm": 0.379330575466156, - "learning_rate": 8.116935313280027e-06, - "loss": 0.0363, - "step": 70220 - }, - { - "epoch": 1.7825866226678513, - "grad_norm": 0.4192676544189453, - "learning_rate": 8.116089182214326e-06, - "loss": 0.0348, - "step": 70225 - }, - { - "epoch": 1.7827135423277065, - "grad_norm": 0.5213958024978638, - "learning_rate": 8.115243051148624e-06, - "loss": 0.0559, - "step": 70230 - }, - { - "epoch": 1.7828404619875617, - "grad_norm": 0.4229230284690857, - "learning_rate": 8.114396920082922e-06, - "loss": 0.0424, - "step": 70235 - }, - { - "epoch": 1.7829673816474172, - "grad_norm": 0.5766609311103821, - "learning_rate": 8.113550789017219e-06, - "loss": 0.0356, - "step": 70240 - }, - { - "epoch": 1.7830943013072726, - "grad_norm": 0.5972825884819031, - "learning_rate": 8.112704657951518e-06, - "loss": 0.0277, - "step": 70245 - }, - { - "epoch": 1.7832212209671279, - "grad_norm": 1.4389808177947998, - "learning_rate": 8.111858526885816e-06, - "loss": 0.0319, - "step": 70250 - }, - { - "epoch": 1.783348140626983, - "grad_norm": 0.7217868566513062, - "learning_rate": 8.111012395820114e-06, - "loss": 0.0251, - "step": 70255 - }, - { - "epoch": 1.7834750602868383, - "grad_norm": 1.267551302909851, - "learning_rate": 8.110166264754411e-06, - "loss": 0.0463, - "step": 70260 - }, - { - "epoch": 1.7836019799466938, - "grad_norm": 0.5815815329551697, - "learning_rate": 8.10932013368871e-06, - "loss": 0.0346, - "step": 70265 - }, - { - "epoch": 1.783728899606549, - "grad_norm": 0.45230624079704285, - "learning_rate": 8.108474002623006e-06, - "loss": 0.0484, - "step": 70270 - }, - { - "epoch": 1.7838558192664045, - "grad_norm": 1.9081382751464844, - "learning_rate": 8.107627871557305e-06, - "loss": 0.036, - "step": 70275 - }, - { - "epoch": 1.7839827389262597, - "grad_norm": 0.5352705717086792, - "learning_rate": 8.106781740491603e-06, - "loss": 0.0493, - "step": 70280 - }, - { - "epoch": 1.784109658586115, - "grad_norm": 0.8405653238296509, - "learning_rate": 8.105935609425901e-06, - "loss": 0.0287, - "step": 70285 - }, - { - "epoch": 1.7842365782459702, - "grad_norm": 0.4904994070529938, - "learning_rate": 8.105089478360198e-06, - "loss": 0.0363, - "step": 70290 - }, - { - "epoch": 1.7843634979058256, - "grad_norm": 0.4064176380634308, - "learning_rate": 8.104243347294496e-06, - "loss": 0.053, - "step": 70295 - }, - { - "epoch": 1.784490417565681, - "grad_norm": 0.41171449422836304, - "learning_rate": 8.103397216228795e-06, - "loss": 0.035, - "step": 70300 - }, - { - "epoch": 1.7846173372255363, - "grad_norm": 0.7115368843078613, - "learning_rate": 8.102551085163093e-06, - "loss": 0.0466, - "step": 70305 - }, - { - "epoch": 1.7847442568853915, - "grad_norm": 0.6018228530883789, - "learning_rate": 8.10170495409739e-06, - "loss": 0.0267, - "step": 70310 - }, - { - "epoch": 1.7848711765452467, - "grad_norm": 0.33814138174057007, - "learning_rate": 8.100858823031688e-06, - "loss": 0.0323, - "step": 70315 - }, - { - "epoch": 1.7849980962051022, - "grad_norm": 0.5599258542060852, - "learning_rate": 8.100012691965987e-06, - "loss": 0.0556, - "step": 70320 - }, - { - "epoch": 1.7851250158649576, - "grad_norm": 0.5552322864532471, - "learning_rate": 8.099166560900285e-06, - "loss": 0.0398, - "step": 70325 - }, - { - "epoch": 1.7852519355248129, - "grad_norm": 0.5299741625785828, - "learning_rate": 8.098320429834582e-06, - "loss": 0.0406, - "step": 70330 - }, - { - "epoch": 1.785378855184668, - "grad_norm": 0.5101782083511353, - "learning_rate": 8.09747429876888e-06, - "loss": 0.0513, - "step": 70335 - }, - { - "epoch": 1.7855057748445233, - "grad_norm": 0.24159294366836548, - "learning_rate": 8.096628167703177e-06, - "loss": 0.0262, - "step": 70340 - }, - { - "epoch": 1.7856326945043788, - "grad_norm": 0.43885716795921326, - "learning_rate": 8.095782036637475e-06, - "loss": 0.0395, - "step": 70345 - }, - { - "epoch": 1.785759614164234, - "grad_norm": 0.3852458894252777, - "learning_rate": 8.094935905571774e-06, - "loss": 0.0395, - "step": 70350 - }, - { - "epoch": 1.7858865338240895, - "grad_norm": 0.2770293653011322, - "learning_rate": 8.094089774506072e-06, - "loss": 0.0178, - "step": 70355 - }, - { - "epoch": 1.7860134534839447, - "grad_norm": 0.35438698530197144, - "learning_rate": 8.093243643440369e-06, - "loss": 0.0504, - "step": 70360 - }, - { - "epoch": 1.7861403731438, - "grad_norm": 0.5206133127212524, - "learning_rate": 8.092397512374667e-06, - "loss": 0.0345, - "step": 70365 - }, - { - "epoch": 1.7862672928036551, - "grad_norm": 0.8120267391204834, - "learning_rate": 8.091551381308966e-06, - "loss": 0.0327, - "step": 70370 - }, - { - "epoch": 1.7863942124635106, - "grad_norm": 0.874000608921051, - "learning_rate": 8.090705250243264e-06, - "loss": 0.0613, - "step": 70375 - }, - { - "epoch": 1.786521132123366, - "grad_norm": 0.4088856875896454, - "learning_rate": 8.08985911917756e-06, - "loss": 0.0334, - "step": 70380 - }, - { - "epoch": 1.7866480517832213, - "grad_norm": 0.4909011721611023, - "learning_rate": 8.08901298811186e-06, - "loss": 0.0426, - "step": 70385 - }, - { - "epoch": 1.7867749714430765, - "grad_norm": 0.32886606454849243, - "learning_rate": 8.088166857046158e-06, - "loss": 0.0372, - "step": 70390 - }, - { - "epoch": 1.7869018911029317, - "grad_norm": 0.3764723837375641, - "learning_rate": 8.087320725980456e-06, - "loss": 0.0341, - "step": 70395 - }, - { - "epoch": 1.7870288107627872, - "grad_norm": 0.26865699887275696, - "learning_rate": 8.086474594914753e-06, - "loss": 0.0313, - "step": 70400 - }, - { - "epoch": 1.7871557304226424, - "grad_norm": 0.5106930732727051, - "learning_rate": 8.085628463849051e-06, - "loss": 0.0452, - "step": 70405 - }, - { - "epoch": 1.7872826500824979, - "grad_norm": 0.4157450795173645, - "learning_rate": 8.084782332783348e-06, - "loss": 0.0454, - "step": 70410 - }, - { - "epoch": 1.787409569742353, - "grad_norm": 0.46502140164375305, - "learning_rate": 8.083936201717646e-06, - "loss": 0.0293, - "step": 70415 - }, - { - "epoch": 1.7875364894022083, - "grad_norm": 2.894770383834839, - "learning_rate": 8.083090070651945e-06, - "loss": 0.0446, - "step": 70420 - }, - { - "epoch": 1.7876634090620636, - "grad_norm": 0.516001284122467, - "learning_rate": 8.082243939586243e-06, - "loss": 0.0427, - "step": 70425 - }, - { - "epoch": 1.787790328721919, - "grad_norm": 0.5472349524497986, - "learning_rate": 8.08139780852054e-06, - "loss": 0.0384, - "step": 70430 - }, - { - "epoch": 1.7879172483817745, - "grad_norm": 0.4246576428413391, - "learning_rate": 8.080551677454838e-06, - "loss": 0.0608, - "step": 70435 - }, - { - "epoch": 1.7880441680416297, - "grad_norm": 0.37050727009773254, - "learning_rate": 8.079705546389137e-06, - "loss": 0.0229, - "step": 70440 - }, - { - "epoch": 1.788171087701485, - "grad_norm": 0.7087730765342712, - "learning_rate": 8.078859415323435e-06, - "loss": 0.044, - "step": 70445 - }, - { - "epoch": 1.7882980073613401, - "grad_norm": 0.2650199234485626, - "learning_rate": 8.078013284257732e-06, - "loss": 0.0475, - "step": 70450 - }, - { - "epoch": 1.7884249270211956, - "grad_norm": 0.2708140015602112, - "learning_rate": 8.07716715319203e-06, - "loss": 0.0267, - "step": 70455 - }, - { - "epoch": 1.788551846681051, - "grad_norm": 0.414232462644577, - "learning_rate": 8.076321022126329e-06, - "loss": 0.0342, - "step": 70460 - }, - { - "epoch": 1.7886787663409063, - "grad_norm": 0.7577337026596069, - "learning_rate": 8.075474891060627e-06, - "loss": 0.039, - "step": 70465 - }, - { - "epoch": 1.7888056860007615, - "grad_norm": 0.573124349117279, - "learning_rate": 8.074628759994924e-06, - "loss": 0.0316, - "step": 70470 - }, - { - "epoch": 1.7889326056606167, - "grad_norm": 0.36612188816070557, - "learning_rate": 8.073782628929222e-06, - "loss": 0.0279, - "step": 70475 - }, - { - "epoch": 1.7890595253204722, - "grad_norm": 0.8811219930648804, - "learning_rate": 8.072936497863519e-06, - "loss": 0.0336, - "step": 70480 - }, - { - "epoch": 1.7891864449803274, - "grad_norm": 0.2892192304134369, - "learning_rate": 8.072090366797817e-06, - "loss": 0.0299, - "step": 70485 - }, - { - "epoch": 1.7893133646401829, - "grad_norm": 0.8179498910903931, - "learning_rate": 8.071244235732116e-06, - "loss": 0.0388, - "step": 70490 - }, - { - "epoch": 1.789440284300038, - "grad_norm": 0.6006660461425781, - "learning_rate": 8.070398104666414e-06, - "loss": 0.0373, - "step": 70495 - }, - { - "epoch": 1.7895672039598933, - "grad_norm": 0.49992093443870544, - "learning_rate": 8.06955197360071e-06, - "loss": 0.0385, - "step": 70500 - }, - { - "epoch": 1.7896941236197486, - "grad_norm": 0.2841566801071167, - "learning_rate": 8.068705842535009e-06, - "loss": 0.0396, - "step": 70505 - }, - { - "epoch": 1.789821043279604, - "grad_norm": 0.3892175853252411, - "learning_rate": 8.067859711469307e-06, - "loss": 0.0436, - "step": 70510 - }, - { - "epoch": 1.7899479629394595, - "grad_norm": 0.3981149196624756, - "learning_rate": 8.067013580403606e-06, - "loss": 0.018, - "step": 70515 - }, - { - "epoch": 1.7900748825993147, - "grad_norm": 0.5163293480873108, - "learning_rate": 8.066167449337903e-06, - "loss": 0.0407, - "step": 70520 - }, - { - "epoch": 1.79020180225917, - "grad_norm": 0.5130475759506226, - "learning_rate": 8.065321318272201e-06, - "loss": 0.048, - "step": 70525 - }, - { - "epoch": 1.7903287219190251, - "grad_norm": 0.2993965148925781, - "learning_rate": 8.0644751872065e-06, - "loss": 0.0381, - "step": 70530 - }, - { - "epoch": 1.7904556415788806, - "grad_norm": 0.29573237895965576, - "learning_rate": 8.063629056140798e-06, - "loss": 0.0262, - "step": 70535 - }, - { - "epoch": 1.7905825612387358, - "grad_norm": 0.1623002588748932, - "learning_rate": 8.062782925075094e-06, - "loss": 0.0275, - "step": 70540 - }, - { - "epoch": 1.7907094808985913, - "grad_norm": 0.41648173332214355, - "learning_rate": 8.061936794009393e-06, - "loss": 0.0339, - "step": 70545 - }, - { - "epoch": 1.7908364005584465, - "grad_norm": 0.5010846257209778, - "learning_rate": 8.06109066294369e-06, - "loss": 0.0362, - "step": 70550 - }, - { - "epoch": 1.7909633202183017, - "grad_norm": 0.40256500244140625, - "learning_rate": 8.060244531877988e-06, - "loss": 0.0383, - "step": 70555 - }, - { - "epoch": 1.791090239878157, - "grad_norm": 0.5873222947120667, - "learning_rate": 8.059398400812286e-06, - "loss": 0.0412, - "step": 70560 - }, - { - "epoch": 1.7912171595380124, - "grad_norm": 0.42487794160842896, - "learning_rate": 8.058552269746585e-06, - "loss": 0.0303, - "step": 70565 - }, - { - "epoch": 1.7913440791978679, - "grad_norm": 0.4472636282444, - "learning_rate": 8.057706138680881e-06, - "loss": 0.0406, - "step": 70570 - }, - { - "epoch": 1.791470998857723, - "grad_norm": 0.5277614593505859, - "learning_rate": 8.05686000761518e-06, - "loss": 0.0297, - "step": 70575 - }, - { - "epoch": 1.7915979185175783, - "grad_norm": 0.22728148102760315, - "learning_rate": 8.056013876549478e-06, - "loss": 0.0288, - "step": 70580 - }, - { - "epoch": 1.7917248381774336, - "grad_norm": 0.5433340072631836, - "learning_rate": 8.055167745483777e-06, - "loss": 0.0521, - "step": 70585 - }, - { - "epoch": 1.791851757837289, - "grad_norm": 0.9386636018753052, - "learning_rate": 8.054321614418073e-06, - "loss": 0.0362, - "step": 70590 - }, - { - "epoch": 1.7919786774971445, - "grad_norm": 0.5754039287567139, - "learning_rate": 8.053475483352372e-06, - "loss": 0.0494, - "step": 70595 - }, - { - "epoch": 1.7921055971569997, - "grad_norm": 0.4303615093231201, - "learning_rate": 8.05262935228667e-06, - "loss": 0.0443, - "step": 70600 - }, - { - "epoch": 1.792232516816855, - "grad_norm": 0.46444427967071533, - "learning_rate": 8.051783221220969e-06, - "loss": 0.0338, - "step": 70605 - }, - { - "epoch": 1.7923594364767101, - "grad_norm": 0.6349635720252991, - "learning_rate": 8.050937090155265e-06, - "loss": 0.0385, - "step": 70610 - }, - { - "epoch": 1.7924863561365656, - "grad_norm": 0.522357165813446, - "learning_rate": 8.050090959089564e-06, - "loss": 0.033, - "step": 70615 - }, - { - "epoch": 1.7926132757964208, - "grad_norm": 0.36035558581352234, - "learning_rate": 8.04924482802386e-06, - "loss": 0.0338, - "step": 70620 - }, - { - "epoch": 1.7927401954562763, - "grad_norm": 0.4947550296783447, - "learning_rate": 8.048398696958159e-06, - "loss": 0.0304, - "step": 70625 - }, - { - "epoch": 1.7928671151161315, - "grad_norm": 0.38645797967910767, - "learning_rate": 8.047552565892457e-06, - "loss": 0.0262, - "step": 70630 - }, - { - "epoch": 1.7929940347759867, - "grad_norm": 0.8277168869972229, - "learning_rate": 8.046706434826756e-06, - "loss": 0.0317, - "step": 70635 - }, - { - "epoch": 1.793120954435842, - "grad_norm": 0.4253663420677185, - "learning_rate": 8.045860303761052e-06, - "loss": 0.0408, - "step": 70640 - }, - { - "epoch": 1.7932478740956974, - "grad_norm": 0.6287935972213745, - "learning_rate": 8.04501417269535e-06, - "loss": 0.0439, - "step": 70645 - }, - { - "epoch": 1.7933747937555529, - "grad_norm": 0.39734792709350586, - "learning_rate": 8.044168041629649e-06, - "loss": 0.034, - "step": 70650 - }, - { - "epoch": 1.793501713415408, - "grad_norm": 0.4541463553905487, - "learning_rate": 8.043321910563948e-06, - "loss": 0.0404, - "step": 70655 - }, - { - "epoch": 1.7936286330752633, - "grad_norm": 0.5382044911384583, - "learning_rate": 8.042475779498244e-06, - "loss": 0.0329, - "step": 70660 - }, - { - "epoch": 1.7937555527351186, - "grad_norm": 0.20999638736248016, - "learning_rate": 8.041629648432543e-06, - "loss": 0.0289, - "step": 70665 - }, - { - "epoch": 1.793882472394974, - "grad_norm": 0.6434771418571472, - "learning_rate": 8.040783517366841e-06, - "loss": 0.047, - "step": 70670 - }, - { - "epoch": 1.7940093920548295, - "grad_norm": 0.7347632050514221, - "learning_rate": 8.03993738630114e-06, - "loss": 0.0344, - "step": 70675 - }, - { - "epoch": 1.7941363117146847, - "grad_norm": 0.380079984664917, - "learning_rate": 8.039091255235436e-06, - "loss": 0.0505, - "step": 70680 - }, - { - "epoch": 1.79426323137454, - "grad_norm": 0.6180084347724915, - "learning_rate": 8.038245124169735e-06, - "loss": 0.0417, - "step": 70685 - }, - { - "epoch": 1.7943901510343951, - "grad_norm": 0.2443629652261734, - "learning_rate": 8.037398993104031e-06, - "loss": 0.0256, - "step": 70690 - }, - { - "epoch": 1.7945170706942506, - "grad_norm": 0.523524284362793, - "learning_rate": 8.03655286203833e-06, - "loss": 0.0347, - "step": 70695 - }, - { - "epoch": 1.7946439903541058, - "grad_norm": 0.3796182870864868, - "learning_rate": 8.035706730972628e-06, - "loss": 0.0666, - "step": 70700 - }, - { - "epoch": 1.7947709100139613, - "grad_norm": 0.5086886882781982, - "learning_rate": 8.034860599906926e-06, - "loss": 0.065, - "step": 70705 - }, - { - "epoch": 1.7948978296738165, - "grad_norm": 0.3596569895744324, - "learning_rate": 8.034014468841225e-06, - "loss": 0.0265, - "step": 70710 - }, - { - "epoch": 1.7950247493336717, - "grad_norm": 0.4209994077682495, - "learning_rate": 8.033168337775522e-06, - "loss": 0.0379, - "step": 70715 - }, - { - "epoch": 1.795151668993527, - "grad_norm": 0.4426453709602356, - "learning_rate": 8.03232220670982e-06, - "loss": 0.0431, - "step": 70720 - }, - { - "epoch": 1.7952785886533824, - "grad_norm": 0.3352850377559662, - "learning_rate": 8.031476075644118e-06, - "loss": 0.039, - "step": 70725 - }, - { - "epoch": 1.7954055083132379, - "grad_norm": 0.4485955238342285, - "learning_rate": 8.030629944578417e-06, - "loss": 0.0299, - "step": 70730 - }, - { - "epoch": 1.795532427973093, - "grad_norm": 0.3708302974700928, - "learning_rate": 8.029783813512714e-06, - "loss": 0.047, - "step": 70735 - }, - { - "epoch": 1.7956593476329483, - "grad_norm": 0.9476247429847717, - "learning_rate": 8.028937682447012e-06, - "loss": 0.0666, - "step": 70740 - }, - { - "epoch": 1.7957862672928036, - "grad_norm": 0.40648141503334045, - "learning_rate": 8.02809155138131e-06, - "loss": 0.0326, - "step": 70745 - }, - { - "epoch": 1.795913186952659, - "grad_norm": 0.35871604084968567, - "learning_rate": 8.027245420315609e-06, - "loss": 0.0491, - "step": 70750 - }, - { - "epoch": 1.7960401066125142, - "grad_norm": 0.4185831844806671, - "learning_rate": 8.026399289249905e-06, - "loss": 0.0399, - "step": 70755 - }, - { - "epoch": 1.7961670262723697, - "grad_norm": 0.37116655707359314, - "learning_rate": 8.025553158184204e-06, - "loss": 0.0324, - "step": 70760 - }, - { - "epoch": 1.796293945932225, - "grad_norm": 0.32843413949012756, - "learning_rate": 8.0247070271185e-06, - "loss": 0.0379, - "step": 70765 - }, - { - "epoch": 1.7964208655920801, - "grad_norm": 0.39336520433425903, - "learning_rate": 8.023860896052799e-06, - "loss": 0.035, - "step": 70770 - }, - { - "epoch": 1.7965477852519354, - "grad_norm": 0.4729439914226532, - "learning_rate": 8.023014764987097e-06, - "loss": 0.0605, - "step": 70775 - }, - { - "epoch": 1.7966747049117908, - "grad_norm": 0.5391502380371094, - "learning_rate": 8.022168633921396e-06, - "loss": 0.0294, - "step": 70780 - }, - { - "epoch": 1.7968016245716463, - "grad_norm": 0.40414896607398987, - "learning_rate": 8.021322502855692e-06, - "loss": 0.0403, - "step": 70785 - }, - { - "epoch": 1.7969285442315015, - "grad_norm": 0.44581151008605957, - "learning_rate": 8.02047637178999e-06, - "loss": 0.0233, - "step": 70790 - }, - { - "epoch": 1.7970554638913567, - "grad_norm": 0.3391534984111786, - "learning_rate": 8.01963024072429e-06, - "loss": 0.029, - "step": 70795 - }, - { - "epoch": 1.797182383551212, - "grad_norm": 0.3350122570991516, - "learning_rate": 8.018784109658588e-06, - "loss": 0.0525, - "step": 70800 - }, - { - "epoch": 1.7973093032110674, - "grad_norm": 0.2893260419368744, - "learning_rate": 8.017937978592884e-06, - "loss": 0.0287, - "step": 70805 - }, - { - "epoch": 1.7974362228709229, - "grad_norm": 0.5325914025306702, - "learning_rate": 8.017091847527183e-06, - "loss": 0.0278, - "step": 70810 - }, - { - "epoch": 1.797563142530778, - "grad_norm": 0.8585466742515564, - "learning_rate": 8.016245716461481e-06, - "loss": 0.036, - "step": 70815 - }, - { - "epoch": 1.7976900621906333, - "grad_norm": 0.48691895604133606, - "learning_rate": 8.01539958539578e-06, - "loss": 0.0342, - "step": 70820 - }, - { - "epoch": 1.7978169818504885, - "grad_norm": 0.4268670976161957, - "learning_rate": 8.014553454330076e-06, - "loss": 0.0296, - "step": 70825 - }, - { - "epoch": 1.797943901510344, - "grad_norm": 0.3156912922859192, - "learning_rate": 8.013707323264375e-06, - "loss": 0.0411, - "step": 70830 - }, - { - "epoch": 1.7980708211701992, - "grad_norm": 0.4731660783290863, - "learning_rate": 8.012861192198671e-06, - "loss": 0.0379, - "step": 70835 - }, - { - "epoch": 1.7981977408300547, - "grad_norm": 0.48702800273895264, - "learning_rate": 8.01201506113297e-06, - "loss": 0.0376, - "step": 70840 - }, - { - "epoch": 1.79832466048991, - "grad_norm": 0.8609632253646851, - "learning_rate": 8.011168930067268e-06, - "loss": 0.0313, - "step": 70845 - }, - { - "epoch": 1.7984515801497651, - "grad_norm": 0.4859077036380768, - "learning_rate": 8.010322799001567e-06, - "loss": 0.0409, - "step": 70850 - }, - { - "epoch": 1.7985784998096204, - "grad_norm": 0.41556528210639954, - "learning_rate": 8.009476667935863e-06, - "loss": 0.0282, - "step": 70855 - }, - { - "epoch": 1.7987054194694758, - "grad_norm": 0.31939300894737244, - "learning_rate": 8.008630536870162e-06, - "loss": 0.0337, - "step": 70860 - }, - { - "epoch": 1.7988323391293313, - "grad_norm": 0.3585505485534668, - "learning_rate": 8.00778440580446e-06, - "loss": 0.0604, - "step": 70865 - }, - { - "epoch": 1.7989592587891865, - "grad_norm": 0.4554063081741333, - "learning_rate": 8.006938274738759e-06, - "loss": 0.0299, - "step": 70870 - }, - { - "epoch": 1.7990861784490417, - "grad_norm": 0.5285202264785767, - "learning_rate": 8.006092143673055e-06, - "loss": 0.0514, - "step": 70875 - }, - { - "epoch": 1.799213098108897, - "grad_norm": 0.3945246636867523, - "learning_rate": 8.005246012607354e-06, - "loss": 0.0315, - "step": 70880 - }, - { - "epoch": 1.7993400177687524, - "grad_norm": 0.3788602352142334, - "learning_rate": 8.004399881541652e-06, - "loss": 0.0371, - "step": 70885 - }, - { - "epoch": 1.7994669374286076, - "grad_norm": 0.4315367043018341, - "learning_rate": 8.00355375047595e-06, - "loss": 0.0541, - "step": 70890 - }, - { - "epoch": 1.799593857088463, - "grad_norm": 0.7493820786476135, - "learning_rate": 8.002707619410247e-06, - "loss": 0.0324, - "step": 70895 - }, - { - "epoch": 1.7997207767483183, - "grad_norm": 0.673617422580719, - "learning_rate": 8.001861488344546e-06, - "loss": 0.0395, - "step": 70900 - }, - { - "epoch": 1.7998476964081735, - "grad_norm": 0.15110789239406586, - "learning_rate": 8.001015357278842e-06, - "loss": 0.0195, - "step": 70905 - }, - { - "epoch": 1.7999746160680288, - "grad_norm": 0.7078927755355835, - "learning_rate": 8.00016922621314e-06, - "loss": 0.0412, - "step": 70910 - }, - { - "epoch": 1.8001015357278842, - "grad_norm": 0.606690526008606, - "learning_rate": 7.999323095147439e-06, - "loss": 0.0464, - "step": 70915 - }, - { - "epoch": 1.8002284553877397, - "grad_norm": 0.48360922932624817, - "learning_rate": 7.998476964081737e-06, - "loss": 0.0502, - "step": 70920 - }, - { - "epoch": 1.800355375047595, - "grad_norm": 0.42489802837371826, - "learning_rate": 7.997630833016034e-06, - "loss": 0.0208, - "step": 70925 - }, - { - "epoch": 1.8004822947074501, - "grad_norm": 0.6302265524864197, - "learning_rate": 7.996784701950333e-06, - "loss": 0.026, - "step": 70930 - }, - { - "epoch": 1.8006092143673054, - "grad_norm": 0.6131722331047058, - "learning_rate": 7.995938570884631e-06, - "loss": 0.0489, - "step": 70935 - }, - { - "epoch": 1.8007361340271608, - "grad_norm": 0.17902730405330658, - "learning_rate": 7.99509243981893e-06, - "loss": 0.0415, - "step": 70940 - }, - { - "epoch": 1.8008630536870163, - "grad_norm": 1.1799355745315552, - "learning_rate": 7.994246308753226e-06, - "loss": 0.0523, - "step": 70945 - }, - { - "epoch": 1.8009899733468715, - "grad_norm": 0.44104424118995667, - "learning_rate": 7.993400177687524e-06, - "loss": 0.0548, - "step": 70950 - }, - { - "epoch": 1.8011168930067267, - "grad_norm": 0.4193525016307831, - "learning_rate": 7.992554046621823e-06, - "loss": 0.0391, - "step": 70955 - }, - { - "epoch": 1.801243812666582, - "grad_norm": 0.7009649276733398, - "learning_rate": 7.991707915556121e-06, - "loss": 0.03, - "step": 70960 - }, - { - "epoch": 1.8013707323264374, - "grad_norm": 0.8744057416915894, - "learning_rate": 7.990861784490418e-06, - "loss": 0.0331, - "step": 70965 - }, - { - "epoch": 1.8014976519862926, - "grad_norm": 0.2396521121263504, - "learning_rate": 7.990015653424716e-06, - "loss": 0.033, - "step": 70970 - }, - { - "epoch": 1.801624571646148, - "grad_norm": 0.6623174548149109, - "learning_rate": 7.989169522359013e-06, - "loss": 0.0225, - "step": 70975 - }, - { - "epoch": 1.8017514913060033, - "grad_norm": 0.41300150752067566, - "learning_rate": 7.988323391293311e-06, - "loss": 0.0243, - "step": 70980 - }, - { - "epoch": 1.8018784109658585, - "grad_norm": 0.48999840021133423, - "learning_rate": 7.98747726022761e-06, - "loss": 0.031, - "step": 70985 - }, - { - "epoch": 1.8020053306257138, - "grad_norm": 0.4324018657207489, - "learning_rate": 7.986631129161908e-06, - "loss": 0.032, - "step": 70990 - }, - { - "epoch": 1.8021322502855692, - "grad_norm": 0.2350071668624878, - "learning_rate": 7.985784998096205e-06, - "loss": 0.023, - "step": 70995 - }, - { - "epoch": 1.8022591699454247, - "grad_norm": 0.4735085368156433, - "learning_rate": 7.984938867030503e-06, - "loss": 0.0394, - "step": 71000 - }, - { - "epoch": 1.80238608960528, - "grad_norm": 0.5031251311302185, - "learning_rate": 7.984092735964802e-06, - "loss": 0.0485, - "step": 71005 - }, - { - "epoch": 1.8025130092651351, - "grad_norm": 0.4746294319629669, - "learning_rate": 7.9832466048991e-06, - "loss": 0.0366, - "step": 71010 - }, - { - "epoch": 1.8026399289249904, - "grad_norm": 0.4073299169540405, - "learning_rate": 7.982400473833397e-06, - "loss": 0.0283, - "step": 71015 - }, - { - "epoch": 1.8027668485848458, - "grad_norm": 0.425910621881485, - "learning_rate": 7.981554342767695e-06, - "loss": 0.0239, - "step": 71020 - }, - { - "epoch": 1.8028937682447013, - "grad_norm": 0.6035028100013733, - "learning_rate": 7.980708211701994e-06, - "loss": 0.0429, - "step": 71025 - }, - { - "epoch": 1.8030206879045565, - "grad_norm": 0.3074464797973633, - "learning_rate": 7.979862080636292e-06, - "loss": 0.0308, - "step": 71030 - }, - { - "epoch": 1.8031476075644117, - "grad_norm": 0.6578651070594788, - "learning_rate": 7.979015949570589e-06, - "loss": 0.0318, - "step": 71035 - }, - { - "epoch": 1.803274527224267, - "grad_norm": 0.45399966835975647, - "learning_rate": 7.978169818504887e-06, - "loss": 0.0382, - "step": 71040 - }, - { - "epoch": 1.8034014468841224, - "grad_norm": 0.38161104917526245, - "learning_rate": 7.977323687439184e-06, - "loss": 0.0317, - "step": 71045 - }, - { - "epoch": 1.8035283665439776, - "grad_norm": 0.2472257763147354, - "learning_rate": 7.976477556373482e-06, - "loss": 0.0421, - "step": 71050 - }, - { - "epoch": 1.803655286203833, - "grad_norm": 0.3953807055950165, - "learning_rate": 7.97563142530778e-06, - "loss": 0.0579, - "step": 71055 - }, - { - "epoch": 1.8037822058636883, - "grad_norm": 1.1032276153564453, - "learning_rate": 7.974785294242079e-06, - "loss": 0.0443, - "step": 71060 - }, - { - "epoch": 1.8039091255235435, - "grad_norm": 0.428889662027359, - "learning_rate": 7.973939163176376e-06, - "loss": 0.0287, - "step": 71065 - }, - { - "epoch": 1.8040360451833988, - "grad_norm": 0.4772041141986847, - "learning_rate": 7.973093032110674e-06, - "loss": 0.0421, - "step": 71070 - }, - { - "epoch": 1.8041629648432542, - "grad_norm": 0.8415263295173645, - "learning_rate": 7.972246901044973e-06, - "loss": 0.0445, - "step": 71075 - }, - { - "epoch": 1.8042898845031097, - "grad_norm": 0.5488128066062927, - "learning_rate": 7.971400769979271e-06, - "loss": 0.0406, - "step": 71080 - }, - { - "epoch": 1.804416804162965, - "grad_norm": 0.44383957982063293, - "learning_rate": 7.970554638913568e-06, - "loss": 0.0414, - "step": 71085 - }, - { - "epoch": 1.8045437238228201, - "grad_norm": 0.6118372082710266, - "learning_rate": 7.969708507847866e-06, - "loss": 0.0481, - "step": 71090 - }, - { - "epoch": 1.8046706434826754, - "grad_norm": 0.42471104860305786, - "learning_rate": 7.968862376782165e-06, - "loss": 0.0383, - "step": 71095 - }, - { - "epoch": 1.8047975631425308, - "grad_norm": 0.3411651849746704, - "learning_rate": 7.968016245716463e-06, - "loss": 0.0341, - "step": 71100 - }, - { - "epoch": 1.804924482802386, - "grad_norm": 0.3491385579109192, - "learning_rate": 7.96717011465076e-06, - "loss": 0.0277, - "step": 71105 - }, - { - "epoch": 1.8050514024622415, - "grad_norm": 0.5527692437171936, - "learning_rate": 7.966323983585058e-06, - "loss": 0.0413, - "step": 71110 - }, - { - "epoch": 1.8051783221220967, - "grad_norm": 0.41569381952285767, - "learning_rate": 7.965477852519355e-06, - "loss": 0.0403, - "step": 71115 - }, - { - "epoch": 1.805305241781952, - "grad_norm": 0.502711832523346, - "learning_rate": 7.964631721453653e-06, - "loss": 0.0352, - "step": 71120 - }, - { - "epoch": 1.8054321614418072, - "grad_norm": 0.6164815425872803, - "learning_rate": 7.963785590387952e-06, - "loss": 0.029, - "step": 71125 - }, - { - "epoch": 1.8055590811016626, - "grad_norm": 0.39291173219680786, - "learning_rate": 7.96293945932225e-06, - "loss": 0.0383, - "step": 71130 - }, - { - "epoch": 1.805686000761518, - "grad_norm": 0.40581801533699036, - "learning_rate": 7.962093328256547e-06, - "loss": 0.0278, - "step": 71135 - }, - { - "epoch": 1.8058129204213733, - "grad_norm": 0.46548157930374146, - "learning_rate": 7.961247197190845e-06, - "loss": 0.044, - "step": 71140 - }, - { - "epoch": 1.8059398400812285, - "grad_norm": 0.4944707453250885, - "learning_rate": 7.960401066125144e-06, - "loss": 0.0473, - "step": 71145 - }, - { - "epoch": 1.8060667597410838, - "grad_norm": 0.47109004855155945, - "learning_rate": 7.959554935059442e-06, - "loss": 0.0296, - "step": 71150 - }, - { - "epoch": 1.8061936794009392, - "grad_norm": 0.4936853349208832, - "learning_rate": 7.958708803993739e-06, - "loss": 0.0541, - "step": 71155 - }, - { - "epoch": 1.8063205990607947, - "grad_norm": 0.4769483804702759, - "learning_rate": 7.957862672928037e-06, - "loss": 0.0359, - "step": 71160 - }, - { - "epoch": 1.80644751872065, - "grad_norm": 0.4321943521499634, - "learning_rate": 7.957016541862335e-06, - "loss": 0.0408, - "step": 71165 - }, - { - "epoch": 1.8065744383805051, - "grad_norm": 0.3650386929512024, - "learning_rate": 7.956170410796634e-06, - "loss": 0.0294, - "step": 71170 - }, - { - "epoch": 1.8067013580403604, - "grad_norm": 0.19841736555099487, - "learning_rate": 7.95532427973093e-06, - "loss": 0.0303, - "step": 71175 - }, - { - "epoch": 1.8068282777002158, - "grad_norm": 1.9981731176376343, - "learning_rate": 7.954478148665229e-06, - "loss": 0.0331, - "step": 71180 - }, - { - "epoch": 1.806955197360071, - "grad_norm": 0.39002344012260437, - "learning_rate": 7.953632017599526e-06, - "loss": 0.0715, - "step": 71185 - }, - { - "epoch": 1.8070821170199265, - "grad_norm": 1.344465970993042, - "learning_rate": 7.952785886533824e-06, - "loss": 0.0401, - "step": 71190 - }, - { - "epoch": 1.8072090366797817, - "grad_norm": 0.3449687659740448, - "learning_rate": 7.951939755468122e-06, - "loss": 0.0587, - "step": 71195 - }, - { - "epoch": 1.807335956339637, - "grad_norm": 0.6694392561912537, - "learning_rate": 7.95109362440242e-06, - "loss": 0.0433, - "step": 71200 - }, - { - "epoch": 1.8074628759994922, - "grad_norm": 0.6476907134056091, - "learning_rate": 7.950247493336718e-06, - "loss": 0.0388, - "step": 71205 - }, - { - "epoch": 1.8075897956593476, - "grad_norm": 0.4561457633972168, - "learning_rate": 7.949401362271016e-06, - "loss": 0.0602, - "step": 71210 - }, - { - "epoch": 1.807716715319203, - "grad_norm": 0.21803128719329834, - "learning_rate": 7.948555231205314e-06, - "loss": 0.0371, - "step": 71215 - }, - { - "epoch": 1.8078436349790583, - "grad_norm": 0.35680973529815674, - "learning_rate": 7.947709100139613e-06, - "loss": 0.0441, - "step": 71220 - }, - { - "epoch": 1.8079705546389135, - "grad_norm": 0.2794303596019745, - "learning_rate": 7.946862969073911e-06, - "loss": 0.0466, - "step": 71225 - }, - { - "epoch": 1.8080974742987688, - "grad_norm": 0.5130143761634827, - "learning_rate": 7.946016838008208e-06, - "loss": 0.0406, - "step": 71230 - }, - { - "epoch": 1.8082243939586242, - "grad_norm": 0.3728054165840149, - "learning_rate": 7.945170706942506e-06, - "loss": 0.0375, - "step": 71235 - }, - { - "epoch": 1.8083513136184795, - "grad_norm": 0.42393019795417786, - "learning_rate": 7.944324575876805e-06, - "loss": 0.0435, - "step": 71240 - }, - { - "epoch": 1.808478233278335, - "grad_norm": 0.12055670469999313, - "learning_rate": 7.943478444811103e-06, - "loss": 0.0238, - "step": 71245 - }, - { - "epoch": 1.8086051529381901, - "grad_norm": 0.4462319016456604, - "learning_rate": 7.9426323137454e-06, - "loss": 0.0388, - "step": 71250 - }, - { - "epoch": 1.8087320725980454, - "grad_norm": 0.37046465277671814, - "learning_rate": 7.941786182679698e-06, - "loss": 0.0465, - "step": 71255 - }, - { - "epoch": 1.8088589922579006, - "grad_norm": 0.21132737398147583, - "learning_rate": 7.940940051613995e-06, - "loss": 0.0257, - "step": 71260 - }, - { - "epoch": 1.808985911917756, - "grad_norm": 0.40368595719337463, - "learning_rate": 7.940093920548293e-06, - "loss": 0.0397, - "step": 71265 - }, - { - "epoch": 1.8091128315776115, - "grad_norm": 0.6409241557121277, - "learning_rate": 7.939247789482592e-06, - "loss": 0.0489, - "step": 71270 - }, - { - "epoch": 1.8092397512374667, - "grad_norm": 0.48145315051078796, - "learning_rate": 7.93840165841689e-06, - "loss": 0.051, - "step": 71275 - }, - { - "epoch": 1.809366670897322, - "grad_norm": 0.42115846276283264, - "learning_rate": 7.937555527351187e-06, - "loss": 0.0295, - "step": 71280 - }, - { - "epoch": 1.8094935905571772, - "grad_norm": 0.35131847858428955, - "learning_rate": 7.936709396285485e-06, - "loss": 0.0497, - "step": 71285 - }, - { - "epoch": 1.8096205102170326, - "grad_norm": 0.6002100706100464, - "learning_rate": 7.935863265219784e-06, - "loss": 0.0504, - "step": 71290 - }, - { - "epoch": 1.809747429876888, - "grad_norm": 0.3037347197532654, - "learning_rate": 7.935017134154082e-06, - "loss": 0.0455, - "step": 71295 - }, - { - "epoch": 1.8098743495367433, - "grad_norm": 0.6884769797325134, - "learning_rate": 7.934171003088379e-06, - "loss": 0.0362, - "step": 71300 - }, - { - "epoch": 1.8100012691965985, - "grad_norm": 1.191019058227539, - "learning_rate": 7.933324872022677e-06, - "loss": 0.0518, - "step": 71305 - }, - { - "epoch": 1.8101281888564538, - "grad_norm": 0.4793349802494049, - "learning_rate": 7.932478740956976e-06, - "loss": 0.0346, - "step": 71310 - }, - { - "epoch": 1.8102551085163092, - "grad_norm": 0.3213537931442261, - "learning_rate": 7.931632609891274e-06, - "loss": 0.0478, - "step": 71315 - }, - { - "epoch": 1.8103820281761644, - "grad_norm": 0.35479989647865295, - "learning_rate": 7.93078647882557e-06, - "loss": 0.042, - "step": 71320 - }, - { - "epoch": 1.81050894783602, - "grad_norm": 0.5373144149780273, - "learning_rate": 7.929940347759869e-06, - "loss": 0.039, - "step": 71325 - }, - { - "epoch": 1.8106358674958751, - "grad_norm": 0.9816379547119141, - "learning_rate": 7.929094216694166e-06, - "loss": 0.0397, - "step": 71330 - }, - { - "epoch": 1.8107627871557304, - "grad_norm": 0.5653082132339478, - "learning_rate": 7.928248085628464e-06, - "loss": 0.0396, - "step": 71335 - }, - { - "epoch": 1.8108897068155856, - "grad_norm": 0.39329054951667786, - "learning_rate": 7.927401954562763e-06, - "loss": 0.041, - "step": 71340 - }, - { - "epoch": 1.811016626475441, - "grad_norm": 0.46142590045928955, - "learning_rate": 7.926555823497061e-06, - "loss": 0.0388, - "step": 71345 - }, - { - "epoch": 1.8111435461352965, - "grad_norm": 0.3131319284439087, - "learning_rate": 7.925709692431358e-06, - "loss": 0.0321, - "step": 71350 - }, - { - "epoch": 1.8112704657951517, - "grad_norm": 0.6044168472290039, - "learning_rate": 7.924863561365656e-06, - "loss": 0.0329, - "step": 71355 - }, - { - "epoch": 1.811397385455007, - "grad_norm": 0.26120731234550476, - "learning_rate": 7.924017430299954e-06, - "loss": 0.0427, - "step": 71360 - }, - { - "epoch": 1.8115243051148622, - "grad_norm": 0.3135455846786499, - "learning_rate": 7.923171299234253e-06, - "loss": 0.043, - "step": 71365 - }, - { - "epoch": 1.8116512247747176, - "grad_norm": 0.7059943079948425, - "learning_rate": 7.92232516816855e-06, - "loss": 0.0605, - "step": 71370 - }, - { - "epoch": 1.811778144434573, - "grad_norm": 0.2930048108100891, - "learning_rate": 7.921479037102848e-06, - "loss": 0.0362, - "step": 71375 - }, - { - "epoch": 1.8119050640944283, - "grad_norm": 0.27900269627571106, - "learning_rate": 7.920632906037146e-06, - "loss": 0.058, - "step": 71380 - }, - { - "epoch": 1.8120319837542835, - "grad_norm": 0.3806161880493164, - "learning_rate": 7.919786774971445e-06, - "loss": 0.0384, - "step": 71385 - }, - { - "epoch": 1.8121589034141388, - "grad_norm": 0.5340694189071655, - "learning_rate": 7.918940643905741e-06, - "loss": 0.0359, - "step": 71390 - }, - { - "epoch": 1.812285823073994, - "grad_norm": 0.30835986137390137, - "learning_rate": 7.91809451284004e-06, - "loss": 0.0428, - "step": 71395 - }, - { - "epoch": 1.8124127427338494, - "grad_norm": 0.36960428953170776, - "learning_rate": 7.917248381774337e-06, - "loss": 0.0291, - "step": 71400 - }, - { - "epoch": 1.812539662393705, - "grad_norm": 0.4979344308376312, - "learning_rate": 7.916402250708635e-06, - "loss": 0.0223, - "step": 71405 - }, - { - "epoch": 1.8126665820535601, - "grad_norm": 0.31927722692489624, - "learning_rate": 7.915556119642933e-06, - "loss": 0.0252, - "step": 71410 - }, - { - "epoch": 1.8127935017134154, - "grad_norm": 0.3833298087120056, - "learning_rate": 7.914709988577232e-06, - "loss": 0.0262, - "step": 71415 - }, - { - "epoch": 1.8129204213732706, - "grad_norm": 0.5921025276184082, - "learning_rate": 7.913863857511529e-06, - "loss": 0.0465, - "step": 71420 - }, - { - "epoch": 1.813047341033126, - "grad_norm": 0.431619793176651, - "learning_rate": 7.913017726445827e-06, - "loss": 0.0444, - "step": 71425 - }, - { - "epoch": 1.8131742606929815, - "grad_norm": 0.38750988245010376, - "learning_rate": 7.912171595380125e-06, - "loss": 0.0467, - "step": 71430 - }, - { - "epoch": 1.8133011803528367, - "grad_norm": 0.5162782669067383, - "learning_rate": 7.911325464314424e-06, - "loss": 0.0363, - "step": 71435 - }, - { - "epoch": 1.813428100012692, - "grad_norm": 0.9915988445281982, - "learning_rate": 7.91047933324872e-06, - "loss": 0.0238, - "step": 71440 - }, - { - "epoch": 1.8135550196725472, - "grad_norm": 0.4181349277496338, - "learning_rate": 7.909633202183019e-06, - "loss": 0.0372, - "step": 71445 - }, - { - "epoch": 1.8136819393324026, - "grad_norm": 0.5229597091674805, - "learning_rate": 7.908787071117317e-06, - "loss": 0.0417, - "step": 71450 - }, - { - "epoch": 1.8138088589922579, - "grad_norm": 0.7572031021118164, - "learning_rate": 7.907940940051616e-06, - "loss": 0.038, - "step": 71455 - }, - { - "epoch": 1.8139357786521133, - "grad_norm": 0.346894234418869, - "learning_rate": 7.907094808985912e-06, - "loss": 0.0419, - "step": 71460 - }, - { - "epoch": 1.8140626983119685, - "grad_norm": 0.4681001901626587, - "learning_rate": 7.90624867792021e-06, - "loss": 0.0403, - "step": 71465 - }, - { - "epoch": 1.8141896179718238, - "grad_norm": 0.7482230067253113, - "learning_rate": 7.905402546854507e-06, - "loss": 0.0561, - "step": 71470 - }, - { - "epoch": 1.814316537631679, - "grad_norm": 0.7650241255760193, - "learning_rate": 7.904556415788806e-06, - "loss": 0.0479, - "step": 71475 - }, - { - "epoch": 1.8144434572915344, - "grad_norm": 0.41199690103530884, - "learning_rate": 7.903710284723104e-06, - "loss": 0.0284, - "step": 71480 - }, - { - "epoch": 1.81457037695139, - "grad_norm": 0.5197402834892273, - "learning_rate": 7.902864153657403e-06, - "loss": 0.0363, - "step": 71485 - }, - { - "epoch": 1.8146972966112451, - "grad_norm": 0.5641536712646484, - "learning_rate": 7.9020180225917e-06, - "loss": 0.0486, - "step": 71490 - }, - { - "epoch": 1.8148242162711004, - "grad_norm": 0.5307297706604004, - "learning_rate": 7.901171891525998e-06, - "loss": 0.0349, - "step": 71495 - }, - { - "epoch": 1.8149511359309556, - "grad_norm": 0.38117310404777527, - "learning_rate": 7.900325760460296e-06, - "loss": 0.0377, - "step": 71500 - }, - { - "epoch": 1.815078055590811, - "grad_norm": 0.5371500849723816, - "learning_rate": 7.899479629394595e-06, - "loss": 0.0373, - "step": 71505 - }, - { - "epoch": 1.8152049752506665, - "grad_norm": 0.5710240006446838, - "learning_rate": 7.898633498328891e-06, - "loss": 0.0405, - "step": 71510 - }, - { - "epoch": 1.8153318949105217, - "grad_norm": 0.28648772835731506, - "learning_rate": 7.89778736726319e-06, - "loss": 0.0344, - "step": 71515 - }, - { - "epoch": 1.815458814570377, - "grad_norm": 0.400212824344635, - "learning_rate": 7.896941236197488e-06, - "loss": 0.041, - "step": 71520 - }, - { - "epoch": 1.8155857342302322, - "grad_norm": 0.5505761504173279, - "learning_rate": 7.896095105131786e-06, - "loss": 0.0298, - "step": 71525 - }, - { - "epoch": 1.8157126538900876, - "grad_norm": 0.527912974357605, - "learning_rate": 7.895248974066083e-06, - "loss": 0.0472, - "step": 71530 - }, - { - "epoch": 1.8158395735499429, - "grad_norm": 0.41941598057746887, - "learning_rate": 7.894402843000382e-06, - "loss": 0.0423, - "step": 71535 - }, - { - "epoch": 1.8159664932097983, - "grad_norm": 0.2912944555282593, - "learning_rate": 7.893556711934678e-06, - "loss": 0.0373, - "step": 71540 - }, - { - "epoch": 1.8160934128696535, - "grad_norm": 0.368246465921402, - "learning_rate": 7.892710580868977e-06, - "loss": 0.0337, - "step": 71545 - }, - { - "epoch": 1.8162203325295088, - "grad_norm": 0.26613277196884155, - "learning_rate": 7.891864449803275e-06, - "loss": 0.0243, - "step": 71550 - }, - { - "epoch": 1.816347252189364, - "grad_norm": 0.43469107151031494, - "learning_rate": 7.891018318737574e-06, - "loss": 0.0447, - "step": 71555 - }, - { - "epoch": 1.8164741718492194, - "grad_norm": 0.33644357323646545, - "learning_rate": 7.89017218767187e-06, - "loss": 0.0455, - "step": 71560 - }, - { - "epoch": 1.816601091509075, - "grad_norm": 0.32569289207458496, - "learning_rate": 7.889326056606169e-06, - "loss": 0.0686, - "step": 71565 - }, - { - "epoch": 1.8167280111689301, - "grad_norm": 0.45176663994789124, - "learning_rate": 7.888479925540467e-06, - "loss": 0.0378, - "step": 71570 - }, - { - "epoch": 1.8168549308287854, - "grad_norm": 0.37030282616615295, - "learning_rate": 7.887633794474765e-06, - "loss": 0.0337, - "step": 71575 - }, - { - "epoch": 1.8169818504886406, - "grad_norm": 0.31301963329315186, - "learning_rate": 7.886787663409062e-06, - "loss": 0.0379, - "step": 71580 - }, - { - "epoch": 1.817108770148496, - "grad_norm": 0.5028181076049805, - "learning_rate": 7.88594153234336e-06, - "loss": 0.0515, - "step": 71585 - }, - { - "epoch": 1.8172356898083513, - "grad_norm": 0.47616779804229736, - "learning_rate": 7.885095401277659e-06, - "loss": 0.0371, - "step": 71590 - }, - { - "epoch": 1.8173626094682067, - "grad_norm": 0.4567832946777344, - "learning_rate": 7.884249270211957e-06, - "loss": 0.0378, - "step": 71595 - }, - { - "epoch": 1.817489529128062, - "grad_norm": 0.4743015766143799, - "learning_rate": 7.883403139146254e-06, - "loss": 0.0347, - "step": 71600 - }, - { - "epoch": 1.8176164487879172, - "grad_norm": 1.0271203517913818, - "learning_rate": 7.882557008080552e-06, - "loss": 0.0402, - "step": 71605 - }, - { - "epoch": 1.8177433684477724, - "grad_norm": 0.437408447265625, - "learning_rate": 7.88171087701485e-06, - "loss": 0.0359, - "step": 71610 - }, - { - "epoch": 1.8178702881076279, - "grad_norm": 0.40472519397735596, - "learning_rate": 7.880864745949148e-06, - "loss": 0.0294, - "step": 71615 - }, - { - "epoch": 1.8179972077674833, - "grad_norm": 0.6552506685256958, - "learning_rate": 7.880018614883446e-06, - "loss": 0.0361, - "step": 71620 - }, - { - "epoch": 1.8181241274273385, - "grad_norm": 0.3354075849056244, - "learning_rate": 7.879172483817744e-06, - "loss": 0.0417, - "step": 71625 - }, - { - "epoch": 1.8182510470871938, - "grad_norm": 0.46367454528808594, - "learning_rate": 7.878326352752041e-06, - "loss": 0.0398, - "step": 71630 - }, - { - "epoch": 1.818377966747049, - "grad_norm": 1.234711766242981, - "learning_rate": 7.87748022168634e-06, - "loss": 0.028, - "step": 71635 - }, - { - "epoch": 1.8185048864069044, - "grad_norm": 0.3516696095466614, - "learning_rate": 7.876634090620638e-06, - "loss": 0.0485, - "step": 71640 - }, - { - "epoch": 1.81863180606676, - "grad_norm": 0.2543090581893921, - "learning_rate": 7.875787959554936e-06, - "loss": 0.0341, - "step": 71645 - }, - { - "epoch": 1.8187587257266151, - "grad_norm": 0.31576064229011536, - "learning_rate": 7.874941828489233e-06, - "loss": 0.0184, - "step": 71650 - }, - { - "epoch": 1.8188856453864704, - "grad_norm": 0.5484535694122314, - "learning_rate": 7.874095697423531e-06, - "loss": 0.0269, - "step": 71655 - }, - { - "epoch": 1.8190125650463256, - "grad_norm": 0.5490303635597229, - "learning_rate": 7.87324956635783e-06, - "loss": 0.0472, - "step": 71660 - }, - { - "epoch": 1.819139484706181, - "grad_norm": 0.4013497829437256, - "learning_rate": 7.872403435292128e-06, - "loss": 0.0275, - "step": 71665 - }, - { - "epoch": 1.8192664043660363, - "grad_norm": 0.23291340470314026, - "learning_rate": 7.871557304226425e-06, - "loss": 0.0362, - "step": 71670 - }, - { - "epoch": 1.8193933240258917, - "grad_norm": 0.32585248351097107, - "learning_rate": 7.870711173160723e-06, - "loss": 0.0401, - "step": 71675 - }, - { - "epoch": 1.819520243685747, - "grad_norm": 0.38403594493865967, - "learning_rate": 7.86986504209502e-06, - "loss": 0.0219, - "step": 71680 - }, - { - "epoch": 1.8196471633456022, - "grad_norm": 0.2124854326248169, - "learning_rate": 7.869018911029318e-06, - "loss": 0.0346, - "step": 71685 - }, - { - "epoch": 1.8197740830054574, - "grad_norm": 0.40099456906318665, - "learning_rate": 7.868172779963617e-06, - "loss": 0.0377, - "step": 71690 - }, - { - "epoch": 1.8199010026653129, - "grad_norm": 0.3882048726081848, - "learning_rate": 7.867326648897915e-06, - "loss": 0.0284, - "step": 71695 - }, - { - "epoch": 1.8200279223251683, - "grad_norm": 0.517901599407196, - "learning_rate": 7.866480517832212e-06, - "loss": 0.0367, - "step": 71700 - }, - { - "epoch": 1.8201548419850235, - "grad_norm": 0.6991018056869507, - "learning_rate": 7.86563438676651e-06, - "loss": 0.0462, - "step": 71705 - }, - { - "epoch": 1.8202817616448788, - "grad_norm": 0.46183547377586365, - "learning_rate": 7.864788255700809e-06, - "loss": 0.0293, - "step": 71710 - }, - { - "epoch": 1.820408681304734, - "grad_norm": 0.3338564336299896, - "learning_rate": 7.863942124635107e-06, - "loss": 0.0421, - "step": 71715 - }, - { - "epoch": 1.8205356009645894, - "grad_norm": 0.4410174489021301, - "learning_rate": 7.863095993569404e-06, - "loss": 0.0331, - "step": 71720 - }, - { - "epoch": 1.820662520624445, - "grad_norm": 0.49626991152763367, - "learning_rate": 7.862249862503702e-06, - "loss": 0.0329, - "step": 71725 - }, - { - "epoch": 1.8207894402843001, - "grad_norm": 0.21918903291225433, - "learning_rate": 7.861403731438e-06, - "loss": 0.0458, - "step": 71730 - }, - { - "epoch": 1.8209163599441553, - "grad_norm": 0.69776850938797, - "learning_rate": 7.860557600372299e-06, - "loss": 0.0647, - "step": 71735 - }, - { - "epoch": 1.8210432796040106, - "grad_norm": 1.194437026977539, - "learning_rate": 7.859711469306597e-06, - "loss": 0.0255, - "step": 71740 - }, - { - "epoch": 1.8211701992638658, - "grad_norm": 0.3506389856338501, - "learning_rate": 7.858865338240894e-06, - "loss": 0.0393, - "step": 71745 - }, - { - "epoch": 1.8212971189237213, - "grad_norm": 0.4387464225292206, - "learning_rate": 7.858019207175193e-06, - "loss": 0.0492, - "step": 71750 - }, - { - "epoch": 1.8214240385835767, - "grad_norm": 0.4912065863609314, - "learning_rate": 7.85717307610949e-06, - "loss": 0.0367, - "step": 71755 - }, - { - "epoch": 1.821550958243432, - "grad_norm": 0.4879761040210724, - "learning_rate": 7.856326945043788e-06, - "loss": 0.0255, - "step": 71760 - }, - { - "epoch": 1.8216778779032872, - "grad_norm": 0.382095068693161, - "learning_rate": 7.855480813978086e-06, - "loss": 0.0301, - "step": 71765 - }, - { - "epoch": 1.8218047975631424, - "grad_norm": 0.32505127787590027, - "learning_rate": 7.854634682912384e-06, - "loss": 0.0375, - "step": 71770 - }, - { - "epoch": 1.8219317172229978, - "grad_norm": 0.5858319401741028, - "learning_rate": 7.853788551846681e-06, - "loss": 0.0383, - "step": 71775 - }, - { - "epoch": 1.8220586368828533, - "grad_norm": 0.41312041878700256, - "learning_rate": 7.85294242078098e-06, - "loss": 0.0391, - "step": 71780 - }, - { - "epoch": 1.8221855565427085, - "grad_norm": 0.5353965759277344, - "learning_rate": 7.852096289715278e-06, - "loss": 0.0442, - "step": 71785 - }, - { - "epoch": 1.8223124762025638, - "grad_norm": 0.2725403606891632, - "learning_rate": 7.851250158649576e-06, - "loss": 0.0329, - "step": 71790 - }, - { - "epoch": 1.822439395862419, - "grad_norm": 0.8001095652580261, - "learning_rate": 7.850404027583873e-06, - "loss": 0.0336, - "step": 71795 - }, - { - "epoch": 1.8225663155222744, - "grad_norm": 0.5911334156990051, - "learning_rate": 7.849557896518171e-06, - "loss": 0.0499, - "step": 71800 - }, - { - "epoch": 1.8226932351821297, - "grad_norm": 0.35134318470954895, - "learning_rate": 7.84871176545247e-06, - "loss": 0.0238, - "step": 71805 - }, - { - "epoch": 1.8228201548419851, - "grad_norm": 1.0122268199920654, - "learning_rate": 7.847865634386768e-06, - "loss": 0.0602, - "step": 71810 - }, - { - "epoch": 1.8229470745018403, - "grad_norm": 1.7957695722579956, - "learning_rate": 7.847019503321065e-06, - "loss": 0.0385, - "step": 71815 - }, - { - "epoch": 1.8230739941616956, - "grad_norm": 0.3353966772556305, - "learning_rate": 7.846173372255363e-06, - "loss": 0.036, - "step": 71820 - }, - { - "epoch": 1.8232009138215508, - "grad_norm": 0.3158054053783417, - "learning_rate": 7.84532724118966e-06, - "loss": 0.0343, - "step": 71825 - }, - { - "epoch": 1.8233278334814063, - "grad_norm": 0.3996848464012146, - "learning_rate": 7.844481110123959e-06, - "loss": 0.0354, - "step": 71830 - }, - { - "epoch": 1.8234547531412617, - "grad_norm": 0.26121413707733154, - "learning_rate": 7.843634979058257e-06, - "loss": 0.0299, - "step": 71835 - }, - { - "epoch": 1.823581672801117, - "grad_norm": 0.42930641770362854, - "learning_rate": 7.842788847992555e-06, - "loss": 0.0442, - "step": 71840 - }, - { - "epoch": 1.8237085924609722, - "grad_norm": 0.21987488865852356, - "learning_rate": 7.841942716926852e-06, - "loss": 0.0218, - "step": 71845 - }, - { - "epoch": 1.8238355121208274, - "grad_norm": 0.354012668132782, - "learning_rate": 7.84109658586115e-06, - "loss": 0.0482, - "step": 71850 - }, - { - "epoch": 1.8239624317806828, - "grad_norm": 0.28991958498954773, - "learning_rate": 7.840250454795449e-06, - "loss": 0.0366, - "step": 71855 - }, - { - "epoch": 1.8240893514405383, - "grad_norm": 0.42516419291496277, - "learning_rate": 7.839404323729747e-06, - "loss": 0.0286, - "step": 71860 - }, - { - "epoch": 1.8242162711003935, - "grad_norm": 0.5152280926704407, - "learning_rate": 7.838558192664044e-06, - "loss": 0.0421, - "step": 71865 - }, - { - "epoch": 1.8243431907602488, - "grad_norm": 0.4548938572406769, - "learning_rate": 7.837712061598342e-06, - "loss": 0.0374, - "step": 71870 - }, - { - "epoch": 1.824470110420104, - "grad_norm": 0.5143662691116333, - "learning_rate": 7.83686593053264e-06, - "loss": 0.0384, - "step": 71875 - }, - { - "epoch": 1.8245970300799594, - "grad_norm": 0.5017558932304382, - "learning_rate": 7.836019799466939e-06, - "loss": 0.0397, - "step": 71880 - }, - { - "epoch": 1.8247239497398147, - "grad_norm": 0.44324907660484314, - "learning_rate": 7.835173668401236e-06, - "loss": 0.0541, - "step": 71885 - }, - { - "epoch": 1.8248508693996701, - "grad_norm": 0.43533939123153687, - "learning_rate": 7.834327537335534e-06, - "loss": 0.0409, - "step": 71890 - }, - { - "epoch": 1.8249777890595253, - "grad_norm": 0.5923194289207458, - "learning_rate": 7.833481406269831e-06, - "loss": 0.0315, - "step": 71895 - }, - { - "epoch": 1.8251047087193806, - "grad_norm": 0.5213146209716797, - "learning_rate": 7.83263527520413e-06, - "loss": 0.0256, - "step": 71900 - }, - { - "epoch": 1.8252316283792358, - "grad_norm": 0.27774569392204285, - "learning_rate": 7.831789144138428e-06, - "loss": 0.0246, - "step": 71905 - }, - { - "epoch": 1.8253585480390913, - "grad_norm": 0.39434120059013367, - "learning_rate": 7.830943013072726e-06, - "loss": 0.0403, - "step": 71910 - }, - { - "epoch": 1.8254854676989467, - "grad_norm": 0.40291404724121094, - "learning_rate": 7.830096882007023e-06, - "loss": 0.0238, - "step": 71915 - }, - { - "epoch": 1.825612387358802, - "grad_norm": 0.3955644369125366, - "learning_rate": 7.829250750941321e-06, - "loss": 0.0606, - "step": 71920 - }, - { - "epoch": 1.8257393070186572, - "grad_norm": 1.329061508178711, - "learning_rate": 7.82840461987562e-06, - "loss": 0.0566, - "step": 71925 - }, - { - "epoch": 1.8258662266785124, - "grad_norm": 0.5453237295150757, - "learning_rate": 7.827558488809918e-06, - "loss": 0.0303, - "step": 71930 - }, - { - "epoch": 1.8259931463383678, - "grad_norm": 0.4598858952522278, - "learning_rate": 7.826712357744215e-06, - "loss": 0.0312, - "step": 71935 - }, - { - "epoch": 1.826120065998223, - "grad_norm": 0.3248157501220703, - "learning_rate": 7.825866226678513e-06, - "loss": 0.0334, - "step": 71940 - }, - { - "epoch": 1.8262469856580785, - "grad_norm": 0.6696195006370544, - "learning_rate": 7.825020095612812e-06, - "loss": 0.0441, - "step": 71945 - }, - { - "epoch": 1.8263739053179338, - "grad_norm": 0.3002950847148895, - "learning_rate": 7.82417396454711e-06, - "loss": 0.0256, - "step": 71950 - }, - { - "epoch": 1.826500824977789, - "grad_norm": 0.3898886740207672, - "learning_rate": 7.823327833481407e-06, - "loss": 0.0314, - "step": 71955 - }, - { - "epoch": 1.8266277446376442, - "grad_norm": 0.7617043852806091, - "learning_rate": 7.822481702415705e-06, - "loss": 0.038, - "step": 71960 - }, - { - "epoch": 1.8267546642974997, - "grad_norm": 1.087902307510376, - "learning_rate": 7.821635571350002e-06, - "loss": 0.0317, - "step": 71965 - }, - { - "epoch": 1.8268815839573551, - "grad_norm": 0.7413201928138733, - "learning_rate": 7.8207894402843e-06, - "loss": 0.0334, - "step": 71970 - }, - { - "epoch": 1.8270085036172103, - "grad_norm": 0.343750536441803, - "learning_rate": 7.819943309218599e-06, - "loss": 0.0289, - "step": 71975 - }, - { - "epoch": 1.8271354232770656, - "grad_norm": 0.4188224673271179, - "learning_rate": 7.819097178152897e-06, - "loss": 0.0428, - "step": 71980 - }, - { - "epoch": 1.8272623429369208, - "grad_norm": 0.39966508746147156, - "learning_rate": 7.818251047087194e-06, - "loss": 0.0484, - "step": 71985 - }, - { - "epoch": 1.8273892625967763, - "grad_norm": 0.6139541864395142, - "learning_rate": 7.817404916021492e-06, - "loss": 0.044, - "step": 71990 - }, - { - "epoch": 1.8275161822566317, - "grad_norm": 0.5373926758766174, - "learning_rate": 7.81655878495579e-06, - "loss": 0.0403, - "step": 71995 - }, - { - "epoch": 1.827643101916487, - "grad_norm": 0.44844067096710205, - "learning_rate": 7.815712653890089e-06, - "loss": 0.0319, - "step": 72000 - }, - { - "epoch": 1.8277700215763422, - "grad_norm": 0.5498227477073669, - "learning_rate": 7.814866522824386e-06, - "loss": 0.0332, - "step": 72005 - }, - { - "epoch": 1.8278969412361974, - "grad_norm": 0.3994738757610321, - "learning_rate": 7.814020391758684e-06, - "loss": 0.0373, - "step": 72010 - }, - { - "epoch": 1.8280238608960528, - "grad_norm": 0.30079385638237, - "learning_rate": 7.813174260692982e-06, - "loss": 0.0356, - "step": 72015 - }, - { - "epoch": 1.828150780555908, - "grad_norm": 1.0980994701385498, - "learning_rate": 7.812328129627281e-06, - "loss": 0.033, - "step": 72020 - }, - { - "epoch": 1.8282777002157635, - "grad_norm": 0.5583131909370422, - "learning_rate": 7.811481998561578e-06, - "loss": 0.0277, - "step": 72025 - }, - { - "epoch": 1.8284046198756188, - "grad_norm": 0.6727253794670105, - "learning_rate": 7.810635867495876e-06, - "loss": 0.0289, - "step": 72030 - }, - { - "epoch": 1.828531539535474, - "grad_norm": 0.4188079535961151, - "learning_rate": 7.809789736430173e-06, - "loss": 0.0325, - "step": 72035 - }, - { - "epoch": 1.8286584591953292, - "grad_norm": 0.49228933453559875, - "learning_rate": 7.808943605364471e-06, - "loss": 0.0187, - "step": 72040 - }, - { - "epoch": 1.8287853788551847, - "grad_norm": 0.5077501535415649, - "learning_rate": 7.80809747429877e-06, - "loss": 0.0225, - "step": 72045 - }, - { - "epoch": 1.8289122985150401, - "grad_norm": 0.6789401173591614, - "learning_rate": 7.807251343233068e-06, - "loss": 0.0248, - "step": 72050 - }, - { - "epoch": 1.8290392181748953, - "grad_norm": 0.36956652998924255, - "learning_rate": 7.806405212167365e-06, - "loss": 0.0307, - "step": 72055 - }, - { - "epoch": 1.8291661378347506, - "grad_norm": 0.32916295528411865, - "learning_rate": 7.805559081101663e-06, - "loss": 0.0454, - "step": 72060 - }, - { - "epoch": 1.8292930574946058, - "grad_norm": 0.23966778814792633, - "learning_rate": 7.804712950035961e-06, - "loss": 0.0254, - "step": 72065 - }, - { - "epoch": 1.8294199771544613, - "grad_norm": 0.5070527791976929, - "learning_rate": 7.80386681897026e-06, - "loss": 0.0358, - "step": 72070 - }, - { - "epoch": 1.8295468968143167, - "grad_norm": 0.275395929813385, - "learning_rate": 7.803020687904556e-06, - "loss": 0.0298, - "step": 72075 - }, - { - "epoch": 1.829673816474172, - "grad_norm": 0.7153045535087585, - "learning_rate": 7.802174556838855e-06, - "loss": 0.0344, - "step": 72080 - }, - { - "epoch": 1.8298007361340272, - "grad_norm": 0.49462586641311646, - "learning_rate": 7.801328425773153e-06, - "loss": 0.0331, - "step": 72085 - }, - { - "epoch": 1.8299276557938824, - "grad_norm": 0.453701913356781, - "learning_rate": 7.800482294707452e-06, - "loss": 0.0346, - "step": 72090 - }, - { - "epoch": 1.8300545754537376, - "grad_norm": 0.6697918176651001, - "learning_rate": 7.799636163641748e-06, - "loss": 0.0436, - "step": 72095 - }, - { - "epoch": 1.830181495113593, - "grad_norm": 0.3605886399745941, - "learning_rate": 7.798790032576047e-06, - "loss": 0.0215, - "step": 72100 - }, - { - "epoch": 1.8303084147734485, - "grad_norm": 0.3718501627445221, - "learning_rate": 7.797943901510344e-06, - "loss": 0.0265, - "step": 72105 - }, - { - "epoch": 1.8304353344333038, - "grad_norm": 0.7776987552642822, - "learning_rate": 7.797097770444642e-06, - "loss": 0.0471, - "step": 72110 - }, - { - "epoch": 1.830562254093159, - "grad_norm": 0.5092291831970215, - "learning_rate": 7.79625163937894e-06, - "loss": 0.0259, - "step": 72115 - }, - { - "epoch": 1.8306891737530142, - "grad_norm": 0.4584334194660187, - "learning_rate": 7.795405508313239e-06, - "loss": 0.0348, - "step": 72120 - }, - { - "epoch": 1.8308160934128697, - "grad_norm": 0.43857496976852417, - "learning_rate": 7.794559377247535e-06, - "loss": 0.0492, - "step": 72125 - }, - { - "epoch": 1.8309430130727251, - "grad_norm": 0.25733131170272827, - "learning_rate": 7.793713246181834e-06, - "loss": 0.0361, - "step": 72130 - }, - { - "epoch": 1.8310699327325803, - "grad_norm": 0.5985684990882874, - "learning_rate": 7.792867115116132e-06, - "loss": 0.0614, - "step": 72135 - }, - { - "epoch": 1.8311968523924356, - "grad_norm": 0.45665469765663147, - "learning_rate": 7.79202098405043e-06, - "loss": 0.0251, - "step": 72140 - }, - { - "epoch": 1.8313237720522908, - "grad_norm": 0.250362366437912, - "learning_rate": 7.791174852984727e-06, - "loss": 0.0307, - "step": 72145 - }, - { - "epoch": 1.8314506917121463, - "grad_norm": 0.326060950756073, - "learning_rate": 7.790328721919026e-06, - "loss": 0.0642, - "step": 72150 - }, - { - "epoch": 1.8315776113720015, - "grad_norm": 0.3399512767791748, - "learning_rate": 7.789482590853324e-06, - "loss": 0.0385, - "step": 72155 - }, - { - "epoch": 1.831704531031857, - "grad_norm": 0.31696653366088867, - "learning_rate": 7.788636459787623e-06, - "loss": 0.0397, - "step": 72160 - }, - { - "epoch": 1.8318314506917122, - "grad_norm": 0.5025739073753357, - "learning_rate": 7.78779032872192e-06, - "loss": 0.0259, - "step": 72165 - }, - { - "epoch": 1.8319583703515674, - "grad_norm": 0.5687828660011292, - "learning_rate": 7.786944197656218e-06, - "loss": 0.0306, - "step": 72170 - }, - { - "epoch": 1.8320852900114226, - "grad_norm": 0.639975905418396, - "learning_rate": 7.786098066590514e-06, - "loss": 0.0267, - "step": 72175 - }, - { - "epoch": 1.832212209671278, - "grad_norm": 0.4285149574279785, - "learning_rate": 7.785251935524813e-06, - "loss": 0.0287, - "step": 72180 - }, - { - "epoch": 1.8323391293311335, - "grad_norm": 0.46791237592697144, - "learning_rate": 7.784405804459111e-06, - "loss": 0.0523, - "step": 72185 - }, - { - "epoch": 1.8324660489909887, - "grad_norm": 0.34928426146507263, - "learning_rate": 7.78355967339341e-06, - "loss": 0.0311, - "step": 72190 - }, - { - "epoch": 1.832592968650844, - "grad_norm": 0.43250805139541626, - "learning_rate": 7.782713542327706e-06, - "loss": 0.0351, - "step": 72195 - }, - { - "epoch": 1.8327198883106992, - "grad_norm": 0.31326824426651, - "learning_rate": 7.781867411262005e-06, - "loss": 0.0502, - "step": 72200 - }, - { - "epoch": 1.8328468079705547, - "grad_norm": 0.35482653975486755, - "learning_rate": 7.781021280196303e-06, - "loss": 0.0252, - "step": 72205 - }, - { - "epoch": 1.83297372763041, - "grad_norm": 0.3366532623767853, - "learning_rate": 7.780175149130601e-06, - "loss": 0.0214, - "step": 72210 - }, - { - "epoch": 1.8331006472902653, - "grad_norm": 0.29539692401885986, - "learning_rate": 7.779329018064898e-06, - "loss": 0.0361, - "step": 72215 - }, - { - "epoch": 1.8332275669501206, - "grad_norm": 0.4638945460319519, - "learning_rate": 7.778482886999197e-06, - "loss": 0.0493, - "step": 72220 - }, - { - "epoch": 1.8333544866099758, - "grad_norm": 0.34685641527175903, - "learning_rate": 7.777636755933495e-06, - "loss": 0.0442, - "step": 72225 - }, - { - "epoch": 1.8334814062698312, - "grad_norm": 0.5985927581787109, - "learning_rate": 7.776790624867793e-06, - "loss": 0.0437, - "step": 72230 - }, - { - "epoch": 1.8336083259296865, - "grad_norm": 0.34783247113227844, - "learning_rate": 7.77594449380209e-06, - "loss": 0.0559, - "step": 72235 - }, - { - "epoch": 1.833735245589542, - "grad_norm": 0.44174498319625854, - "learning_rate": 7.775098362736389e-06, - "loss": 0.0262, - "step": 72240 - }, - { - "epoch": 1.8338621652493972, - "grad_norm": 0.423912912607193, - "learning_rate": 7.774252231670685e-06, - "loss": 0.0328, - "step": 72245 - }, - { - "epoch": 1.8339890849092524, - "grad_norm": 0.2817619740962982, - "learning_rate": 7.773406100604984e-06, - "loss": 0.0292, - "step": 72250 - }, - { - "epoch": 1.8341160045691076, - "grad_norm": 0.2885328531265259, - "learning_rate": 7.772559969539282e-06, - "loss": 0.0284, - "step": 72255 - }, - { - "epoch": 1.834242924228963, - "grad_norm": 0.4377204179763794, - "learning_rate": 7.77171383847358e-06, - "loss": 0.0255, - "step": 72260 - }, - { - "epoch": 1.8343698438888185, - "grad_norm": 0.34973493218421936, - "learning_rate": 7.770867707407879e-06, - "loss": 0.0303, - "step": 72265 - }, - { - "epoch": 1.8344967635486737, - "grad_norm": 0.6296082139015198, - "learning_rate": 7.770021576342176e-06, - "loss": 0.0609, - "step": 72270 - }, - { - "epoch": 1.834623683208529, - "grad_norm": 0.8651619553565979, - "learning_rate": 7.769175445276474e-06, - "loss": 0.0447, - "step": 72275 - }, - { - "epoch": 1.8347506028683842, - "grad_norm": 0.46574854850769043, - "learning_rate": 7.768329314210772e-06, - "loss": 0.0368, - "step": 72280 - }, - { - "epoch": 1.8348775225282397, - "grad_norm": 1.7171732187271118, - "learning_rate": 7.76748318314507e-06, - "loss": 0.0719, - "step": 72285 - }, - { - "epoch": 1.8350044421880949, - "grad_norm": 0.6396616697311401, - "learning_rate": 7.766637052079367e-06, - "loss": 0.0369, - "step": 72290 - }, - { - "epoch": 1.8351313618479503, - "grad_norm": 0.5466017127037048, - "learning_rate": 7.765790921013666e-06, - "loss": 0.0312, - "step": 72295 - }, - { - "epoch": 1.8352582815078056, - "grad_norm": 4.068554401397705, - "learning_rate": 7.764944789947964e-06, - "loss": 0.0339, - "step": 72300 - }, - { - "epoch": 1.8353852011676608, - "grad_norm": 0.6270554065704346, - "learning_rate": 7.764098658882263e-06, - "loss": 0.0339, - "step": 72305 - }, - { - "epoch": 1.835512120827516, - "grad_norm": 0.2971436679363251, - "learning_rate": 7.76325252781656e-06, - "loss": 0.0597, - "step": 72310 - }, - { - "epoch": 1.8356390404873715, - "grad_norm": 0.4240180552005768, - "learning_rate": 7.762406396750858e-06, - "loss": 0.046, - "step": 72315 - }, - { - "epoch": 1.835765960147227, - "grad_norm": 0.4218800961971283, - "learning_rate": 7.761560265685154e-06, - "loss": 0.0397, - "step": 72320 - }, - { - "epoch": 1.8358928798070822, - "grad_norm": 0.2803560495376587, - "learning_rate": 7.760714134619453e-06, - "loss": 0.0243, - "step": 72325 - }, - { - "epoch": 1.8360197994669374, - "grad_norm": 0.38623565435409546, - "learning_rate": 7.759868003553751e-06, - "loss": 0.0416, - "step": 72330 - }, - { - "epoch": 1.8361467191267926, - "grad_norm": 0.6080032587051392, - "learning_rate": 7.75902187248805e-06, - "loss": 0.0246, - "step": 72335 - }, - { - "epoch": 1.836273638786648, - "grad_norm": 0.5287583470344543, - "learning_rate": 7.758175741422346e-06, - "loss": 0.0406, - "step": 72340 - }, - { - "epoch": 1.8364005584465035, - "grad_norm": 0.5735242366790771, - "learning_rate": 7.757329610356645e-06, - "loss": 0.0279, - "step": 72345 - }, - { - "epoch": 1.8365274781063587, - "grad_norm": 0.32833629846572876, - "learning_rate": 7.756483479290943e-06, - "loss": 0.0448, - "step": 72350 - }, - { - "epoch": 1.836654397766214, - "grad_norm": 0.44107022881507874, - "learning_rate": 7.755637348225242e-06, - "loss": 0.0224, - "step": 72355 - }, - { - "epoch": 1.8367813174260692, - "grad_norm": 0.4136592149734497, - "learning_rate": 7.754791217159538e-06, - "loss": 0.0616, - "step": 72360 - }, - { - "epoch": 1.8369082370859247, - "grad_norm": 0.3620617389678955, - "learning_rate": 7.753945086093837e-06, - "loss": 0.0385, - "step": 72365 - }, - { - "epoch": 1.8370351567457799, - "grad_norm": 0.2818473279476166, - "learning_rate": 7.753098955028135e-06, - "loss": 0.0344, - "step": 72370 - }, - { - "epoch": 1.8371620764056353, - "grad_norm": 0.4929455816745758, - "learning_rate": 7.752252823962434e-06, - "loss": 0.0403, - "step": 72375 - }, - { - "epoch": 1.8372889960654906, - "grad_norm": 0.3721637725830078, - "learning_rate": 7.75140669289673e-06, - "loss": 0.0347, - "step": 72380 - }, - { - "epoch": 1.8374159157253458, - "grad_norm": 0.3995727002620697, - "learning_rate": 7.750560561831029e-06, - "loss": 0.0303, - "step": 72385 - }, - { - "epoch": 1.837542835385201, - "grad_norm": 0.558729350566864, - "learning_rate": 7.749714430765325e-06, - "loss": 0.0513, - "step": 72390 - }, - { - "epoch": 1.8376697550450565, - "grad_norm": 0.4891183376312256, - "learning_rate": 7.748868299699624e-06, - "loss": 0.0467, - "step": 72395 - }, - { - "epoch": 1.837796674704912, - "grad_norm": 0.49697309732437134, - "learning_rate": 7.748022168633922e-06, - "loss": 0.0251, - "step": 72400 - }, - { - "epoch": 1.8379235943647672, - "grad_norm": 0.8142668604850769, - "learning_rate": 7.74717603756822e-06, - "loss": 0.0512, - "step": 72405 - }, - { - "epoch": 1.8380505140246224, - "grad_norm": 0.3343496322631836, - "learning_rate": 7.746329906502517e-06, - "loss": 0.0247, - "step": 72410 - }, - { - "epoch": 1.8381774336844776, - "grad_norm": 2.356555938720703, - "learning_rate": 7.745483775436816e-06, - "loss": 0.0279, - "step": 72415 - }, - { - "epoch": 1.838304353344333, - "grad_norm": 0.39029181003570557, - "learning_rate": 7.744637644371114e-06, - "loss": 0.0387, - "step": 72420 - }, - { - "epoch": 1.8384312730041883, - "grad_norm": 0.49265339970588684, - "learning_rate": 7.743791513305412e-06, - "loss": 0.0417, - "step": 72425 - }, - { - "epoch": 1.8385581926640437, - "grad_norm": 0.46415984630584717, - "learning_rate": 7.74294538223971e-06, - "loss": 0.0326, - "step": 72430 - }, - { - "epoch": 1.838685112323899, - "grad_norm": 0.4722498059272766, - "learning_rate": 7.742099251174008e-06, - "loss": 0.037, - "step": 72435 - }, - { - "epoch": 1.8388120319837542, - "grad_norm": 0.6914186477661133, - "learning_rate": 7.741253120108306e-06, - "loss": 0.0368, - "step": 72440 - }, - { - "epoch": 1.8389389516436094, - "grad_norm": 0.906947672367096, - "learning_rate": 7.740406989042604e-06, - "loss": 0.0283, - "step": 72445 - }, - { - "epoch": 1.8390658713034649, - "grad_norm": 0.4257940649986267, - "learning_rate": 7.739560857976901e-06, - "loss": 0.045, - "step": 72450 - }, - { - "epoch": 1.8391927909633203, - "grad_norm": 0.5387512445449829, - "learning_rate": 7.7387147269112e-06, - "loss": 0.0264, - "step": 72455 - }, - { - "epoch": 1.8393197106231756, - "grad_norm": 0.6407455205917358, - "learning_rate": 7.737868595845496e-06, - "loss": 0.0393, - "step": 72460 - }, - { - "epoch": 1.8394466302830308, - "grad_norm": 0.7188113927841187, - "learning_rate": 7.737022464779795e-06, - "loss": 0.0306, - "step": 72465 - }, - { - "epoch": 1.839573549942886, - "grad_norm": 0.4425368309020996, - "learning_rate": 7.736176333714093e-06, - "loss": 0.0307, - "step": 72470 - }, - { - "epoch": 1.8397004696027415, - "grad_norm": 0.3363012671470642, - "learning_rate": 7.735330202648391e-06, - "loss": 0.0387, - "step": 72475 - }, - { - "epoch": 1.839827389262597, - "grad_norm": 0.31748339533805847, - "learning_rate": 7.734484071582688e-06, - "loss": 0.0267, - "step": 72480 - }, - { - "epoch": 1.8399543089224522, - "grad_norm": 0.5754492878913879, - "learning_rate": 7.733637940516986e-06, - "loss": 0.0528, - "step": 72485 - }, - { - "epoch": 1.8400812285823074, - "grad_norm": 0.6859310865402222, - "learning_rate": 7.732791809451285e-06, - "loss": 0.0414, - "step": 72490 - }, - { - "epoch": 1.8402081482421626, - "grad_norm": 0.7418652772903442, - "learning_rate": 7.731945678385583e-06, - "loss": 0.0531, - "step": 72495 - }, - { - "epoch": 1.840335067902018, - "grad_norm": 0.2652081549167633, - "learning_rate": 7.73109954731988e-06, - "loss": 0.0424, - "step": 72500 - }, - { - "epoch": 1.8404619875618733, - "grad_norm": 0.665120005607605, - "learning_rate": 7.730253416254178e-06, - "loss": 0.0431, - "step": 72505 - }, - { - "epoch": 1.8405889072217287, - "grad_norm": 0.42425692081451416, - "learning_rate": 7.729407285188477e-06, - "loss": 0.036, - "step": 72510 - }, - { - "epoch": 1.840715826881584, - "grad_norm": 0.41412216424942017, - "learning_rate": 7.728561154122775e-06, - "loss": 0.0299, - "step": 72515 - }, - { - "epoch": 1.8408427465414392, - "grad_norm": 0.9699652194976807, - "learning_rate": 7.727715023057072e-06, - "loss": 0.0647, - "step": 72520 - }, - { - "epoch": 1.8409696662012944, - "grad_norm": 0.5655332207679749, - "learning_rate": 7.72686889199137e-06, - "loss": 0.0438, - "step": 72525 - }, - { - "epoch": 1.8410965858611499, - "grad_norm": 0.5248034000396729, - "learning_rate": 7.726022760925667e-06, - "loss": 0.0271, - "step": 72530 - }, - { - "epoch": 1.8412235055210053, - "grad_norm": 0.2843499481678009, - "learning_rate": 7.725176629859965e-06, - "loss": 0.0461, - "step": 72535 - }, - { - "epoch": 1.8413504251808606, - "grad_norm": 0.3755659759044647, - "learning_rate": 7.724330498794264e-06, - "loss": 0.0343, - "step": 72540 - }, - { - "epoch": 1.8414773448407158, - "grad_norm": 0.6257660388946533, - "learning_rate": 7.723484367728562e-06, - "loss": 0.0391, - "step": 72545 - }, - { - "epoch": 1.841604264500571, - "grad_norm": 0.5335383415222168, - "learning_rate": 7.722638236662859e-06, - "loss": 0.0442, - "step": 72550 - }, - { - "epoch": 1.8417311841604265, - "grad_norm": 0.36324450373649597, - "learning_rate": 7.721792105597157e-06, - "loss": 0.036, - "step": 72555 - }, - { - "epoch": 1.841858103820282, - "grad_norm": 0.33278077840805054, - "learning_rate": 7.720945974531456e-06, - "loss": 0.0317, - "step": 72560 - }, - { - "epoch": 1.8419850234801372, - "grad_norm": 0.5372306108474731, - "learning_rate": 7.720099843465754e-06, - "loss": 0.0384, - "step": 72565 - }, - { - "epoch": 1.8421119431399924, - "grad_norm": 0.4644870162010193, - "learning_rate": 7.719253712400051e-06, - "loss": 0.0456, - "step": 72570 - }, - { - "epoch": 1.8422388627998476, - "grad_norm": 0.3583316206932068, - "learning_rate": 7.71840758133435e-06, - "loss": 0.027, - "step": 72575 - }, - { - "epoch": 1.842365782459703, - "grad_norm": 0.36412733793258667, - "learning_rate": 7.717561450268648e-06, - "loss": 0.0289, - "step": 72580 - }, - { - "epoch": 1.8424927021195583, - "grad_norm": 0.3262830674648285, - "learning_rate": 7.716715319202946e-06, - "loss": 0.038, - "step": 72585 - }, - { - "epoch": 1.8426196217794137, - "grad_norm": 0.42151859402656555, - "learning_rate": 7.715869188137243e-06, - "loss": 0.0221, - "step": 72590 - }, - { - "epoch": 1.842746541439269, - "grad_norm": 0.32918184995651245, - "learning_rate": 7.715023057071541e-06, - "loss": 0.0422, - "step": 72595 - }, - { - "epoch": 1.8428734610991242, - "grad_norm": 0.2972355782985687, - "learning_rate": 7.714176926005838e-06, - "loss": 0.0334, - "step": 72600 - }, - { - "epoch": 1.8430003807589794, - "grad_norm": 0.31966108083724976, - "learning_rate": 7.713330794940136e-06, - "loss": 0.0249, - "step": 72605 - }, - { - "epoch": 1.8431273004188349, - "grad_norm": 0.9721766114234924, - "learning_rate": 7.712484663874435e-06, - "loss": 0.0546, - "step": 72610 - }, - { - "epoch": 1.8432542200786903, - "grad_norm": 0.7215553522109985, - "learning_rate": 7.711638532808733e-06, - "loss": 0.041, - "step": 72615 - }, - { - "epoch": 1.8433811397385456, - "grad_norm": 0.422058641910553, - "learning_rate": 7.71079240174303e-06, - "loss": 0.0463, - "step": 72620 - }, - { - "epoch": 1.8435080593984008, - "grad_norm": 0.49567800760269165, - "learning_rate": 7.709946270677328e-06, - "loss": 0.0405, - "step": 72625 - }, - { - "epoch": 1.843634979058256, - "grad_norm": 0.4844178557395935, - "learning_rate": 7.709100139611627e-06, - "loss": 0.0538, - "step": 72630 - }, - { - "epoch": 1.8437618987181115, - "grad_norm": 0.679039478302002, - "learning_rate": 7.708254008545925e-06, - "loss": 0.0548, - "step": 72635 - }, - { - "epoch": 1.8438888183779667, - "grad_norm": 0.4327368438243866, - "learning_rate": 7.707407877480222e-06, - "loss": 0.0393, - "step": 72640 - }, - { - "epoch": 1.8440157380378221, - "grad_norm": 0.4801304340362549, - "learning_rate": 7.70656174641452e-06, - "loss": 0.0328, - "step": 72645 - }, - { - "epoch": 1.8441426576976774, - "grad_norm": 0.5201664566993713, - "learning_rate": 7.705715615348819e-06, - "loss": 0.029, - "step": 72650 - }, - { - "epoch": 1.8442695773575326, - "grad_norm": 0.3794746994972229, - "learning_rate": 7.704869484283117e-06, - "loss": 0.0288, - "step": 72655 - }, - { - "epoch": 1.8443964970173878, - "grad_norm": 0.2277531921863556, - "learning_rate": 7.704023353217414e-06, - "loss": 0.0191, - "step": 72660 - }, - { - "epoch": 1.8445234166772433, - "grad_norm": 0.5999239087104797, - "learning_rate": 7.703177222151712e-06, - "loss": 0.0354, - "step": 72665 - }, - { - "epoch": 1.8446503363370987, - "grad_norm": 0.37934422492980957, - "learning_rate": 7.702331091086009e-06, - "loss": 0.0204, - "step": 72670 - }, - { - "epoch": 1.844777255996954, - "grad_norm": 0.4032561779022217, - "learning_rate": 7.701484960020307e-06, - "loss": 0.043, - "step": 72675 - }, - { - "epoch": 1.8449041756568092, - "grad_norm": 0.5724853277206421, - "learning_rate": 7.700638828954606e-06, - "loss": 0.0326, - "step": 72680 - }, - { - "epoch": 1.8450310953166644, - "grad_norm": 0.2609630227088928, - "learning_rate": 7.699792697888904e-06, - "loss": 0.0308, - "step": 72685 - }, - { - "epoch": 1.8451580149765199, - "grad_norm": 0.5227208137512207, - "learning_rate": 7.6989465668232e-06, - "loss": 0.0478, - "step": 72690 - }, - { - "epoch": 1.8452849346363753, - "grad_norm": 0.44853833317756653, - "learning_rate": 7.698100435757499e-06, - "loss": 0.0325, - "step": 72695 - }, - { - "epoch": 1.8454118542962306, - "grad_norm": 0.5389077067375183, - "learning_rate": 7.697254304691797e-06, - "loss": 0.0438, - "step": 72700 - }, - { - "epoch": 1.8455387739560858, - "grad_norm": 0.3126462697982788, - "learning_rate": 7.696408173626096e-06, - "loss": 0.0358, - "step": 72705 - }, - { - "epoch": 1.845665693615941, - "grad_norm": 0.34187883138656616, - "learning_rate": 7.695562042560393e-06, - "loss": 0.0266, - "step": 72710 - }, - { - "epoch": 1.8457926132757965, - "grad_norm": 0.6997032165527344, - "learning_rate": 7.694715911494691e-06, - "loss": 0.0384, - "step": 72715 - }, - { - "epoch": 1.8459195329356517, - "grad_norm": 0.43685993552207947, - "learning_rate": 7.69386978042899e-06, - "loss": 0.0314, - "step": 72720 - }, - { - "epoch": 1.8460464525955071, - "grad_norm": 0.3777293264865875, - "learning_rate": 7.693023649363288e-06, - "loss": 0.0128, - "step": 72725 - }, - { - "epoch": 1.8461733722553624, - "grad_norm": 0.2686243951320648, - "learning_rate": 7.692177518297584e-06, - "loss": 0.0438, - "step": 72730 - }, - { - "epoch": 1.8463002919152176, - "grad_norm": 0.5726079940795898, - "learning_rate": 7.691331387231883e-06, - "loss": 0.0472, - "step": 72735 - }, - { - "epoch": 1.8464272115750728, - "grad_norm": 0.235941082239151, - "learning_rate": 7.69048525616618e-06, - "loss": 0.0288, - "step": 72740 - }, - { - "epoch": 1.8465541312349283, - "grad_norm": 0.44869157671928406, - "learning_rate": 7.689639125100478e-06, - "loss": 0.0365, - "step": 72745 - }, - { - "epoch": 1.8466810508947837, - "grad_norm": 0.3492095172405243, - "learning_rate": 7.688792994034776e-06, - "loss": 0.0413, - "step": 72750 - }, - { - "epoch": 1.846807970554639, - "grad_norm": 0.37284326553344727, - "learning_rate": 7.687946862969075e-06, - "loss": 0.0271, - "step": 72755 - }, - { - "epoch": 1.8469348902144942, - "grad_norm": 0.49445343017578125, - "learning_rate": 7.687100731903372e-06, - "loss": 0.051, - "step": 72760 - }, - { - "epoch": 1.8470618098743494, - "grad_norm": 0.5503557920455933, - "learning_rate": 7.68625460083767e-06, - "loss": 0.0421, - "step": 72765 - }, - { - "epoch": 1.8471887295342049, - "grad_norm": 0.5261096954345703, - "learning_rate": 7.685408469771968e-06, - "loss": 0.0289, - "step": 72770 - }, - { - "epoch": 1.84731564919406, - "grad_norm": 0.7209519147872925, - "learning_rate": 7.684562338706267e-06, - "loss": 0.0372, - "step": 72775 - }, - { - "epoch": 1.8474425688539156, - "grad_norm": 0.9794279932975769, - "learning_rate": 7.683716207640565e-06, - "loss": 0.0498, - "step": 72780 - }, - { - "epoch": 1.8475694885137708, - "grad_norm": 0.26202115416526794, - "learning_rate": 7.682870076574862e-06, - "loss": 0.0281, - "step": 72785 - }, - { - "epoch": 1.847696408173626, - "grad_norm": 0.3560878336429596, - "learning_rate": 7.68202394550916e-06, - "loss": 0.0404, - "step": 72790 - }, - { - "epoch": 1.8478233278334812, - "grad_norm": 0.2816479504108429, - "learning_rate": 7.681177814443459e-06, - "loss": 0.028, - "step": 72795 - }, - { - "epoch": 1.8479502474933367, - "grad_norm": 0.5598582029342651, - "learning_rate": 7.680331683377757e-06, - "loss": 0.0265, - "step": 72800 - }, - { - "epoch": 1.8480771671531921, - "grad_norm": 0.28289303183555603, - "learning_rate": 7.679485552312054e-06, - "loss": 0.0315, - "step": 72805 - }, - { - "epoch": 1.8482040868130474, - "grad_norm": 0.6657314896583557, - "learning_rate": 7.678639421246352e-06, - "loss": 0.0406, - "step": 72810 - }, - { - "epoch": 1.8483310064729026, - "grad_norm": 0.41524186730384827, - "learning_rate": 7.677793290180649e-06, - "loss": 0.0393, - "step": 72815 - }, - { - "epoch": 1.8484579261327578, - "grad_norm": 0.8948625922203064, - "learning_rate": 7.676947159114947e-06, - "loss": 0.0373, - "step": 72820 - }, - { - "epoch": 1.8485848457926133, - "grad_norm": 0.4947989881038666, - "learning_rate": 7.676101028049246e-06, - "loss": 0.0537, - "step": 72825 - }, - { - "epoch": 1.8487117654524687, - "grad_norm": 0.7158237099647522, - "learning_rate": 7.675254896983544e-06, - "loss": 0.044, - "step": 72830 - }, - { - "epoch": 1.848838685112324, - "grad_norm": 0.3327103853225708, - "learning_rate": 7.67440876591784e-06, - "loss": 0.0253, - "step": 72835 - }, - { - "epoch": 1.8489656047721792, - "grad_norm": 0.5851573944091797, - "learning_rate": 7.67356263485214e-06, - "loss": 0.0418, - "step": 72840 - }, - { - "epoch": 1.8490925244320344, - "grad_norm": 0.5711240768432617, - "learning_rate": 7.672716503786438e-06, - "loss": 0.0272, - "step": 72845 - }, - { - "epoch": 1.8492194440918899, - "grad_norm": 0.38847675919532776, - "learning_rate": 7.671870372720736e-06, - "loss": 0.0342, - "step": 72850 - }, - { - "epoch": 1.849346363751745, - "grad_norm": 0.549992561340332, - "learning_rate": 7.671024241655033e-06, - "loss": 0.0318, - "step": 72855 - }, - { - "epoch": 1.8494732834116006, - "grad_norm": 0.2897048890590668, - "learning_rate": 7.670178110589331e-06, - "loss": 0.0431, - "step": 72860 - }, - { - "epoch": 1.8496002030714558, - "grad_norm": 0.46089115738868713, - "learning_rate": 7.66933197952363e-06, - "loss": 0.0286, - "step": 72865 - }, - { - "epoch": 1.849727122731311, - "grad_norm": 0.667595624923706, - "learning_rate": 7.668485848457928e-06, - "loss": 0.0419, - "step": 72870 - }, - { - "epoch": 1.8498540423911662, - "grad_norm": 0.497618168592453, - "learning_rate": 7.667639717392225e-06, - "loss": 0.0343, - "step": 72875 - }, - { - "epoch": 1.8499809620510217, - "grad_norm": 0.34055978059768677, - "learning_rate": 7.666793586326523e-06, - "loss": 0.0327, - "step": 72880 - }, - { - "epoch": 1.8501078817108771, - "grad_norm": 0.3787175416946411, - "learning_rate": 7.66594745526082e-06, - "loss": 0.0454, - "step": 72885 - }, - { - "epoch": 1.8502348013707324, - "grad_norm": 0.5521329045295715, - "learning_rate": 7.665101324195118e-06, - "loss": 0.0414, - "step": 72890 - }, - { - "epoch": 1.8503617210305876, - "grad_norm": 0.3585733473300934, - "learning_rate": 7.664255193129416e-06, - "loss": 0.0243, - "step": 72895 - }, - { - "epoch": 1.8504886406904428, - "grad_norm": 0.24839109182357788, - "learning_rate": 7.663409062063715e-06, - "loss": 0.0394, - "step": 72900 - }, - { - "epoch": 1.8506155603502983, - "grad_norm": 0.7916136980056763, - "learning_rate": 7.662562930998012e-06, - "loss": 0.0399, - "step": 72905 - }, - { - "epoch": 1.8507424800101537, - "grad_norm": 0.5610529184341431, - "learning_rate": 7.66171679993231e-06, - "loss": 0.0392, - "step": 72910 - }, - { - "epoch": 1.850869399670009, - "grad_norm": 0.29547491669654846, - "learning_rate": 7.660870668866608e-06, - "loss": 0.0329, - "step": 72915 - }, - { - "epoch": 1.8509963193298642, - "grad_norm": 0.5522790551185608, - "learning_rate": 7.660024537800907e-06, - "loss": 0.0275, - "step": 72920 - }, - { - "epoch": 1.8511232389897194, - "grad_norm": 0.6567624807357788, - "learning_rate": 7.659178406735204e-06, - "loss": 0.021, - "step": 72925 - }, - { - "epoch": 1.8512501586495749, - "grad_norm": 0.39463990926742554, - "learning_rate": 7.658332275669502e-06, - "loss": 0.0383, - "step": 72930 - }, - { - "epoch": 1.85137707830943, - "grad_norm": 0.4453105628490448, - "learning_rate": 7.6574861446038e-06, - "loss": 0.0371, - "step": 72935 - }, - { - "epoch": 1.8515039979692856, - "grad_norm": 0.3382141590118408, - "learning_rate": 7.656640013538099e-06, - "loss": 0.0359, - "step": 72940 - }, - { - "epoch": 1.8516309176291408, - "grad_norm": 0.5171671509742737, - "learning_rate": 7.655793882472395e-06, - "loss": 0.0351, - "step": 72945 - }, - { - "epoch": 1.851757837288996, - "grad_norm": 0.44507312774658203, - "learning_rate": 7.654947751406694e-06, - "loss": 0.0353, - "step": 72950 - }, - { - "epoch": 1.8518847569488512, - "grad_norm": 0.40411630272865295, - "learning_rate": 7.65410162034099e-06, - "loss": 0.019, - "step": 72955 - }, - { - "epoch": 1.8520116766087067, - "grad_norm": 0.22293056547641754, - "learning_rate": 7.653255489275289e-06, - "loss": 0.0325, - "step": 72960 - }, - { - "epoch": 1.8521385962685621, - "grad_norm": 0.37364792823791504, - "learning_rate": 7.652409358209587e-06, - "loss": 0.0342, - "step": 72965 - }, - { - "epoch": 1.8522655159284174, - "grad_norm": 0.3782266676425934, - "learning_rate": 7.651563227143886e-06, - "loss": 0.0376, - "step": 72970 - }, - { - "epoch": 1.8523924355882726, - "grad_norm": 0.6764352917671204, - "learning_rate": 7.650717096078182e-06, - "loss": 0.0417, - "step": 72975 - }, - { - "epoch": 1.8525193552481278, - "grad_norm": 0.6642218232154846, - "learning_rate": 7.649870965012481e-06, - "loss": 0.0334, - "step": 72980 - }, - { - "epoch": 1.8526462749079833, - "grad_norm": 0.2722839117050171, - "learning_rate": 7.64902483394678e-06, - "loss": 0.0279, - "step": 72985 - }, - { - "epoch": 1.8527731945678385, - "grad_norm": 0.51924729347229, - "learning_rate": 7.648178702881078e-06, - "loss": 0.0561, - "step": 72990 - }, - { - "epoch": 1.852900114227694, - "grad_norm": 0.7937610745429993, - "learning_rate": 7.647332571815374e-06, - "loss": 0.0369, - "step": 72995 - }, - { - "epoch": 1.8530270338875492, - "grad_norm": 0.4527570605278015, - "learning_rate": 7.646486440749673e-06, - "loss": 0.0404, - "step": 73000 - }, - { - "epoch": 1.8531539535474044, - "grad_norm": 0.6017526388168335, - "learning_rate": 7.645640309683971e-06, - "loss": 0.0357, - "step": 73005 - }, - { - "epoch": 1.8532808732072596, - "grad_norm": 0.3233206272125244, - "learning_rate": 7.64479417861827e-06, - "loss": 0.0312, - "step": 73010 - }, - { - "epoch": 1.853407792867115, - "grad_norm": 0.4156467914581299, - "learning_rate": 7.643948047552566e-06, - "loss": 0.0419, - "step": 73015 - }, - { - "epoch": 1.8535347125269706, - "grad_norm": 0.3091551661491394, - "learning_rate": 7.643101916486865e-06, - "loss": 0.0321, - "step": 73020 - }, - { - "epoch": 1.8536616321868258, - "grad_norm": 0.5937807559967041, - "learning_rate": 7.642255785421161e-06, - "loss": 0.0322, - "step": 73025 - }, - { - "epoch": 1.853788551846681, - "grad_norm": 0.48102036118507385, - "learning_rate": 7.64140965435546e-06, - "loss": 0.0503, - "step": 73030 - }, - { - "epoch": 1.8539154715065362, - "grad_norm": 0.31532078981399536, - "learning_rate": 7.640563523289758e-06, - "loss": 0.0283, - "step": 73035 - }, - { - "epoch": 1.8540423911663917, - "grad_norm": 0.4379854202270508, - "learning_rate": 7.639717392224057e-06, - "loss": 0.0494, - "step": 73040 - }, - { - "epoch": 1.8541693108262471, - "grad_norm": 0.37000495195388794, - "learning_rate": 7.638871261158353e-06, - "loss": 0.041, - "step": 73045 - }, - { - "epoch": 1.8542962304861024, - "grad_norm": 0.34049081802368164, - "learning_rate": 7.638025130092652e-06, - "loss": 0.0395, - "step": 73050 - }, - { - "epoch": 1.8544231501459576, - "grad_norm": 0.33771249651908875, - "learning_rate": 7.63717899902695e-06, - "loss": 0.0326, - "step": 73055 - }, - { - "epoch": 1.8545500698058128, - "grad_norm": 0.2779901325702667, - "learning_rate": 7.636332867961249e-06, - "loss": 0.0504, - "step": 73060 - }, - { - "epoch": 1.8546769894656683, - "grad_norm": 0.47627878189086914, - "learning_rate": 7.635486736895545e-06, - "loss": 0.0341, - "step": 73065 - }, - { - "epoch": 1.8548039091255235, - "grad_norm": 0.3878328204154968, - "learning_rate": 7.634640605829844e-06, - "loss": 0.0189, - "step": 73070 - }, - { - "epoch": 1.854930828785379, - "grad_norm": 0.3524996340274811, - "learning_rate": 7.633794474764142e-06, - "loss": 0.0436, - "step": 73075 - }, - { - "epoch": 1.8550577484452342, - "grad_norm": 0.5353085398674011, - "learning_rate": 7.63294834369844e-06, - "loss": 0.0273, - "step": 73080 - }, - { - "epoch": 1.8551846681050894, - "grad_norm": 1.1848622560501099, - "learning_rate": 7.632102212632737e-06, - "loss": 0.0465, - "step": 73085 - }, - { - "epoch": 1.8553115877649446, - "grad_norm": 0.43057456612586975, - "learning_rate": 7.631256081567036e-06, - "loss": 0.0478, - "step": 73090 - }, - { - "epoch": 1.8554385074248, - "grad_norm": 0.6175716519355774, - "learning_rate": 7.630409950501332e-06, - "loss": 0.0383, - "step": 73095 - }, - { - "epoch": 1.8555654270846555, - "grad_norm": 0.7961320877075195, - "learning_rate": 7.62956381943563e-06, - "loss": 0.0322, - "step": 73100 - }, - { - "epoch": 1.8556923467445108, - "grad_norm": 0.5698443651199341, - "learning_rate": 7.628717688369929e-06, - "loss": 0.0403, - "step": 73105 - }, - { - "epoch": 1.855819266404366, - "grad_norm": 0.38758203387260437, - "learning_rate": 7.6278715573042275e-06, - "loss": 0.0429, - "step": 73110 - }, - { - "epoch": 1.8559461860642212, - "grad_norm": 1.0592488050460815, - "learning_rate": 7.627025426238525e-06, - "loss": 0.0375, - "step": 73115 - }, - { - "epoch": 1.8560731057240767, - "grad_norm": 0.47558894753456116, - "learning_rate": 7.626179295172823e-06, - "loss": 0.0417, - "step": 73120 - }, - { - "epoch": 1.856200025383932, - "grad_norm": 0.46028944849967957, - "learning_rate": 7.62533316410712e-06, - "loss": 0.0431, - "step": 73125 - }, - { - "epoch": 1.8563269450437874, - "grad_norm": 0.5512962341308594, - "learning_rate": 7.6244870330414185e-06, - "loss": 0.0415, - "step": 73130 - }, - { - "epoch": 1.8564538647036426, - "grad_norm": 0.7609273791313171, - "learning_rate": 7.623640901975716e-06, - "loss": 0.0503, - "step": 73135 - }, - { - "epoch": 1.8565807843634978, - "grad_norm": 0.33203551173210144, - "learning_rate": 7.6227947709100145e-06, - "loss": 0.033, - "step": 73140 - }, - { - "epoch": 1.856707704023353, - "grad_norm": 0.6136659979820251, - "learning_rate": 7.621948639844312e-06, - "loss": 0.0244, - "step": 73145 - }, - { - "epoch": 1.8568346236832085, - "grad_norm": 0.675408661365509, - "learning_rate": 7.6211025087786104e-06, - "loss": 0.0281, - "step": 73150 - }, - { - "epoch": 1.856961543343064, - "grad_norm": 0.4249838590621948, - "learning_rate": 7.620256377712908e-06, - "loss": 0.0409, - "step": 73155 - }, - { - "epoch": 1.8570884630029192, - "grad_norm": 0.7693101763725281, - "learning_rate": 7.619410246647206e-06, - "loss": 0.0372, - "step": 73160 - }, - { - "epoch": 1.8572153826627744, - "grad_norm": 0.3843802511692047, - "learning_rate": 7.618564115581504e-06, - "loss": 0.0383, - "step": 73165 - }, - { - "epoch": 1.8573423023226296, - "grad_norm": 0.3055875599384308, - "learning_rate": 7.617717984515802e-06, - "loss": 0.0397, - "step": 73170 - }, - { - "epoch": 1.857469221982485, - "grad_norm": 0.45162904262542725, - "learning_rate": 7.6168718534501e-06, - "loss": 0.0315, - "step": 73175 - }, - { - "epoch": 1.8575961416423405, - "grad_norm": 0.5020622611045837, - "learning_rate": 7.616025722384398e-06, - "loss": 0.063, - "step": 73180 - }, - { - "epoch": 1.8577230613021958, - "grad_norm": 0.3561357259750366, - "learning_rate": 7.615179591318696e-06, - "loss": 0.0264, - "step": 73185 - }, - { - "epoch": 1.857849980962051, - "grad_norm": 0.7096174955368042, - "learning_rate": 7.614333460252994e-06, - "loss": 0.0473, - "step": 73190 - }, - { - "epoch": 1.8579769006219062, - "grad_norm": 0.34453991055488586, - "learning_rate": 7.613487329187291e-06, - "loss": 0.0322, - "step": 73195 - }, - { - "epoch": 1.8581038202817617, - "grad_norm": 0.3838564455509186, - "learning_rate": 7.612641198121589e-06, - "loss": 0.0314, - "step": 73200 - }, - { - "epoch": 1.858230739941617, - "grad_norm": 0.35811057686805725, - "learning_rate": 7.611795067055887e-06, - "loss": 0.0381, - "step": 73205 - }, - { - "epoch": 1.8583576596014724, - "grad_norm": 0.33930838108062744, - "learning_rate": 7.610948935990185e-06, - "loss": 0.023, - "step": 73210 - }, - { - "epoch": 1.8584845792613276, - "grad_norm": 0.5119139552116394, - "learning_rate": 7.610102804924483e-06, - "loss": 0.0379, - "step": 73215 - }, - { - "epoch": 1.8586114989211828, - "grad_norm": 0.3135601878166199, - "learning_rate": 7.609256673858781e-06, - "loss": 0.0374, - "step": 73220 - }, - { - "epoch": 1.858738418581038, - "grad_norm": 0.5815479159355164, - "learning_rate": 7.608410542793079e-06, - "loss": 0.0459, - "step": 73225 - }, - { - "epoch": 1.8588653382408935, - "grad_norm": 0.6350271701812744, - "learning_rate": 7.607564411727377e-06, - "loss": 0.0382, - "step": 73230 - }, - { - "epoch": 1.858992257900749, - "grad_norm": 0.4068549573421478, - "learning_rate": 7.606718280661675e-06, - "loss": 0.0327, - "step": 73235 - }, - { - "epoch": 1.8591191775606042, - "grad_norm": 0.5301534533500671, - "learning_rate": 7.605872149595973e-06, - "loss": 0.0346, - "step": 73240 - }, - { - "epoch": 1.8592460972204594, - "grad_norm": 0.38961201906204224, - "learning_rate": 7.605026018530271e-06, - "loss": 0.0385, - "step": 73245 - }, - { - "epoch": 1.8593730168803146, - "grad_norm": 0.2949112355709076, - "learning_rate": 7.604179887464569e-06, - "loss": 0.0371, - "step": 73250 - }, - { - "epoch": 1.85949993654017, - "grad_norm": 0.528198778629303, - "learning_rate": 7.603333756398866e-06, - "loss": 0.0488, - "step": 73255 - }, - { - "epoch": 1.8596268562000255, - "grad_norm": 0.7231833338737488, - "learning_rate": 7.602487625333165e-06, - "loss": 0.0479, - "step": 73260 - }, - { - "epoch": 1.8597537758598808, - "grad_norm": 0.5277169346809387, - "learning_rate": 7.601641494267462e-06, - "loss": 0.0319, - "step": 73265 - }, - { - "epoch": 1.859880695519736, - "grad_norm": 0.4124847650527954, - "learning_rate": 7.60079536320176e-06, - "loss": 0.0271, - "step": 73270 - }, - { - "epoch": 1.8600076151795912, - "grad_norm": 0.3206121027469635, - "learning_rate": 7.599949232136058e-06, - "loss": 0.0216, - "step": 73275 - }, - { - "epoch": 1.8601345348394467, - "grad_norm": 0.42703184485435486, - "learning_rate": 7.599103101070356e-06, - "loss": 0.0374, - "step": 73280 - }, - { - "epoch": 1.860261454499302, - "grad_norm": 0.4249008297920227, - "learning_rate": 7.598256970004654e-06, - "loss": 0.0496, - "step": 73285 - }, - { - "epoch": 1.8603883741591574, - "grad_norm": 0.28445473313331604, - "learning_rate": 7.597410838938952e-06, - "loss": 0.0343, - "step": 73290 - }, - { - "epoch": 1.8605152938190126, - "grad_norm": 0.41978639364242554, - "learning_rate": 7.5965647078732505e-06, - "loss": 0.022, - "step": 73295 - }, - { - "epoch": 1.8606422134788678, - "grad_norm": 0.22873343527317047, - "learning_rate": 7.595718576807548e-06, - "loss": 0.0289, - "step": 73300 - }, - { - "epoch": 1.860769133138723, - "grad_norm": 0.2182752788066864, - "learning_rate": 7.5948724457418465e-06, - "loss": 0.0386, - "step": 73305 - }, - { - "epoch": 1.8608960527985785, - "grad_norm": 0.43071258068084717, - "learning_rate": 7.594026314676144e-06, - "loss": 0.0719, - "step": 73310 - }, - { - "epoch": 1.861022972458434, - "grad_norm": 0.4348742961883545, - "learning_rate": 7.5931801836104425e-06, - "loss": 0.0292, - "step": 73315 - }, - { - "epoch": 1.8611498921182892, - "grad_norm": 0.2973809838294983, - "learning_rate": 7.59233405254474e-06, - "loss": 0.039, - "step": 73320 - }, - { - "epoch": 1.8612768117781444, - "grad_norm": 0.45378583669662476, - "learning_rate": 7.591487921479038e-06, - "loss": 0.0232, - "step": 73325 - }, - { - "epoch": 1.8614037314379996, - "grad_norm": 0.36866268515586853, - "learning_rate": 7.590641790413336e-06, - "loss": 0.0323, - "step": 73330 - }, - { - "epoch": 1.861530651097855, - "grad_norm": 0.3099067807197571, - "learning_rate": 7.589795659347634e-06, - "loss": 0.0339, - "step": 73335 - }, - { - "epoch": 1.8616575707577103, - "grad_norm": 0.26234787702560425, - "learning_rate": 7.588949528281931e-06, - "loss": 0.0465, - "step": 73340 - }, - { - "epoch": 1.8617844904175658, - "grad_norm": 0.4525549113750458, - "learning_rate": 7.58810339721623e-06, - "loss": 0.0305, - "step": 73345 - }, - { - "epoch": 1.861911410077421, - "grad_norm": 0.3661547005176544, - "learning_rate": 7.587257266150527e-06, - "loss": 0.043, - "step": 73350 - }, - { - "epoch": 1.8620383297372762, - "grad_norm": 0.332266628742218, - "learning_rate": 7.5864111350848254e-06, - "loss": 0.0485, - "step": 73355 - }, - { - "epoch": 1.8621652493971315, - "grad_norm": 0.39736005663871765, - "learning_rate": 7.585565004019123e-06, - "loss": 0.0478, - "step": 73360 - }, - { - "epoch": 1.862292169056987, - "grad_norm": 0.7498329281806946, - "learning_rate": 7.584718872953421e-06, - "loss": 0.052, - "step": 73365 - }, - { - "epoch": 1.8624190887168424, - "grad_norm": 0.40449780225753784, - "learning_rate": 7.583872741887719e-06, - "loss": 0.0396, - "step": 73370 - }, - { - "epoch": 1.8625460083766976, - "grad_norm": 0.4577851891517639, - "learning_rate": 7.583026610822017e-06, - "loss": 0.0473, - "step": 73375 - }, - { - "epoch": 1.8626729280365528, - "grad_norm": 0.28925439715385437, - "learning_rate": 7.582180479756315e-06, - "loss": 0.0327, - "step": 73380 - }, - { - "epoch": 1.862799847696408, - "grad_norm": 0.21302592754364014, - "learning_rate": 7.581334348690613e-06, - "loss": 0.038, - "step": 73385 - }, - { - "epoch": 1.8629267673562635, - "grad_norm": 0.5160405039787292, - "learning_rate": 7.580488217624911e-06, - "loss": 0.0345, - "step": 73390 - }, - { - "epoch": 1.863053687016119, - "grad_norm": 0.2732714116573334, - "learning_rate": 7.579642086559209e-06, - "loss": 0.0273, - "step": 73395 - }, - { - "epoch": 1.8631806066759742, - "grad_norm": 1.2055752277374268, - "learning_rate": 7.578795955493507e-06, - "loss": 0.0366, - "step": 73400 - }, - { - "epoch": 1.8633075263358294, - "grad_norm": 0.5458006262779236, - "learning_rate": 7.577949824427805e-06, - "loss": 0.0286, - "step": 73405 - }, - { - "epoch": 1.8634344459956846, - "grad_norm": 0.5006032586097717, - "learning_rate": 7.577103693362102e-06, - "loss": 0.0434, - "step": 73410 - }, - { - "epoch": 1.86356136565554, - "grad_norm": 0.5495287179946899, - "learning_rate": 7.5762575622964e-06, - "loss": 0.0247, - "step": 73415 - }, - { - "epoch": 1.8636882853153953, - "grad_norm": 0.4025460183620453, - "learning_rate": 7.575411431230698e-06, - "loss": 0.0324, - "step": 73420 - }, - { - "epoch": 1.8638152049752508, - "grad_norm": 0.43932005763053894, - "learning_rate": 7.574565300164996e-06, - "loss": 0.0367, - "step": 73425 - }, - { - "epoch": 1.863942124635106, - "grad_norm": 0.8542406558990479, - "learning_rate": 7.573719169099294e-06, - "loss": 0.0453, - "step": 73430 - }, - { - "epoch": 1.8640690442949612, - "grad_norm": 0.38319575786590576, - "learning_rate": 7.572873038033592e-06, - "loss": 0.0471, - "step": 73435 - }, - { - "epoch": 1.8641959639548165, - "grad_norm": 0.3895409107208252, - "learning_rate": 7.57202690696789e-06, - "loss": 0.0556, - "step": 73440 - }, - { - "epoch": 1.864322883614672, - "grad_norm": 0.47808557748794556, - "learning_rate": 7.571180775902188e-06, - "loss": 0.0336, - "step": 73445 - }, - { - "epoch": 1.8644498032745274, - "grad_norm": 0.2718760669231415, - "learning_rate": 7.570334644836486e-06, - "loss": 0.0489, - "step": 73450 - }, - { - "epoch": 1.8645767229343826, - "grad_norm": 0.42986196279525757, - "learning_rate": 7.569488513770784e-06, - "loss": 0.0378, - "step": 73455 - }, - { - "epoch": 1.8647036425942378, - "grad_norm": 0.3622870147228241, - "learning_rate": 7.568642382705082e-06, - "loss": 0.0193, - "step": 73460 - }, - { - "epoch": 1.864830562254093, - "grad_norm": 0.3284076154232025, - "learning_rate": 7.56779625163938e-06, - "loss": 0.0333, - "step": 73465 - }, - { - "epoch": 1.8649574819139485, - "grad_norm": 0.32985740900039673, - "learning_rate": 7.566950120573678e-06, - "loss": 0.0222, - "step": 73470 - }, - { - "epoch": 1.8650844015738037, - "grad_norm": 0.4527156352996826, - "learning_rate": 7.566103989507976e-06, - "loss": 0.0316, - "step": 73475 - }, - { - "epoch": 1.8652113212336592, - "grad_norm": 0.44402363896369934, - "learning_rate": 7.565257858442273e-06, - "loss": 0.0327, - "step": 73480 - }, - { - "epoch": 1.8653382408935144, - "grad_norm": 0.17790792882442474, - "learning_rate": 7.564411727376571e-06, - "loss": 0.0244, - "step": 73485 - }, - { - "epoch": 1.8654651605533696, - "grad_norm": 0.7167782187461853, - "learning_rate": 7.563565596310869e-06, - "loss": 0.0381, - "step": 73490 - }, - { - "epoch": 1.8655920802132249, - "grad_norm": 0.30281850695610046, - "learning_rate": 7.562719465245167e-06, - "loss": 0.0261, - "step": 73495 - }, - { - "epoch": 1.8657189998730803, - "grad_norm": 0.3630172908306122, - "learning_rate": 7.561873334179465e-06, - "loss": 0.0259, - "step": 73500 - }, - { - "epoch": 1.8658459195329358, - "grad_norm": 0.3273363411426544, - "learning_rate": 7.561027203113763e-06, - "loss": 0.0463, - "step": 73505 - }, - { - "epoch": 1.865972839192791, - "grad_norm": 0.5342987775802612, - "learning_rate": 7.560181072048061e-06, - "loss": 0.0451, - "step": 73510 - }, - { - "epoch": 1.8660997588526462, - "grad_norm": 1.393652319908142, - "learning_rate": 7.559334940982359e-06, - "loss": 0.0418, - "step": 73515 - }, - { - "epoch": 1.8662266785125015, - "grad_norm": 0.23329082131385803, - "learning_rate": 7.558488809916657e-06, - "loss": 0.048, - "step": 73520 - }, - { - "epoch": 1.866353598172357, - "grad_norm": 0.32690417766571045, - "learning_rate": 7.557642678850955e-06, - "loss": 0.0413, - "step": 73525 - }, - { - "epoch": 1.8664805178322124, - "grad_norm": 0.5742086172103882, - "learning_rate": 7.5567965477852526e-06, - "loss": 0.0517, - "step": 73530 - }, - { - "epoch": 1.8666074374920676, - "grad_norm": 0.41749638319015503, - "learning_rate": 7.555950416719551e-06, - "loss": 0.0441, - "step": 73535 - }, - { - "epoch": 1.8667343571519228, - "grad_norm": 0.6347283124923706, - "learning_rate": 7.5551042856538485e-06, - "loss": 0.0284, - "step": 73540 - }, - { - "epoch": 1.866861276811778, - "grad_norm": 0.5140455961227417, - "learning_rate": 7.554258154588147e-06, - "loss": 0.0496, - "step": 73545 - }, - { - "epoch": 1.8669881964716335, - "grad_norm": 1.7740757465362549, - "learning_rate": 7.553412023522444e-06, - "loss": 0.0328, - "step": 73550 - }, - { - "epoch": 1.8671151161314887, - "grad_norm": 0.3328590393066406, - "learning_rate": 7.552565892456742e-06, - "loss": 0.0512, - "step": 73555 - }, - { - "epoch": 1.8672420357913442, - "grad_norm": 0.5116691589355469, - "learning_rate": 7.55171976139104e-06, - "loss": 0.0545, - "step": 73560 - }, - { - "epoch": 1.8673689554511994, - "grad_norm": 0.3914783000946045, - "learning_rate": 7.550873630325338e-06, - "loss": 0.0311, - "step": 73565 - }, - { - "epoch": 1.8674958751110546, - "grad_norm": 0.5405775904655457, - "learning_rate": 7.5500274992596355e-06, - "loss": 0.0473, - "step": 73570 - }, - { - "epoch": 1.8676227947709099, - "grad_norm": 0.4745592474937439, - "learning_rate": 7.549181368193934e-06, - "loss": 0.038, - "step": 73575 - }, - { - "epoch": 1.8677497144307653, - "grad_norm": 0.45318031311035156, - "learning_rate": 7.5483352371282315e-06, - "loss": 0.0376, - "step": 73580 - }, - { - "epoch": 1.8678766340906208, - "grad_norm": 0.23582878708839417, - "learning_rate": 7.54748910606253e-06, - "loss": 0.036, - "step": 73585 - }, - { - "epoch": 1.868003553750476, - "grad_norm": 0.5273194909095764, - "learning_rate": 7.5466429749968275e-06, - "loss": 0.0466, - "step": 73590 - }, - { - "epoch": 1.8681304734103312, - "grad_norm": 0.4436270296573639, - "learning_rate": 7.545796843931126e-06, - "loss": 0.0254, - "step": 73595 - }, - { - "epoch": 1.8682573930701865, - "grad_norm": 0.30496543645858765, - "learning_rate": 7.544950712865423e-06, - "loss": 0.0368, - "step": 73600 - }, - { - "epoch": 1.868384312730042, - "grad_norm": 0.35649222135543823, - "learning_rate": 7.544104581799722e-06, - "loss": 0.0282, - "step": 73605 - }, - { - "epoch": 1.8685112323898974, - "grad_norm": 0.8184651732444763, - "learning_rate": 7.543258450734019e-06, - "loss": 0.0444, - "step": 73610 - }, - { - "epoch": 1.8686381520497526, - "grad_norm": 1.3168188333511353, - "learning_rate": 7.542412319668318e-06, - "loss": 0.0524, - "step": 73615 - }, - { - "epoch": 1.8687650717096078, - "grad_norm": 0.7852512001991272, - "learning_rate": 7.5415661886026145e-06, - "loss": 0.0381, - "step": 73620 - }, - { - "epoch": 1.868891991369463, - "grad_norm": 0.5679740309715271, - "learning_rate": 7.540720057536913e-06, - "loss": 0.0399, - "step": 73625 - }, - { - "epoch": 1.8690189110293183, - "grad_norm": 0.3738311231136322, - "learning_rate": 7.5398739264712104e-06, - "loss": 0.0458, - "step": 73630 - }, - { - "epoch": 1.8691458306891737, - "grad_norm": 0.38672274351119995, - "learning_rate": 7.539027795405509e-06, - "loss": 0.0326, - "step": 73635 - }, - { - "epoch": 1.8692727503490292, - "grad_norm": 0.4942168593406677, - "learning_rate": 7.538181664339806e-06, - "loss": 0.0418, - "step": 73640 - }, - { - "epoch": 1.8693996700088844, - "grad_norm": 0.4416219890117645, - "learning_rate": 7.537335533274105e-06, - "loss": 0.04, - "step": 73645 - }, - { - "epoch": 1.8695265896687396, - "grad_norm": 0.41674020886421204, - "learning_rate": 7.536489402208402e-06, - "loss": 0.0316, - "step": 73650 - }, - { - "epoch": 1.8696535093285949, - "grad_norm": 0.42338889837265015, - "learning_rate": 7.535643271142701e-06, - "loss": 0.0409, - "step": 73655 - }, - { - "epoch": 1.8697804289884503, - "grad_norm": 0.3381919860839844, - "learning_rate": 7.534797140076998e-06, - "loss": 0.0535, - "step": 73660 - }, - { - "epoch": 1.8699073486483058, - "grad_norm": 0.545272171497345, - "learning_rate": 7.533951009011297e-06, - "loss": 0.0328, - "step": 73665 - }, - { - "epoch": 1.870034268308161, - "grad_norm": 1.658664584159851, - "learning_rate": 7.533104877945594e-06, - "loss": 0.0301, - "step": 73670 - }, - { - "epoch": 1.8701611879680162, - "grad_norm": 0.4693523943424225, - "learning_rate": 7.532258746879893e-06, - "loss": 0.0541, - "step": 73675 - }, - { - "epoch": 1.8702881076278715, - "grad_norm": 0.30579185485839844, - "learning_rate": 7.53141261581419e-06, - "loss": 0.0434, - "step": 73680 - }, - { - "epoch": 1.870415027287727, - "grad_norm": 0.5520510077476501, - "learning_rate": 7.530566484748489e-06, - "loss": 0.0515, - "step": 73685 - }, - { - "epoch": 1.8705419469475821, - "grad_norm": 0.12417864799499512, - "learning_rate": 7.529720353682785e-06, - "loss": 0.042, - "step": 73690 - }, - { - "epoch": 1.8706688666074376, - "grad_norm": 0.5447812080383301, - "learning_rate": 7.528874222617084e-06, - "loss": 0.0212, - "step": 73695 - }, - { - "epoch": 1.8707957862672928, - "grad_norm": 0.5343620181083679, - "learning_rate": 7.528028091551381e-06, - "loss": 0.0373, - "step": 73700 - }, - { - "epoch": 1.870922705927148, - "grad_norm": 0.2356509566307068, - "learning_rate": 7.52718196048568e-06, - "loss": 0.0371, - "step": 73705 - }, - { - "epoch": 1.8710496255870033, - "grad_norm": 0.3244430124759674, - "learning_rate": 7.526335829419977e-06, - "loss": 0.0338, - "step": 73710 - }, - { - "epoch": 1.8711765452468587, - "grad_norm": 0.5338812470436096, - "learning_rate": 7.525489698354276e-06, - "loss": 0.0552, - "step": 73715 - }, - { - "epoch": 1.8713034649067142, - "grad_norm": 0.4335499703884125, - "learning_rate": 7.524643567288573e-06, - "loss": 0.0282, - "step": 73720 - }, - { - "epoch": 1.8714303845665694, - "grad_norm": 0.3463226556777954, - "learning_rate": 7.523797436222872e-06, - "loss": 0.0277, - "step": 73725 - }, - { - "epoch": 1.8715573042264246, - "grad_norm": 0.4673941135406494, - "learning_rate": 7.522951305157169e-06, - "loss": 0.0316, - "step": 73730 - }, - { - "epoch": 1.8716842238862799, - "grad_norm": 0.34205445647239685, - "learning_rate": 7.5221051740914676e-06, - "loss": 0.0365, - "step": 73735 - }, - { - "epoch": 1.8718111435461353, - "grad_norm": 0.2666976749897003, - "learning_rate": 7.521259043025765e-06, - "loss": 0.0534, - "step": 73740 - }, - { - "epoch": 1.8719380632059908, - "grad_norm": 0.4592796564102173, - "learning_rate": 7.5204129119600635e-06, - "loss": 0.0507, - "step": 73745 - }, - { - "epoch": 1.872064982865846, - "grad_norm": 0.40054646134376526, - "learning_rate": 7.51956678089436e-06, - "loss": 0.0302, - "step": 73750 - }, - { - "epoch": 1.8721919025257012, - "grad_norm": 0.4866581857204437, - "learning_rate": 7.5187206498286595e-06, - "loss": 0.036, - "step": 73755 - }, - { - "epoch": 1.8723188221855565, - "grad_norm": 0.2580988109111786, - "learning_rate": 7.517874518762956e-06, - "loss": 0.0401, - "step": 73760 - }, - { - "epoch": 1.872445741845412, - "grad_norm": 0.4803682565689087, - "learning_rate": 7.517028387697255e-06, - "loss": 0.0326, - "step": 73765 - }, - { - "epoch": 1.8725726615052671, - "grad_norm": 2.4046523571014404, - "learning_rate": 7.516182256631552e-06, - "loss": 0.0399, - "step": 73770 - }, - { - "epoch": 1.8726995811651226, - "grad_norm": 0.3445727527141571, - "learning_rate": 7.5153361255658505e-06, - "loss": 0.0301, - "step": 73775 - }, - { - "epoch": 1.8728265008249778, - "grad_norm": 0.492262065410614, - "learning_rate": 7.514489994500148e-06, - "loss": 0.0273, - "step": 73780 - }, - { - "epoch": 1.872953420484833, - "grad_norm": 0.488438218832016, - "learning_rate": 7.5136438634344465e-06, - "loss": 0.0286, - "step": 73785 - }, - { - "epoch": 1.8730803401446883, - "grad_norm": 0.584281861782074, - "learning_rate": 7.512797732368744e-06, - "loss": 0.0355, - "step": 73790 - }, - { - "epoch": 1.8732072598045437, - "grad_norm": 1.2342113256454468, - "learning_rate": 7.5119516013030425e-06, - "loss": 0.0449, - "step": 73795 - }, - { - "epoch": 1.8733341794643992, - "grad_norm": 0.8883946537971497, - "learning_rate": 7.51110547023734e-06, - "loss": 0.0282, - "step": 73800 - }, - { - "epoch": 1.8734610991242544, - "grad_norm": 0.2954212427139282, - "learning_rate": 7.510259339171638e-06, - "loss": 0.0235, - "step": 73805 - }, - { - "epoch": 1.8735880187841096, - "grad_norm": 0.5594305992126465, - "learning_rate": 7.509413208105937e-06, - "loss": 0.049, - "step": 73810 - }, - { - "epoch": 1.8737149384439649, - "grad_norm": 0.2513466477394104, - "learning_rate": 7.508567077040234e-06, - "loss": 0.0245, - "step": 73815 - }, - { - "epoch": 1.8738418581038203, - "grad_norm": 0.34482139348983765, - "learning_rate": 7.507720945974533e-06, - "loss": 0.038, - "step": 73820 - }, - { - "epoch": 1.8739687777636755, - "grad_norm": 0.6154205799102783, - "learning_rate": 7.50687481490883e-06, - "loss": 0.0499, - "step": 73825 - }, - { - "epoch": 1.874095697423531, - "grad_norm": 0.44373002648353577, - "learning_rate": 7.506028683843129e-06, - "loss": 0.0308, - "step": 73830 - }, - { - "epoch": 1.8742226170833862, - "grad_norm": 0.6326587200164795, - "learning_rate": 7.5051825527774254e-06, - "loss": 0.0491, - "step": 73835 - }, - { - "epoch": 1.8743495367432415, - "grad_norm": 0.5984882116317749, - "learning_rate": 7.504336421711725e-06, - "loss": 0.0301, - "step": 73840 - }, - { - "epoch": 1.8744764564030967, - "grad_norm": 0.31174543499946594, - "learning_rate": 7.503490290646021e-06, - "loss": 0.025, - "step": 73845 - }, - { - "epoch": 1.8746033760629521, - "grad_norm": 0.43460485339164734, - "learning_rate": 7.50264415958032e-06, - "loss": 0.0521, - "step": 73850 - }, - { - "epoch": 1.8747302957228076, - "grad_norm": 0.6473285555839539, - "learning_rate": 7.501798028514617e-06, - "loss": 0.0402, - "step": 73855 - }, - { - "epoch": 1.8748572153826628, - "grad_norm": 0.7723906636238098, - "learning_rate": 7.500951897448916e-06, - "loss": 0.0323, - "step": 73860 - }, - { - "epoch": 1.874984135042518, - "grad_norm": 0.4740931987762451, - "learning_rate": 7.500105766383213e-06, - "loss": 0.0247, - "step": 73865 - }, - { - "epoch": 1.8751110547023733, - "grad_norm": 0.3979039788246155, - "learning_rate": 7.499259635317512e-06, - "loss": 0.0307, - "step": 73870 - }, - { - "epoch": 1.8752379743622287, - "grad_norm": 0.5045456886291504, - "learning_rate": 7.498413504251809e-06, - "loss": 0.0478, - "step": 73875 - }, - { - "epoch": 1.8753648940220842, - "grad_norm": 0.5192897319793701, - "learning_rate": 7.497567373186108e-06, - "loss": 0.0229, - "step": 73880 - }, - { - "epoch": 1.8754918136819394, - "grad_norm": 0.7493794560432434, - "learning_rate": 7.496721242120405e-06, - "loss": 0.0414, - "step": 73885 - }, - { - "epoch": 1.8756187333417946, - "grad_norm": 0.45392656326293945, - "learning_rate": 7.495875111054704e-06, - "loss": 0.0299, - "step": 73890 - }, - { - "epoch": 1.8757456530016499, - "grad_norm": 0.3697686195373535, - "learning_rate": 7.495028979989001e-06, - "loss": 0.037, - "step": 73895 - }, - { - "epoch": 1.8758725726615053, - "grad_norm": 0.35779282450675964, - "learning_rate": 7.4941828489233e-06, - "loss": 0.0353, - "step": 73900 - }, - { - "epoch": 1.8759994923213605, - "grad_norm": 0.2909733057022095, - "learning_rate": 7.493336717857596e-06, - "loss": 0.022, - "step": 73905 - }, - { - "epoch": 1.876126411981216, - "grad_norm": 0.313346803188324, - "learning_rate": 7.492490586791895e-06, - "loss": 0.0385, - "step": 73910 - }, - { - "epoch": 1.8762533316410712, - "grad_norm": 0.3216453492641449, - "learning_rate": 7.491644455726192e-06, - "loss": 0.029, - "step": 73915 - }, - { - "epoch": 1.8763802513009264, - "grad_norm": 0.2557257413864136, - "learning_rate": 7.490798324660491e-06, - "loss": 0.0349, - "step": 73920 - }, - { - "epoch": 1.8765071709607817, - "grad_norm": 0.43648651242256165, - "learning_rate": 7.489952193594788e-06, - "loss": 0.0363, - "step": 73925 - }, - { - "epoch": 1.8766340906206371, - "grad_norm": 0.42016130685806274, - "learning_rate": 7.489106062529087e-06, - "loss": 0.0243, - "step": 73930 - }, - { - "epoch": 1.8767610102804926, - "grad_norm": 0.5657927393913269, - "learning_rate": 7.488259931463384e-06, - "loss": 0.0278, - "step": 73935 - }, - { - "epoch": 1.8768879299403478, - "grad_norm": 0.601011335849762, - "learning_rate": 7.4874138003976826e-06, - "loss": 0.0398, - "step": 73940 - }, - { - "epoch": 1.877014849600203, - "grad_norm": 0.4388642907142639, - "learning_rate": 7.48656766933198e-06, - "loss": 0.0331, - "step": 73945 - }, - { - "epoch": 1.8771417692600583, - "grad_norm": 1.3266913890838623, - "learning_rate": 7.4857215382662785e-06, - "loss": 0.027, - "step": 73950 - }, - { - "epoch": 1.8772686889199137, - "grad_norm": 0.22581563889980316, - "learning_rate": 7.484875407200576e-06, - "loss": 0.0282, - "step": 73955 - }, - { - "epoch": 1.8773956085797692, - "grad_norm": 0.28941610455513, - "learning_rate": 7.4840292761348745e-06, - "loss": 0.0442, - "step": 73960 - }, - { - "epoch": 1.8775225282396244, - "grad_norm": 0.4075261354446411, - "learning_rate": 7.483183145069172e-06, - "loss": 0.037, - "step": 73965 - }, - { - "epoch": 1.8776494478994796, - "grad_norm": 2.4731791019439697, - "learning_rate": 7.4823370140034704e-06, - "loss": 0.0322, - "step": 73970 - }, - { - "epoch": 1.8777763675593349, - "grad_norm": 0.3712489604949951, - "learning_rate": 7.481490882937767e-06, - "loss": 0.0326, - "step": 73975 - }, - { - "epoch": 1.87790328721919, - "grad_norm": 0.6470932960510254, - "learning_rate": 7.4806447518720655e-06, - "loss": 0.0476, - "step": 73980 - }, - { - "epoch": 1.8780302068790455, - "grad_norm": 0.6559893488883972, - "learning_rate": 7.479798620806363e-06, - "loss": 0.0459, - "step": 73985 - }, - { - "epoch": 1.878157126538901, - "grad_norm": 0.6153828501701355, - "learning_rate": 7.4789524897406615e-06, - "loss": 0.0334, - "step": 73990 - }, - { - "epoch": 1.8782840461987562, - "grad_norm": 0.7165561318397522, - "learning_rate": 7.478106358674959e-06, - "loss": 0.0408, - "step": 73995 - }, - { - "epoch": 1.8784109658586114, - "grad_norm": 0.40002527832984924, - "learning_rate": 7.4772602276092575e-06, - "loss": 0.0296, - "step": 74000 - }, - { - "epoch": 1.8785378855184667, - "grad_norm": 0.27569103240966797, - "learning_rate": 7.476414096543555e-06, - "loss": 0.0253, - "step": 74005 - }, - { - "epoch": 1.8786648051783221, - "grad_norm": 0.2580665946006775, - "learning_rate": 7.475567965477853e-06, - "loss": 0.0305, - "step": 74010 - }, - { - "epoch": 1.8787917248381776, - "grad_norm": 0.28244999051094055, - "learning_rate": 7.474721834412151e-06, - "loss": 0.0274, - "step": 74015 - }, - { - "epoch": 1.8789186444980328, - "grad_norm": 0.440202921628952, - "learning_rate": 7.473875703346449e-06, - "loss": 0.0357, - "step": 74020 - }, - { - "epoch": 1.879045564157888, - "grad_norm": 0.5103312730789185, - "learning_rate": 7.473029572280747e-06, - "loss": 0.032, - "step": 74025 - }, - { - "epoch": 1.8791724838177433, - "grad_norm": 0.24015852808952332, - "learning_rate": 7.472183441215045e-06, - "loss": 0.0206, - "step": 74030 - }, - { - "epoch": 1.8792994034775987, - "grad_norm": 0.5262442827224731, - "learning_rate": 7.471337310149343e-06, - "loss": 0.0315, - "step": 74035 - }, - { - "epoch": 1.879426323137454, - "grad_norm": 0.49194270372390747, - "learning_rate": 7.470491179083641e-06, - "loss": 0.0443, - "step": 74040 - }, - { - "epoch": 1.8795532427973094, - "grad_norm": 1.5700843334197998, - "learning_rate": 7.469645048017938e-06, - "loss": 0.0463, - "step": 74045 - }, - { - "epoch": 1.8796801624571646, - "grad_norm": 0.5707568526268005, - "learning_rate": 7.468798916952236e-06, - "loss": 0.0387, - "step": 74050 - }, - { - "epoch": 1.8798070821170199, - "grad_norm": 0.15014947950839996, - "learning_rate": 7.467952785886534e-06, - "loss": 0.0324, - "step": 74055 - }, - { - "epoch": 1.879934001776875, - "grad_norm": 0.42866846919059753, - "learning_rate": 7.467106654820832e-06, - "loss": 0.0314, - "step": 74060 - }, - { - "epoch": 1.8800609214367305, - "grad_norm": 0.5489247441291809, - "learning_rate": 7.46626052375513e-06, - "loss": 0.0365, - "step": 74065 - }, - { - "epoch": 1.880187841096586, - "grad_norm": 0.5473324656486511, - "learning_rate": 7.465414392689428e-06, - "loss": 0.0317, - "step": 74070 - }, - { - "epoch": 1.8803147607564412, - "grad_norm": 0.3081494867801666, - "learning_rate": 7.464568261623726e-06, - "loss": 0.0564, - "step": 74075 - }, - { - "epoch": 1.8804416804162964, - "grad_norm": 0.496761292219162, - "learning_rate": 7.463722130558024e-06, - "loss": 0.0573, - "step": 74080 - }, - { - "epoch": 1.8805686000761517, - "grad_norm": 0.3653762638568878, - "learning_rate": 7.462875999492322e-06, - "loss": 0.0379, - "step": 74085 - }, - { - "epoch": 1.8806955197360071, - "grad_norm": 0.3443663716316223, - "learning_rate": 7.46202986842662e-06, - "loss": 0.0448, - "step": 74090 - }, - { - "epoch": 1.8808224393958626, - "grad_norm": 0.5805698037147522, - "learning_rate": 7.461183737360918e-06, - "loss": 0.0508, - "step": 74095 - }, - { - "epoch": 1.8809493590557178, - "grad_norm": 0.4384879171848297, - "learning_rate": 7.460337606295216e-06, - "loss": 0.0324, - "step": 74100 - }, - { - "epoch": 1.881076278715573, - "grad_norm": 1.1938024759292603, - "learning_rate": 7.459491475229514e-06, - "loss": 0.0413, - "step": 74105 - }, - { - "epoch": 1.8812031983754283, - "grad_norm": 0.403897225856781, - "learning_rate": 7.458645344163812e-06, - "loss": 0.0315, - "step": 74110 - }, - { - "epoch": 1.8813301180352837, - "grad_norm": 0.42535677552223206, - "learning_rate": 7.457799213098109e-06, - "loss": 0.067, - "step": 74115 - }, - { - "epoch": 1.881457037695139, - "grad_norm": 0.40718138217926025, - "learning_rate": 7.456953082032407e-06, - "loss": 0.0532, - "step": 74120 - }, - { - "epoch": 1.8815839573549944, - "grad_norm": 0.41928422451019287, - "learning_rate": 7.456106950966705e-06, - "loss": 0.0484, - "step": 74125 - }, - { - "epoch": 1.8817108770148496, - "grad_norm": 0.32495737075805664, - "learning_rate": 7.455260819901003e-06, - "loss": 0.0352, - "step": 74130 - }, - { - "epoch": 1.8818377966747049, - "grad_norm": 0.44181257486343384, - "learning_rate": 7.454414688835301e-06, - "loss": 0.0395, - "step": 74135 - }, - { - "epoch": 1.88196471633456, - "grad_norm": 0.46676406264305115, - "learning_rate": 7.453568557769599e-06, - "loss": 0.0429, - "step": 74140 - }, - { - "epoch": 1.8820916359944155, - "grad_norm": 0.4480361342430115, - "learning_rate": 7.452722426703897e-06, - "loss": 0.0388, - "step": 74145 - }, - { - "epoch": 1.882218555654271, - "grad_norm": 0.4532800018787384, - "learning_rate": 7.451876295638195e-06, - "loss": 0.037, - "step": 74150 - }, - { - "epoch": 1.8823454753141262, - "grad_norm": 0.2113012820482254, - "learning_rate": 7.451030164572493e-06, - "loss": 0.031, - "step": 74155 - }, - { - "epoch": 1.8824723949739814, - "grad_norm": 0.5053550004959106, - "learning_rate": 7.450184033506791e-06, - "loss": 0.0335, - "step": 74160 - }, - { - "epoch": 1.8825993146338367, - "grad_norm": 0.4111632704734802, - "learning_rate": 7.449337902441089e-06, - "loss": 0.0421, - "step": 74165 - }, - { - "epoch": 1.8827262342936921, - "grad_norm": 0.6703060269355774, - "learning_rate": 7.448491771375387e-06, - "loss": 0.0369, - "step": 74170 - }, - { - "epoch": 1.8828531539535474, - "grad_norm": 0.7132113575935364, - "learning_rate": 7.447645640309685e-06, - "loss": 0.0405, - "step": 74175 - }, - { - "epoch": 1.8829800736134028, - "grad_norm": 0.40352198481559753, - "learning_rate": 7.446799509243983e-06, - "loss": 0.0464, - "step": 74180 - }, - { - "epoch": 1.883106993273258, - "grad_norm": 0.363723486661911, - "learning_rate": 7.44595337817828e-06, - "loss": 0.0295, - "step": 74185 - }, - { - "epoch": 1.8832339129331133, - "grad_norm": 0.5913069844245911, - "learning_rate": 7.445107247112578e-06, - "loss": 0.0386, - "step": 74190 - }, - { - "epoch": 1.8833608325929685, - "grad_norm": 0.5578503608703613, - "learning_rate": 7.444261116046876e-06, - "loss": 0.0485, - "step": 74195 - }, - { - "epoch": 1.883487752252824, - "grad_norm": 0.25614243745803833, - "learning_rate": 7.443414984981174e-06, - "loss": 0.0377, - "step": 74200 - }, - { - "epoch": 1.8836146719126794, - "grad_norm": 0.5022077560424805, - "learning_rate": 7.442568853915472e-06, - "loss": 0.0373, - "step": 74205 - }, - { - "epoch": 1.8837415915725346, - "grad_norm": 0.39866676926612854, - "learning_rate": 7.44172272284977e-06, - "loss": 0.0411, - "step": 74210 - }, - { - "epoch": 1.8838685112323899, - "grad_norm": 0.33455052971839905, - "learning_rate": 7.4408765917840676e-06, - "loss": 0.0466, - "step": 74215 - }, - { - "epoch": 1.883995430892245, - "grad_norm": 1.2645626068115234, - "learning_rate": 7.440030460718366e-06, - "loss": 0.0463, - "step": 74220 - }, - { - "epoch": 1.8841223505521005, - "grad_norm": 0.7792898416519165, - "learning_rate": 7.4391843296526635e-06, - "loss": 0.0524, - "step": 74225 - }, - { - "epoch": 1.884249270211956, - "grad_norm": 0.49314090609550476, - "learning_rate": 7.438338198586962e-06, - "loss": 0.0454, - "step": 74230 - }, - { - "epoch": 1.8843761898718112, - "grad_norm": 0.41846007108688354, - "learning_rate": 7.4374920675212595e-06, - "loss": 0.0366, - "step": 74235 - }, - { - "epoch": 1.8845031095316664, - "grad_norm": 0.2875153422355652, - "learning_rate": 7.436645936455558e-06, - "loss": 0.0217, - "step": 74240 - }, - { - "epoch": 1.8846300291915217, - "grad_norm": 0.39208611845970154, - "learning_rate": 7.435799805389855e-06, - "loss": 0.0507, - "step": 74245 - }, - { - "epoch": 1.8847569488513771, - "grad_norm": 0.2931087613105774, - "learning_rate": 7.434953674324154e-06, - "loss": 0.029, - "step": 74250 - }, - { - "epoch": 1.8848838685112324, - "grad_norm": 0.37661659717559814, - "learning_rate": 7.4341075432584506e-06, - "loss": 0.044, - "step": 74255 - }, - { - "epoch": 1.8850107881710878, - "grad_norm": 0.3307968080043793, - "learning_rate": 7.433261412192749e-06, - "loss": 0.0266, - "step": 74260 - }, - { - "epoch": 1.885137707830943, - "grad_norm": 0.9301825761795044, - "learning_rate": 7.4324152811270465e-06, - "loss": 0.0265, - "step": 74265 - }, - { - "epoch": 1.8852646274907983, - "grad_norm": 0.4084140956401825, - "learning_rate": 7.431569150061345e-06, - "loss": 0.0339, - "step": 74270 - }, - { - "epoch": 1.8853915471506535, - "grad_norm": 0.47794309258461, - "learning_rate": 7.4307230189956425e-06, - "loss": 0.0542, - "step": 74275 - }, - { - "epoch": 1.885518466810509, - "grad_norm": 1.0083372592926025, - "learning_rate": 7.429876887929941e-06, - "loss": 0.0165, - "step": 74280 - }, - { - "epoch": 1.8856453864703644, - "grad_norm": 0.29742395877838135, - "learning_rate": 7.4290307568642384e-06, - "loss": 0.047, - "step": 74285 - }, - { - "epoch": 1.8857723061302196, - "grad_norm": 1.0126953125, - "learning_rate": 7.428184625798537e-06, - "loss": 0.0563, - "step": 74290 - }, - { - "epoch": 1.8858992257900749, - "grad_norm": 0.3398003876209259, - "learning_rate": 7.427338494732834e-06, - "loss": 0.0355, - "step": 74295 - }, - { - "epoch": 1.88602614544993, - "grad_norm": 0.3660661578178406, - "learning_rate": 7.426492363667133e-06, - "loss": 0.0305, - "step": 74300 - }, - { - "epoch": 1.8861530651097855, - "grad_norm": 0.8695236444473267, - "learning_rate": 7.42564623260143e-06, - "loss": 0.0357, - "step": 74305 - }, - { - "epoch": 1.886279984769641, - "grad_norm": 0.4697916805744171, - "learning_rate": 7.424800101535729e-06, - "loss": 0.0558, - "step": 74310 - }, - { - "epoch": 1.8864069044294962, - "grad_norm": 0.4932514727115631, - "learning_rate": 7.4239539704700254e-06, - "loss": 0.0441, - "step": 74315 - }, - { - "epoch": 1.8865338240893514, - "grad_norm": 0.9902518391609192, - "learning_rate": 7.423107839404325e-06, - "loss": 0.0354, - "step": 74320 - }, - { - "epoch": 1.8866607437492067, - "grad_norm": 0.6439658403396606, - "learning_rate": 7.422261708338623e-06, - "loss": 0.0307, - "step": 74325 - }, - { - "epoch": 1.886787663409062, - "grad_norm": 0.6227523684501648, - "learning_rate": 7.42141557727292e-06, - "loss": 0.0376, - "step": 74330 - }, - { - "epoch": 1.8869145830689174, - "grad_norm": 0.46035879850387573, - "learning_rate": 7.420569446207219e-06, - "loss": 0.0418, - "step": 74335 - }, - { - "epoch": 1.8870415027287728, - "grad_norm": 0.3966492712497711, - "learning_rate": 7.419723315141516e-06, - "loss": 0.0433, - "step": 74340 - }, - { - "epoch": 1.887168422388628, - "grad_norm": 1.0908516645431519, - "learning_rate": 7.418877184075814e-06, - "loss": 0.0441, - "step": 74345 - }, - { - "epoch": 1.8872953420484833, - "grad_norm": 0.32982927560806274, - "learning_rate": 7.418031053010112e-06, - "loss": 0.036, - "step": 74350 - }, - { - "epoch": 1.8874222617083385, - "grad_norm": 0.4928825795650482, - "learning_rate": 7.41718492194441e-06, - "loss": 0.032, - "step": 74355 - }, - { - "epoch": 1.887549181368194, - "grad_norm": 0.4273185729980469, - "learning_rate": 7.416338790878708e-06, - "loss": 0.03, - "step": 74360 - }, - { - "epoch": 1.8876761010280494, - "grad_norm": 0.9619681239128113, - "learning_rate": 7.415492659813006e-06, - "loss": 0.0306, - "step": 74365 - }, - { - "epoch": 1.8878030206879046, - "grad_norm": 0.6463741660118103, - "learning_rate": 7.414646528747304e-06, - "loss": 0.0257, - "step": 74370 - }, - { - "epoch": 1.8879299403477598, - "grad_norm": 0.7205610871315002, - "learning_rate": 7.413800397681602e-06, - "loss": 0.0246, - "step": 74375 - }, - { - "epoch": 1.888056860007615, - "grad_norm": 0.6478941440582275, - "learning_rate": 7.4129542666159e-06, - "loss": 0.0343, - "step": 74380 - }, - { - "epoch": 1.8881837796674705, - "grad_norm": 0.7212875485420227, - "learning_rate": 7.412108135550198e-06, - "loss": 0.0366, - "step": 74385 - }, - { - "epoch": 1.8883106993273258, - "grad_norm": 0.3956863284111023, - "learning_rate": 7.4112620044844955e-06, - "loss": 0.0283, - "step": 74390 - }, - { - "epoch": 1.8884376189871812, - "grad_norm": 0.4684865176677704, - "learning_rate": 7.410415873418794e-06, - "loss": 0.0378, - "step": 74395 - }, - { - "epoch": 1.8885645386470364, - "grad_norm": 0.5382887125015259, - "learning_rate": 7.409569742353091e-06, - "loss": 0.0308, - "step": 74400 - }, - { - "epoch": 1.8886914583068917, - "grad_norm": 0.49789464473724365, - "learning_rate": 7.408723611287389e-06, - "loss": 0.0304, - "step": 74405 - }, - { - "epoch": 1.888818377966747, - "grad_norm": 0.4649914503097534, - "learning_rate": 7.407877480221687e-06, - "loss": 0.0249, - "step": 74410 - }, - { - "epoch": 1.8889452976266023, - "grad_norm": 0.29560235142707825, - "learning_rate": 7.407031349155985e-06, - "loss": 0.0325, - "step": 74415 - }, - { - "epoch": 1.8890722172864578, - "grad_norm": 0.4870833456516266, - "learning_rate": 7.4061852180902826e-06, - "loss": 0.0311, - "step": 74420 - }, - { - "epoch": 1.889199136946313, - "grad_norm": 0.4964491128921509, - "learning_rate": 7.405339087024581e-06, - "loss": 0.0445, - "step": 74425 - }, - { - "epoch": 1.8893260566061683, - "grad_norm": 0.4180253744125366, - "learning_rate": 7.4044929559588785e-06, - "loss": 0.0473, - "step": 74430 - }, - { - "epoch": 1.8894529762660235, - "grad_norm": 0.43399110436439514, - "learning_rate": 7.403646824893177e-06, - "loss": 0.0281, - "step": 74435 - }, - { - "epoch": 1.889579895925879, - "grad_norm": 0.4201628267765045, - "learning_rate": 7.4028006938274745e-06, - "loss": 0.046, - "step": 74440 - }, - { - "epoch": 1.8897068155857344, - "grad_norm": 2.455545425415039, - "learning_rate": 7.401954562761773e-06, - "loss": 0.0478, - "step": 74445 - }, - { - "epoch": 1.8898337352455896, - "grad_norm": 0.33793601393699646, - "learning_rate": 7.4011084316960704e-06, - "loss": 0.0439, - "step": 74450 - }, - { - "epoch": 1.8899606549054448, - "grad_norm": 0.342317134141922, - "learning_rate": 7.400262300630369e-06, - "loss": 0.0305, - "step": 74455 - }, - { - "epoch": 1.8900875745653, - "grad_norm": 0.38534966111183167, - "learning_rate": 7.399416169564666e-06, - "loss": 0.0363, - "step": 74460 - }, - { - "epoch": 1.8902144942251555, - "grad_norm": 0.5270501375198364, - "learning_rate": 7.398570038498965e-06, - "loss": 0.039, - "step": 74465 - }, - { - "epoch": 1.8903414138850108, - "grad_norm": 0.37435296177864075, - "learning_rate": 7.3977239074332615e-06, - "loss": 0.039, - "step": 74470 - }, - { - "epoch": 1.8904683335448662, - "grad_norm": 0.28252559900283813, - "learning_rate": 7.39687777636756e-06, - "loss": 0.029, - "step": 74475 - }, - { - "epoch": 1.8905952532047214, - "grad_norm": 0.6560195684432983, - "learning_rate": 7.3960316453018575e-06, - "loss": 0.0368, - "step": 74480 - }, - { - "epoch": 1.8907221728645767, - "grad_norm": 0.2117239385843277, - "learning_rate": 7.395185514236156e-06, - "loss": 0.0273, - "step": 74485 - }, - { - "epoch": 1.890849092524432, - "grad_norm": 0.6052525043487549, - "learning_rate": 7.3943393831704534e-06, - "loss": 0.048, - "step": 74490 - }, - { - "epoch": 1.8909760121842873, - "grad_norm": 0.632473349571228, - "learning_rate": 7.393493252104752e-06, - "loss": 0.029, - "step": 74495 - }, - { - "epoch": 1.8911029318441428, - "grad_norm": 0.4673151671886444, - "learning_rate": 7.392647121039049e-06, - "loss": 0.0441, - "step": 74500 - }, - { - "epoch": 1.891229851503998, - "grad_norm": 0.5579447746276855, - "learning_rate": 7.391800989973348e-06, - "loss": 0.0402, - "step": 74505 - }, - { - "epoch": 1.8913567711638533, - "grad_norm": 0.6453192234039307, - "learning_rate": 7.390954858907645e-06, - "loss": 0.0336, - "step": 74510 - }, - { - "epoch": 1.8914836908237085, - "grad_norm": 0.28358668088912964, - "learning_rate": 7.390108727841944e-06, - "loss": 0.0366, - "step": 74515 - }, - { - "epoch": 1.891610610483564, - "grad_norm": 0.4131391644477844, - "learning_rate": 7.389262596776241e-06, - "loss": 0.0409, - "step": 74520 - }, - { - "epoch": 1.8917375301434192, - "grad_norm": 0.4993913173675537, - "learning_rate": 7.38841646571054e-06, - "loss": 0.0345, - "step": 74525 - }, - { - "epoch": 1.8918644498032746, - "grad_norm": 0.336528480052948, - "learning_rate": 7.387570334644837e-06, - "loss": 0.0325, - "step": 74530 - }, - { - "epoch": 1.8919913694631298, - "grad_norm": 1.0023270845413208, - "learning_rate": 7.386724203579136e-06, - "loss": 0.0588, - "step": 74535 - }, - { - "epoch": 1.892118289122985, - "grad_norm": 0.3479638397693634, - "learning_rate": 7.385878072513432e-06, - "loss": 0.0207, - "step": 74540 - }, - { - "epoch": 1.8922452087828403, - "grad_norm": 0.3435896039009094, - "learning_rate": 7.385031941447731e-06, - "loss": 0.0481, - "step": 74545 - }, - { - "epoch": 1.8923721284426958, - "grad_norm": 0.6650906205177307, - "learning_rate": 7.384185810382028e-06, - "loss": 0.0357, - "step": 74550 - }, - { - "epoch": 1.8924990481025512, - "grad_norm": 0.5611099004745483, - "learning_rate": 7.383339679316327e-06, - "loss": 0.0378, - "step": 74555 - }, - { - "epoch": 1.8926259677624064, - "grad_norm": 0.5545644760131836, - "learning_rate": 7.382493548250624e-06, - "loss": 0.0384, - "step": 74560 - }, - { - "epoch": 1.8927528874222617, - "grad_norm": 0.9831686615943909, - "learning_rate": 7.381647417184923e-06, - "loss": 0.0485, - "step": 74565 - }, - { - "epoch": 1.892879807082117, - "grad_norm": 0.8318731784820557, - "learning_rate": 7.38080128611922e-06, - "loss": 0.0387, - "step": 74570 - }, - { - "epoch": 1.8930067267419723, - "grad_norm": 0.7025355100631714, - "learning_rate": 7.379955155053519e-06, - "loss": 0.0557, - "step": 74575 - }, - { - "epoch": 1.8931336464018278, - "grad_norm": 0.6269879341125488, - "learning_rate": 7.379109023987816e-06, - "loss": 0.0538, - "step": 74580 - }, - { - "epoch": 1.893260566061683, - "grad_norm": 0.7266960144042969, - "learning_rate": 7.378262892922115e-06, - "loss": 0.0458, - "step": 74585 - }, - { - "epoch": 1.8933874857215383, - "grad_norm": 0.3982461988925934, - "learning_rate": 7.377416761856412e-06, - "loss": 0.0247, - "step": 74590 - }, - { - "epoch": 1.8935144053813935, - "grad_norm": 0.31227344274520874, - "learning_rate": 7.3765706307907105e-06, - "loss": 0.0319, - "step": 74595 - }, - { - "epoch": 1.893641325041249, - "grad_norm": 0.686843752861023, - "learning_rate": 7.375724499725008e-06, - "loss": 0.047, - "step": 74600 - }, - { - "epoch": 1.8937682447011042, - "grad_norm": 0.4988148808479309, - "learning_rate": 7.3748783686593065e-06, - "loss": 0.0466, - "step": 74605 - }, - { - "epoch": 1.8938951643609596, - "grad_norm": 0.6062554717063904, - "learning_rate": 7.374032237593603e-06, - "loss": 0.0486, - "step": 74610 - }, - { - "epoch": 1.8940220840208148, - "grad_norm": 0.26493901014328003, - "learning_rate": 7.373186106527902e-06, - "loss": 0.0227, - "step": 74615 - }, - { - "epoch": 1.89414900368067, - "grad_norm": 0.3827832341194153, - "learning_rate": 7.372339975462199e-06, - "loss": 0.0311, - "step": 74620 - }, - { - "epoch": 1.8942759233405253, - "grad_norm": 0.2917088270187378, - "learning_rate": 7.3714938443964976e-06, - "loss": 0.0443, - "step": 74625 - }, - { - "epoch": 1.8944028430003808, - "grad_norm": 0.9283243417739868, - "learning_rate": 7.370647713330795e-06, - "loss": 0.0381, - "step": 74630 - }, - { - "epoch": 1.8945297626602362, - "grad_norm": 0.22756899893283844, - "learning_rate": 7.3698015822650935e-06, - "loss": 0.0223, - "step": 74635 - }, - { - "epoch": 1.8946566823200914, - "grad_norm": 0.6112502217292786, - "learning_rate": 7.368955451199391e-06, - "loss": 0.0357, - "step": 74640 - }, - { - "epoch": 1.8947836019799467, - "grad_norm": 0.5036784410476685, - "learning_rate": 7.3681093201336895e-06, - "loss": 0.0393, - "step": 74645 - }, - { - "epoch": 1.894910521639802, - "grad_norm": 0.39115655422210693, - "learning_rate": 7.367263189067987e-06, - "loss": 0.0294, - "step": 74650 - }, - { - "epoch": 1.8950374412996573, - "grad_norm": 0.3934013545513153, - "learning_rate": 7.3664170580022854e-06, - "loss": 0.0339, - "step": 74655 - }, - { - "epoch": 1.8951643609595128, - "grad_norm": 0.4786794185638428, - "learning_rate": 7.365570926936583e-06, - "loss": 0.033, - "step": 74660 - }, - { - "epoch": 1.895291280619368, - "grad_norm": 0.3971647620201111, - "learning_rate": 7.364724795870881e-06, - "loss": 0.0324, - "step": 74665 - }, - { - "epoch": 1.8954182002792233, - "grad_norm": 0.4902183413505554, - "learning_rate": 7.363878664805179e-06, - "loss": 0.0296, - "step": 74670 - }, - { - "epoch": 1.8955451199390785, - "grad_norm": 0.28146839141845703, - "learning_rate": 7.363032533739477e-06, - "loss": 0.0402, - "step": 74675 - }, - { - "epoch": 1.8956720395989337, - "grad_norm": 0.503254771232605, - "learning_rate": 7.362186402673774e-06, - "loss": 0.0374, - "step": 74680 - }, - { - "epoch": 1.8957989592587892, - "grad_norm": 0.36245816946029663, - "learning_rate": 7.3613402716080725e-06, - "loss": 0.023, - "step": 74685 - }, - { - "epoch": 1.8959258789186446, - "grad_norm": 0.43627825379371643, - "learning_rate": 7.36049414054237e-06, - "loss": 0.0347, - "step": 74690 - }, - { - "epoch": 1.8960527985784998, - "grad_norm": 0.675991952419281, - "learning_rate": 7.3596480094766684e-06, - "loss": 0.0308, - "step": 74695 - }, - { - "epoch": 1.896179718238355, - "grad_norm": 0.5392748117446899, - "learning_rate": 7.358801878410966e-06, - "loss": 0.0563, - "step": 74700 - }, - { - "epoch": 1.8963066378982103, - "grad_norm": 0.4661058783531189, - "learning_rate": 7.357955747345264e-06, - "loss": 0.0464, - "step": 74705 - }, - { - "epoch": 1.8964335575580658, - "grad_norm": 0.3815213739871979, - "learning_rate": 7.357109616279562e-06, - "loss": 0.0414, - "step": 74710 - }, - { - "epoch": 1.8965604772179212, - "grad_norm": 0.717062771320343, - "learning_rate": 7.35626348521386e-06, - "loss": 0.0324, - "step": 74715 - }, - { - "epoch": 1.8966873968777764, - "grad_norm": 0.29764553904533386, - "learning_rate": 7.355417354148158e-06, - "loss": 0.0269, - "step": 74720 - }, - { - "epoch": 1.8968143165376317, - "grad_norm": 0.5430722236633301, - "learning_rate": 7.354571223082456e-06, - "loss": 0.0283, - "step": 74725 - }, - { - "epoch": 1.896941236197487, - "grad_norm": 0.6490036249160767, - "learning_rate": 7.353725092016754e-06, - "loss": 0.0266, - "step": 74730 - }, - { - "epoch": 1.8970681558573423, - "grad_norm": 0.4569244980812073, - "learning_rate": 7.352878960951052e-06, - "loss": 0.0315, - "step": 74735 - }, - { - "epoch": 1.8971950755171976, - "grad_norm": 0.7710807919502258, - "learning_rate": 7.35203282988535e-06, - "loss": 0.0358, - "step": 74740 - }, - { - "epoch": 1.897321995177053, - "grad_norm": 0.48566439747810364, - "learning_rate": 7.351186698819648e-06, - "loss": 0.0314, - "step": 74745 - }, - { - "epoch": 1.8974489148369083, - "grad_norm": 0.41843387484550476, - "learning_rate": 7.350340567753945e-06, - "loss": 0.0445, - "step": 74750 - }, - { - "epoch": 1.8975758344967635, - "grad_norm": 0.4450206756591797, - "learning_rate": 7.349494436688243e-06, - "loss": 0.0504, - "step": 74755 - }, - { - "epoch": 1.8977027541566187, - "grad_norm": 0.4294438362121582, - "learning_rate": 7.348648305622541e-06, - "loss": 0.0522, - "step": 74760 - }, - { - "epoch": 1.8978296738164742, - "grad_norm": 0.22543270885944366, - "learning_rate": 7.347802174556839e-06, - "loss": 0.0212, - "step": 74765 - }, - { - "epoch": 1.8979565934763296, - "grad_norm": 0.9558671116828918, - "learning_rate": 7.346956043491137e-06, - "loss": 0.0615, - "step": 74770 - }, - { - "epoch": 1.8980835131361848, - "grad_norm": 0.373965859413147, - "learning_rate": 7.346109912425435e-06, - "loss": 0.0326, - "step": 74775 - }, - { - "epoch": 1.89821043279604, - "grad_norm": 0.18710000813007355, - "learning_rate": 7.345263781359733e-06, - "loss": 0.0486, - "step": 74780 - }, - { - "epoch": 1.8983373524558953, - "grad_norm": 0.26737019419670105, - "learning_rate": 7.344417650294031e-06, - "loss": 0.0387, - "step": 74785 - }, - { - "epoch": 1.8984642721157508, - "grad_norm": 0.4500572383403778, - "learning_rate": 7.343571519228329e-06, - "loss": 0.0357, - "step": 74790 - }, - { - "epoch": 1.8985911917756062, - "grad_norm": 0.4530860483646393, - "learning_rate": 7.342725388162627e-06, - "loss": 0.0326, - "step": 74795 - }, - { - "epoch": 1.8987181114354614, - "grad_norm": 0.34689322113990784, - "learning_rate": 7.341879257096925e-06, - "loss": 0.0326, - "step": 74800 - }, - { - "epoch": 1.8988450310953167, - "grad_norm": 0.2868269681930542, - "learning_rate": 7.341033126031223e-06, - "loss": 0.0324, - "step": 74805 - }, - { - "epoch": 1.898971950755172, - "grad_norm": 0.5179691910743713, - "learning_rate": 7.34018699496552e-06, - "loss": 0.0323, - "step": 74810 - }, - { - "epoch": 1.8990988704150273, - "grad_norm": 0.3264458179473877, - "learning_rate": 7.339340863899819e-06, - "loss": 0.0252, - "step": 74815 - }, - { - "epoch": 1.8992257900748826, - "grad_norm": 0.549074113368988, - "learning_rate": 7.338494732834116e-06, - "loss": 0.0256, - "step": 74820 - }, - { - "epoch": 1.899352709734738, - "grad_norm": 0.3724745213985443, - "learning_rate": 7.337648601768414e-06, - "loss": 0.0431, - "step": 74825 - }, - { - "epoch": 1.8994796293945932, - "grad_norm": 0.3224852979183197, - "learning_rate": 7.336802470702712e-06, - "loss": 0.0263, - "step": 74830 - }, - { - "epoch": 1.8996065490544485, - "grad_norm": 0.439812570810318, - "learning_rate": 7.33595633963701e-06, - "loss": 0.0381, - "step": 74835 - }, - { - "epoch": 1.8997334687143037, - "grad_norm": 0.6273593306541443, - "learning_rate": 7.3351102085713085e-06, - "loss": 0.048, - "step": 74840 - }, - { - "epoch": 1.8998603883741592, - "grad_norm": 0.3520565629005432, - "learning_rate": 7.334264077505606e-06, - "loss": 0.0323, - "step": 74845 - }, - { - "epoch": 1.8999873080340146, - "grad_norm": 0.7893599271774292, - "learning_rate": 7.3334179464399045e-06, - "loss": 0.0418, - "step": 74850 - }, - { - "epoch": 1.9001142276938698, - "grad_norm": 0.22668854892253876, - "learning_rate": 7.332571815374202e-06, - "loss": 0.0431, - "step": 74855 - }, - { - "epoch": 1.900241147353725, - "grad_norm": 0.46295198798179626, - "learning_rate": 7.3317256843085004e-06, - "loss": 0.0364, - "step": 74860 - }, - { - "epoch": 1.9003680670135803, - "grad_norm": 0.5118817687034607, - "learning_rate": 7.330879553242798e-06, - "loss": 0.0425, - "step": 74865 - }, - { - "epoch": 1.9004949866734357, - "grad_norm": 0.46457433700561523, - "learning_rate": 7.330033422177096e-06, - "loss": 0.0569, - "step": 74870 - }, - { - "epoch": 1.900621906333291, - "grad_norm": 0.38324806094169617, - "learning_rate": 7.329187291111394e-06, - "loss": 0.0274, - "step": 74875 - }, - { - "epoch": 1.9007488259931464, - "grad_norm": 0.2611892819404602, - "learning_rate": 7.328341160045692e-06, - "loss": 0.0222, - "step": 74880 - }, - { - "epoch": 1.9008757456530017, - "grad_norm": 0.40895333886146545, - "learning_rate": 7.32749502897999e-06, - "loss": 0.0396, - "step": 74885 - }, - { - "epoch": 1.9010026653128569, - "grad_norm": 0.6677075624465942, - "learning_rate": 7.326648897914288e-06, - "loss": 0.0473, - "step": 74890 - }, - { - "epoch": 1.9011295849727121, - "grad_norm": 0.34610751271247864, - "learning_rate": 7.325802766848585e-06, - "loss": 0.0359, - "step": 74895 - }, - { - "epoch": 1.9012565046325676, - "grad_norm": 0.18231095373630524, - "learning_rate": 7.324956635782884e-06, - "loss": 0.0243, - "step": 74900 - }, - { - "epoch": 1.901383424292423, - "grad_norm": 0.6044496893882751, - "learning_rate": 7.324110504717181e-06, - "loss": 0.0324, - "step": 74905 - }, - { - "epoch": 1.9015103439522782, - "grad_norm": 0.8724872469902039, - "learning_rate": 7.323264373651479e-06, - "loss": 0.0393, - "step": 74910 - }, - { - "epoch": 1.9016372636121335, - "grad_norm": 0.555270254611969, - "learning_rate": 7.322418242585777e-06, - "loss": 0.0409, - "step": 74915 - }, - { - "epoch": 1.9017641832719887, - "grad_norm": 0.2762957215309143, - "learning_rate": 7.321572111520075e-06, - "loss": 0.0348, - "step": 74920 - }, - { - "epoch": 1.9018911029318442, - "grad_norm": 0.32964828610420227, - "learning_rate": 7.320725980454373e-06, - "loss": 0.046, - "step": 74925 - }, - { - "epoch": 1.9020180225916996, - "grad_norm": 0.6631227731704712, - "learning_rate": 7.319879849388671e-06, - "loss": 0.0178, - "step": 74930 - }, - { - "epoch": 1.9021449422515548, - "grad_norm": 0.2526497542858124, - "learning_rate": 7.319033718322969e-06, - "loss": 0.0441, - "step": 74935 - }, - { - "epoch": 1.90227186191141, - "grad_norm": 0.37786605954170227, - "learning_rate": 7.318187587257267e-06, - "loss": 0.0433, - "step": 74940 - }, - { - "epoch": 1.9023987815712653, - "grad_norm": 0.2095467746257782, - "learning_rate": 7.317341456191565e-06, - "loss": 0.0278, - "step": 74945 - }, - { - "epoch": 1.9025257012311207, - "grad_norm": 0.440453439950943, - "learning_rate": 7.316495325125863e-06, - "loss": 0.0424, - "step": 74950 - }, - { - "epoch": 1.902652620890976, - "grad_norm": 1.0864287614822388, - "learning_rate": 7.315649194060161e-06, - "loss": 0.025, - "step": 74955 - }, - { - "epoch": 1.9027795405508314, - "grad_norm": 0.35610347986221313, - "learning_rate": 7.314803062994459e-06, - "loss": 0.0235, - "step": 74960 - }, - { - "epoch": 1.9029064602106867, - "grad_norm": 2.6997954845428467, - "learning_rate": 7.313956931928756e-06, - "loss": 0.0407, - "step": 74965 - }, - { - "epoch": 1.9030333798705419, - "grad_norm": 0.35224229097366333, - "learning_rate": 7.313110800863054e-06, - "loss": 0.037, - "step": 74970 - }, - { - "epoch": 1.9031602995303971, - "grad_norm": 0.3657575845718384, - "learning_rate": 7.312264669797352e-06, - "loss": 0.0348, - "step": 74975 - }, - { - "epoch": 1.9032872191902526, - "grad_norm": 0.6717686057090759, - "learning_rate": 7.31141853873165e-06, - "loss": 0.0368, - "step": 74980 - }, - { - "epoch": 1.903414138850108, - "grad_norm": 0.5160046815872192, - "learning_rate": 7.310572407665948e-06, - "loss": 0.0482, - "step": 74985 - }, - { - "epoch": 1.9035410585099632, - "grad_norm": 0.49232470989227295, - "learning_rate": 7.309726276600246e-06, - "loss": 0.0322, - "step": 74990 - }, - { - "epoch": 1.9036679781698185, - "grad_norm": 0.46649542450904846, - "learning_rate": 7.308880145534544e-06, - "loss": 0.0359, - "step": 74995 - }, - { - "epoch": 1.9037948978296737, - "grad_norm": 0.2439371943473816, - "learning_rate": 7.308034014468842e-06, - "loss": 0.0231, - "step": 75000 - }, - { - "epoch": 1.9039218174895292, - "grad_norm": 0.3960670232772827, - "learning_rate": 7.30718788340314e-06, - "loss": 0.0306, - "step": 75005 - }, - { - "epoch": 1.9040487371493844, - "grad_norm": 0.35567787289619446, - "learning_rate": 7.306341752337438e-06, - "loss": 0.0281, - "step": 75010 - }, - { - "epoch": 1.9041756568092398, - "grad_norm": 0.3124677836894989, - "learning_rate": 7.305495621271736e-06, - "loss": 0.0359, - "step": 75015 - }, - { - "epoch": 1.904302576469095, - "grad_norm": 0.3538469672203064, - "learning_rate": 7.304649490206034e-06, - "loss": 0.037, - "step": 75020 - }, - { - "epoch": 1.9044294961289503, - "grad_norm": 0.40282711386680603, - "learning_rate": 7.303803359140332e-06, - "loss": 0.027, - "step": 75025 - }, - { - "epoch": 1.9045564157888055, - "grad_norm": 0.3147781789302826, - "learning_rate": 7.30295722807463e-06, - "loss": 0.0274, - "step": 75030 - }, - { - "epoch": 1.904683335448661, - "grad_norm": 0.42606469988822937, - "learning_rate": 7.302111097008927e-06, - "loss": 0.047, - "step": 75035 - }, - { - "epoch": 1.9048102551085164, - "grad_norm": 0.3895498812198639, - "learning_rate": 7.301264965943225e-06, - "loss": 0.0431, - "step": 75040 - }, - { - "epoch": 1.9049371747683717, - "grad_norm": 0.35287201404571533, - "learning_rate": 7.300418834877523e-06, - "loss": 0.0299, - "step": 75045 - }, - { - "epoch": 1.9050640944282269, - "grad_norm": 0.3943061828613281, - "learning_rate": 7.299572703811821e-06, - "loss": 0.0451, - "step": 75050 - }, - { - "epoch": 1.9051910140880821, - "grad_norm": 0.45964595675468445, - "learning_rate": 7.298726572746119e-06, - "loss": 0.021, - "step": 75055 - }, - { - "epoch": 1.9053179337479376, - "grad_norm": 0.4374353885650635, - "learning_rate": 7.297880441680417e-06, - "loss": 0.0257, - "step": 75060 - }, - { - "epoch": 1.905444853407793, - "grad_norm": 1.4295505285263062, - "learning_rate": 7.297034310614715e-06, - "loss": 0.0365, - "step": 75065 - }, - { - "epoch": 1.9055717730676482, - "grad_norm": 0.5034865736961365, - "learning_rate": 7.296188179549013e-06, - "loss": 0.0248, - "step": 75070 - }, - { - "epoch": 1.9056986927275035, - "grad_norm": 0.4700987935066223, - "learning_rate": 7.2953420484833106e-06, - "loss": 0.0329, - "step": 75075 - }, - { - "epoch": 1.9058256123873587, - "grad_norm": 0.4676573574542999, - "learning_rate": 7.294495917417609e-06, - "loss": 0.0453, - "step": 75080 - }, - { - "epoch": 1.9059525320472142, - "grad_norm": 0.4866630434989929, - "learning_rate": 7.2936497863519065e-06, - "loss": 0.0282, - "step": 75085 - }, - { - "epoch": 1.9060794517070694, - "grad_norm": 0.9796786904335022, - "learning_rate": 7.292803655286205e-06, - "loss": 0.0365, - "step": 75090 - }, - { - "epoch": 1.9062063713669248, - "grad_norm": 0.7564641833305359, - "learning_rate": 7.2919575242205025e-06, - "loss": 0.0489, - "step": 75095 - }, - { - "epoch": 1.90633329102678, - "grad_norm": 0.3983996510505676, - "learning_rate": 7.291111393154801e-06, - "loss": 0.0614, - "step": 75100 - }, - { - "epoch": 1.9064602106866353, - "grad_norm": 0.3819156289100647, - "learning_rate": 7.290265262089098e-06, - "loss": 0.0474, - "step": 75105 - }, - { - "epoch": 1.9065871303464905, - "grad_norm": 0.7060027718544006, - "learning_rate": 7.289419131023396e-06, - "loss": 0.0327, - "step": 75110 - }, - { - "epoch": 1.906714050006346, - "grad_norm": 0.6067022085189819, - "learning_rate": 7.2885729999576935e-06, - "loss": 0.0297, - "step": 75115 - }, - { - "epoch": 1.9068409696662014, - "grad_norm": 0.33942389488220215, - "learning_rate": 7.287726868891992e-06, - "loss": 0.0378, - "step": 75120 - }, - { - "epoch": 1.9069678893260567, - "grad_norm": 0.2869786024093628, - "learning_rate": 7.2868807378262895e-06, - "loss": 0.0527, - "step": 75125 - }, - { - "epoch": 1.9070948089859119, - "grad_norm": 0.5094878077507019, - "learning_rate": 7.286034606760588e-06, - "loss": 0.0295, - "step": 75130 - }, - { - "epoch": 1.9072217286457671, - "grad_norm": 0.3348986506462097, - "learning_rate": 7.2851884756948855e-06, - "loss": 0.0322, - "step": 75135 - }, - { - "epoch": 1.9073486483056226, - "grad_norm": 0.41808241605758667, - "learning_rate": 7.284342344629184e-06, - "loss": 0.0252, - "step": 75140 - }, - { - "epoch": 1.907475567965478, - "grad_norm": 0.405127614736557, - "learning_rate": 7.283496213563481e-06, - "loss": 0.0237, - "step": 75145 - }, - { - "epoch": 1.9076024876253332, - "grad_norm": 0.23401573300361633, - "learning_rate": 7.28265008249778e-06, - "loss": 0.031, - "step": 75150 - }, - { - "epoch": 1.9077294072851885, - "grad_norm": 0.3461570143699646, - "learning_rate": 7.281803951432077e-06, - "loss": 0.0327, - "step": 75155 - }, - { - "epoch": 1.9078563269450437, - "grad_norm": 0.3516415059566498, - "learning_rate": 7.280957820366376e-06, - "loss": 0.0375, - "step": 75160 - }, - { - "epoch": 1.9079832466048992, - "grad_norm": 0.6220629811286926, - "learning_rate": 7.280111689300673e-06, - "loss": 0.0373, - "step": 75165 - }, - { - "epoch": 1.9081101662647544, - "grad_norm": 1.1153067350387573, - "learning_rate": 7.279265558234972e-06, - "loss": 0.0343, - "step": 75170 - }, - { - "epoch": 1.9082370859246098, - "grad_norm": 0.529201090335846, - "learning_rate": 7.2784194271692684e-06, - "loss": 0.02, - "step": 75175 - }, - { - "epoch": 1.908364005584465, - "grad_norm": 0.5109729766845703, - "learning_rate": 7.277573296103567e-06, - "loss": 0.0453, - "step": 75180 - }, - { - "epoch": 1.9084909252443203, - "grad_norm": 0.5937342643737793, - "learning_rate": 7.276727165037864e-06, - "loss": 0.0524, - "step": 75185 - }, - { - "epoch": 1.9086178449041755, - "grad_norm": 0.4672570824623108, - "learning_rate": 7.275881033972163e-06, - "loss": 0.0594, - "step": 75190 - }, - { - "epoch": 1.908744764564031, - "grad_norm": 1.8600044250488281, - "learning_rate": 7.27503490290646e-06, - "loss": 0.0263, - "step": 75195 - }, - { - "epoch": 1.9088716842238864, - "grad_norm": 0.5563987493515015, - "learning_rate": 7.274188771840759e-06, - "loss": 0.027, - "step": 75200 - }, - { - "epoch": 1.9089986038837417, - "grad_norm": 0.4645465612411499, - "learning_rate": 7.273342640775056e-06, - "loss": 0.0411, - "step": 75205 - }, - { - "epoch": 1.9091255235435969, - "grad_norm": 0.22413352131843567, - "learning_rate": 7.272496509709355e-06, - "loss": 0.0271, - "step": 75210 - }, - { - "epoch": 1.909252443203452, - "grad_norm": 0.27908438444137573, - "learning_rate": 7.271650378643652e-06, - "loss": 0.0414, - "step": 75215 - }, - { - "epoch": 1.9093793628633076, - "grad_norm": 0.5340016484260559, - "learning_rate": 7.270804247577951e-06, - "loss": 0.0438, - "step": 75220 - }, - { - "epoch": 1.9095062825231628, - "grad_norm": 0.29453498125076294, - "learning_rate": 7.269958116512248e-06, - "loss": 0.0403, - "step": 75225 - }, - { - "epoch": 1.9096332021830182, - "grad_norm": 0.5087630152702332, - "learning_rate": 7.269111985446547e-06, - "loss": 0.0399, - "step": 75230 - }, - { - "epoch": 1.9097601218428735, - "grad_norm": 0.8538188338279724, - "learning_rate": 7.268265854380844e-06, - "loss": 0.0409, - "step": 75235 - }, - { - "epoch": 1.9098870415027287, - "grad_norm": 0.47237280011177063, - "learning_rate": 7.2674197233151426e-06, - "loss": 0.0548, - "step": 75240 - }, - { - "epoch": 1.910013961162584, - "grad_norm": 0.2776098847389221, - "learning_rate": 7.266573592249439e-06, - "loss": 0.0156, - "step": 75245 - }, - { - "epoch": 1.9101408808224394, - "grad_norm": 0.5609169602394104, - "learning_rate": 7.265727461183738e-06, - "loss": 0.0383, - "step": 75250 - }, - { - "epoch": 1.9102678004822948, - "grad_norm": 0.593799889087677, - "learning_rate": 7.264881330118035e-06, - "loss": 0.0276, - "step": 75255 - }, - { - "epoch": 1.91039472014215, - "grad_norm": 0.45349767804145813, - "learning_rate": 7.264035199052334e-06, - "loss": 0.0205, - "step": 75260 - }, - { - "epoch": 1.9105216398020053, - "grad_norm": 0.5815842747688293, - "learning_rate": 7.263189067986631e-06, - "loss": 0.048, - "step": 75265 - }, - { - "epoch": 1.9106485594618605, - "grad_norm": 0.30930498242378235, - "learning_rate": 7.26234293692093e-06, - "loss": 0.0458, - "step": 75270 - }, - { - "epoch": 1.910775479121716, - "grad_norm": 0.32953473925590515, - "learning_rate": 7.261496805855227e-06, - "loss": 0.0372, - "step": 75275 - }, - { - "epoch": 1.9109023987815714, - "grad_norm": 0.863290548324585, - "learning_rate": 7.2606506747895256e-06, - "loss": 0.0272, - "step": 75280 - }, - { - "epoch": 1.9110293184414266, - "grad_norm": 0.41612404584884644, - "learning_rate": 7.259804543723823e-06, - "loss": 0.0362, - "step": 75285 - }, - { - "epoch": 1.9111562381012819, - "grad_norm": 0.40844255685806274, - "learning_rate": 7.2589584126581215e-06, - "loss": 0.0388, - "step": 75290 - }, - { - "epoch": 1.911283157761137, - "grad_norm": 0.4157906472682953, - "learning_rate": 7.258112281592419e-06, - "loss": 0.0482, - "step": 75295 - }, - { - "epoch": 1.9114100774209926, - "grad_norm": 0.42284661531448364, - "learning_rate": 7.2572661505267175e-06, - "loss": 0.0413, - "step": 75300 - }, - { - "epoch": 1.9115369970808478, - "grad_norm": 0.3366011381149292, - "learning_rate": 7.256420019461014e-06, - "loss": 0.0376, - "step": 75305 - }, - { - "epoch": 1.9116639167407032, - "grad_norm": 0.17225295305252075, - "learning_rate": 7.2555738883953134e-06, - "loss": 0.0284, - "step": 75310 - }, - { - "epoch": 1.9117908364005585, - "grad_norm": 0.434949666261673, - "learning_rate": 7.25472775732961e-06, - "loss": 0.0415, - "step": 75315 - }, - { - "epoch": 1.9119177560604137, - "grad_norm": 0.2645588219165802, - "learning_rate": 7.2538816262639085e-06, - "loss": 0.0366, - "step": 75320 - }, - { - "epoch": 1.912044675720269, - "grad_norm": 0.2835372984409332, - "learning_rate": 7.253035495198206e-06, - "loss": 0.0467, - "step": 75325 - }, - { - "epoch": 1.9121715953801244, - "grad_norm": 0.5597830414772034, - "learning_rate": 7.2521893641325045e-06, - "loss": 0.0473, - "step": 75330 - }, - { - "epoch": 1.9122985150399798, - "grad_norm": 0.47831544280052185, - "learning_rate": 7.251343233066802e-06, - "loss": 0.0267, - "step": 75335 - }, - { - "epoch": 1.912425434699835, - "grad_norm": 0.35611486434936523, - "learning_rate": 7.2504971020011005e-06, - "loss": 0.0321, - "step": 75340 - }, - { - "epoch": 1.9125523543596903, - "grad_norm": 0.9562399983406067, - "learning_rate": 7.249650970935398e-06, - "loss": 0.0316, - "step": 75345 - }, - { - "epoch": 1.9126792740195455, - "grad_norm": 0.6615468263626099, - "learning_rate": 7.248804839869696e-06, - "loss": 0.0358, - "step": 75350 - }, - { - "epoch": 1.912806193679401, - "grad_norm": 0.47063377499580383, - "learning_rate": 7.247958708803995e-06, - "loss": 0.0497, - "step": 75355 - }, - { - "epoch": 1.9129331133392562, - "grad_norm": 0.3987702429294586, - "learning_rate": 7.247112577738292e-06, - "loss": 0.0316, - "step": 75360 - }, - { - "epoch": 1.9130600329991116, - "grad_norm": 0.4296201765537262, - "learning_rate": 7.246266446672591e-06, - "loss": 0.0393, - "step": 75365 - }, - { - "epoch": 1.9131869526589669, - "grad_norm": 0.53169846534729, - "learning_rate": 7.245420315606888e-06, - "loss": 0.0401, - "step": 75370 - }, - { - "epoch": 1.913313872318822, - "grad_norm": 0.3952375054359436, - "learning_rate": 7.244574184541187e-06, - "loss": 0.0343, - "step": 75375 - }, - { - "epoch": 1.9134407919786773, - "grad_norm": 0.3717719614505768, - "learning_rate": 7.243728053475484e-06, - "loss": 0.0391, - "step": 75380 - }, - { - "epoch": 1.9135677116385328, - "grad_norm": 0.4329374134540558, - "learning_rate": 7.242881922409783e-06, - "loss": 0.0209, - "step": 75385 - }, - { - "epoch": 1.9136946312983882, - "grad_norm": 0.7852108478546143, - "learning_rate": 7.242035791344079e-06, - "loss": 0.0243, - "step": 75390 - }, - { - "epoch": 1.9138215509582435, - "grad_norm": 0.2865369915962219, - "learning_rate": 7.241189660278379e-06, - "loss": 0.0184, - "step": 75395 - }, - { - "epoch": 1.9139484706180987, - "grad_norm": 0.4647975564002991, - "learning_rate": 7.240343529212675e-06, - "loss": 0.0382, - "step": 75400 - }, - { - "epoch": 1.914075390277954, - "grad_norm": 0.20822404325008392, - "learning_rate": 7.239497398146974e-06, - "loss": 0.0267, - "step": 75405 - }, - { - "epoch": 1.9142023099378094, - "grad_norm": 0.4030109643936157, - "learning_rate": 7.238651267081271e-06, - "loss": 0.0502, - "step": 75410 - }, - { - "epoch": 1.9143292295976648, - "grad_norm": 0.5471249222755432, - "learning_rate": 7.23780513601557e-06, - "loss": 0.0578, - "step": 75415 - }, - { - "epoch": 1.91445614925752, - "grad_norm": 0.39193758368492126, - "learning_rate": 7.236959004949867e-06, - "loss": 0.0428, - "step": 75420 - }, - { - "epoch": 1.9145830689173753, - "grad_norm": 0.9236963987350464, - "learning_rate": 7.236112873884166e-06, - "loss": 0.0479, - "step": 75425 - }, - { - "epoch": 1.9147099885772305, - "grad_norm": 0.5191662907600403, - "learning_rate": 7.235266742818463e-06, - "loss": 0.039, - "step": 75430 - }, - { - "epoch": 1.914836908237086, - "grad_norm": 0.5054100155830383, - "learning_rate": 7.234420611752762e-06, - "loss": 0.0586, - "step": 75435 - }, - { - "epoch": 1.9149638278969412, - "grad_norm": 0.38729390501976013, - "learning_rate": 7.233574480687059e-06, - "loss": 0.0418, - "step": 75440 - }, - { - "epoch": 1.9150907475567966, - "grad_norm": 0.5488352179527283, - "learning_rate": 7.2327283496213576e-06, - "loss": 0.0446, - "step": 75445 - }, - { - "epoch": 1.9152176672166519, - "grad_norm": 0.8006948232650757, - "learning_rate": 7.231882218555655e-06, - "loss": 0.0432, - "step": 75450 - }, - { - "epoch": 1.915344586876507, - "grad_norm": 0.42533907294273376, - "learning_rate": 7.2310360874899535e-06, - "loss": 0.0272, - "step": 75455 - }, - { - "epoch": 1.9154715065363623, - "grad_norm": 0.26014646887779236, - "learning_rate": 7.23018995642425e-06, - "loss": 0.0343, - "step": 75460 - }, - { - "epoch": 1.9155984261962178, - "grad_norm": 0.3863328993320465, - "learning_rate": 7.229343825358549e-06, - "loss": 0.0486, - "step": 75465 - }, - { - "epoch": 1.9157253458560732, - "grad_norm": 0.2299901843070984, - "learning_rate": 7.228497694292846e-06, - "loss": 0.0307, - "step": 75470 - }, - { - "epoch": 1.9158522655159285, - "grad_norm": 0.43251535296440125, - "learning_rate": 7.227651563227145e-06, - "loss": 0.0333, - "step": 75475 - }, - { - "epoch": 1.9159791851757837, - "grad_norm": 0.4712091386318207, - "learning_rate": 7.226805432161442e-06, - "loss": 0.0276, - "step": 75480 - }, - { - "epoch": 1.916106104835639, - "grad_norm": 0.33542993664741516, - "learning_rate": 7.2259593010957406e-06, - "loss": 0.0432, - "step": 75485 - }, - { - "epoch": 1.9162330244954944, - "grad_norm": 0.419557124376297, - "learning_rate": 7.225113170030038e-06, - "loss": 0.0384, - "step": 75490 - }, - { - "epoch": 1.9163599441553498, - "grad_norm": 0.39326393604278564, - "learning_rate": 7.2242670389643365e-06, - "loss": 0.0606, - "step": 75495 - }, - { - "epoch": 1.916486863815205, - "grad_norm": 0.6219335794448853, - "learning_rate": 7.223420907898634e-06, - "loss": 0.0382, - "step": 75500 - }, - { - "epoch": 1.9166137834750603, - "grad_norm": 0.38938578963279724, - "learning_rate": 7.2225747768329325e-06, - "loss": 0.0451, - "step": 75505 - }, - { - "epoch": 1.9167407031349155, - "grad_norm": 0.321058452129364, - "learning_rate": 7.22172864576723e-06, - "loss": 0.0446, - "step": 75510 - }, - { - "epoch": 1.916867622794771, - "grad_norm": 0.359213262796402, - "learning_rate": 7.2208825147015284e-06, - "loss": 0.0441, - "step": 75515 - }, - { - "epoch": 1.9169945424546262, - "grad_norm": 0.43401533365249634, - "learning_rate": 7.220036383635826e-06, - "loss": 0.0341, - "step": 75520 - }, - { - "epoch": 1.9171214621144816, - "grad_norm": 0.3230583369731903, - "learning_rate": 7.219190252570124e-06, - "loss": 0.0228, - "step": 75525 - }, - { - "epoch": 1.9172483817743369, - "grad_norm": 0.6319587230682373, - "learning_rate": 7.218344121504421e-06, - "loss": 0.034, - "step": 75530 - }, - { - "epoch": 1.917375301434192, - "grad_norm": 0.5881219506263733, - "learning_rate": 7.2174979904387195e-06, - "loss": 0.0375, - "step": 75535 - }, - { - "epoch": 1.9175022210940473, - "grad_norm": 0.4445069134235382, - "learning_rate": 7.216651859373017e-06, - "loss": 0.0351, - "step": 75540 - }, - { - "epoch": 1.9176291407539028, - "grad_norm": 0.47016802430152893, - "learning_rate": 7.2158057283073155e-06, - "loss": 0.0477, - "step": 75545 - }, - { - "epoch": 1.9177560604137582, - "grad_norm": 0.4740341901779175, - "learning_rate": 7.214959597241613e-06, - "loss": 0.048, - "step": 75550 - }, - { - "epoch": 1.9178829800736135, - "grad_norm": 0.4821435213088989, - "learning_rate": 7.214113466175911e-06, - "loss": 0.0205, - "step": 75555 - }, - { - "epoch": 1.9180098997334687, - "grad_norm": 0.9047722220420837, - "learning_rate": 7.213267335110209e-06, - "loss": 0.0231, - "step": 75560 - }, - { - "epoch": 1.918136819393324, - "grad_norm": 0.42818063497543335, - "learning_rate": 7.212421204044507e-06, - "loss": 0.0408, - "step": 75565 - }, - { - "epoch": 1.9182637390531794, - "grad_norm": 0.4453182816505432, - "learning_rate": 7.211575072978805e-06, - "loss": 0.0342, - "step": 75570 - }, - { - "epoch": 1.9183906587130346, - "grad_norm": 0.40940919518470764, - "learning_rate": 7.210728941913103e-06, - "loss": 0.0357, - "step": 75575 - }, - { - "epoch": 1.91851757837289, - "grad_norm": 0.7417961955070496, - "learning_rate": 7.209882810847401e-06, - "loss": 0.0403, - "step": 75580 - }, - { - "epoch": 1.9186444980327453, - "grad_norm": 0.4213666021823883, - "learning_rate": 7.209036679781699e-06, - "loss": 0.0322, - "step": 75585 - }, - { - "epoch": 1.9187714176926005, - "grad_norm": 0.5581508874893188, - "learning_rate": 7.208190548715997e-06, - "loss": 0.034, - "step": 75590 - }, - { - "epoch": 1.9188983373524557, - "grad_norm": 0.45028096437454224, - "learning_rate": 7.207344417650295e-06, - "loss": 0.0562, - "step": 75595 - }, - { - "epoch": 1.9190252570123112, - "grad_norm": 0.4323491156101227, - "learning_rate": 7.206498286584592e-06, - "loss": 0.0557, - "step": 75600 - }, - { - "epoch": 1.9191521766721666, - "grad_norm": 0.3195600211620331, - "learning_rate": 7.20565215551889e-06, - "loss": 0.027, - "step": 75605 - }, - { - "epoch": 1.9192790963320219, - "grad_norm": 0.5432814955711365, - "learning_rate": 7.204806024453188e-06, - "loss": 0.0317, - "step": 75610 - }, - { - "epoch": 1.919406015991877, - "grad_norm": 0.36071863770484924, - "learning_rate": 7.203959893387486e-06, - "loss": 0.0337, - "step": 75615 - }, - { - "epoch": 1.9195329356517323, - "grad_norm": 0.31908470392227173, - "learning_rate": 7.203113762321784e-06, - "loss": 0.0221, - "step": 75620 - }, - { - "epoch": 1.9196598553115878, - "grad_norm": 0.6210837364196777, - "learning_rate": 7.202267631256082e-06, - "loss": 0.0404, - "step": 75625 - }, - { - "epoch": 1.9197867749714432, - "grad_norm": 0.3272773325443268, - "learning_rate": 7.20142150019038e-06, - "loss": 0.0395, - "step": 75630 - }, - { - "epoch": 1.9199136946312985, - "grad_norm": 0.5717149972915649, - "learning_rate": 7.200575369124678e-06, - "loss": 0.0544, - "step": 75635 - }, - { - "epoch": 1.9200406142911537, - "grad_norm": 0.28382644057273865, - "learning_rate": 7.199729238058976e-06, - "loss": 0.0286, - "step": 75640 - }, - { - "epoch": 1.920167533951009, - "grad_norm": 0.5595223903656006, - "learning_rate": 7.198883106993274e-06, - "loss": 0.0307, - "step": 75645 - }, - { - "epoch": 1.9202944536108644, - "grad_norm": 0.4653652310371399, - "learning_rate": 7.198036975927572e-06, - "loss": 0.0266, - "step": 75650 - }, - { - "epoch": 1.9204213732707196, - "grad_norm": 0.4778807759284973, - "learning_rate": 7.19719084486187e-06, - "loss": 0.0435, - "step": 75655 - }, - { - "epoch": 1.920548292930575, - "grad_norm": 0.4139435291290283, - "learning_rate": 7.196344713796168e-06, - "loss": 0.035, - "step": 75660 - }, - { - "epoch": 1.9206752125904303, - "grad_norm": 0.43981409072875977, - "learning_rate": 7.195498582730466e-06, - "loss": 0.025, - "step": 75665 - }, - { - "epoch": 1.9208021322502855, - "grad_norm": 0.3257589042186737, - "learning_rate": 7.194652451664763e-06, - "loss": 0.0325, - "step": 75670 - }, - { - "epoch": 1.9209290519101407, - "grad_norm": 0.39879798889160156, - "learning_rate": 7.193806320599061e-06, - "loss": 0.0344, - "step": 75675 - }, - { - "epoch": 1.9210559715699962, - "grad_norm": 0.5210800766944885, - "learning_rate": 7.192960189533359e-06, - "loss": 0.0316, - "step": 75680 - }, - { - "epoch": 1.9211828912298516, - "grad_norm": 0.3047713041305542, - "learning_rate": 7.192114058467657e-06, - "loss": 0.0321, - "step": 75685 - }, - { - "epoch": 1.9213098108897069, - "grad_norm": 0.43631091713905334, - "learning_rate": 7.191267927401955e-06, - "loss": 0.0317, - "step": 75690 - }, - { - "epoch": 1.921436730549562, - "grad_norm": 0.9756001830101013, - "learning_rate": 7.190421796336253e-06, - "loss": 0.0264, - "step": 75695 - }, - { - "epoch": 1.9215636502094173, - "grad_norm": 0.4301241636276245, - "learning_rate": 7.189575665270551e-06, - "loss": 0.0247, - "step": 75700 - }, - { - "epoch": 1.9216905698692728, - "grad_norm": 0.23192989826202393, - "learning_rate": 7.188729534204849e-06, - "loss": 0.0306, - "step": 75705 - }, - { - "epoch": 1.921817489529128, - "grad_norm": 1.1587947607040405, - "learning_rate": 7.187883403139147e-06, - "loss": 0.0579, - "step": 75710 - }, - { - "epoch": 1.9219444091889835, - "grad_norm": 0.4642960727214813, - "learning_rate": 7.187037272073445e-06, - "loss": 0.0239, - "step": 75715 - }, - { - "epoch": 1.9220713288488387, - "grad_norm": 0.5740816593170166, - "learning_rate": 7.186191141007743e-06, - "loss": 0.0482, - "step": 75720 - }, - { - "epoch": 1.922198248508694, - "grad_norm": 0.5302680134773254, - "learning_rate": 7.185345009942041e-06, - "loss": 0.0343, - "step": 75725 - }, - { - "epoch": 1.9223251681685491, - "grad_norm": 0.556111216545105, - "learning_rate": 7.1844988788763385e-06, - "loss": 0.0339, - "step": 75730 - }, - { - "epoch": 1.9224520878284046, - "grad_norm": 0.6342504620552063, - "learning_rate": 7.183652747810637e-06, - "loss": 0.05, - "step": 75735 - }, - { - "epoch": 1.92257900748826, - "grad_norm": 0.716523289680481, - "learning_rate": 7.182806616744934e-06, - "loss": 0.0409, - "step": 75740 - }, - { - "epoch": 1.9227059271481153, - "grad_norm": 0.4970579743385315, - "learning_rate": 7.181960485679232e-06, - "loss": 0.044, - "step": 75745 - }, - { - "epoch": 1.9228328468079705, - "grad_norm": 0.27059629559516907, - "learning_rate": 7.18111435461353e-06, - "loss": 0.0365, - "step": 75750 - }, - { - "epoch": 1.9229597664678257, - "grad_norm": 0.45361268520355225, - "learning_rate": 7.180268223547828e-06, - "loss": 0.0496, - "step": 75755 - }, - { - "epoch": 1.9230866861276812, - "grad_norm": 0.5908650755882263, - "learning_rate": 7.1794220924821256e-06, - "loss": 0.0394, - "step": 75760 - }, - { - "epoch": 1.9232136057875366, - "grad_norm": 0.4219732880592346, - "learning_rate": 7.178575961416424e-06, - "loss": 0.028, - "step": 75765 - }, - { - "epoch": 1.9233405254473919, - "grad_norm": 0.32874318957328796, - "learning_rate": 7.1777298303507215e-06, - "loss": 0.0366, - "step": 75770 - }, - { - "epoch": 1.923467445107247, - "grad_norm": 0.55035001039505, - "learning_rate": 7.17688369928502e-06, - "loss": 0.0401, - "step": 75775 - }, - { - "epoch": 1.9235943647671023, - "grad_norm": 0.5581026673316956, - "learning_rate": 7.1760375682193175e-06, - "loss": 0.0406, - "step": 75780 - }, - { - "epoch": 1.9237212844269578, - "grad_norm": 0.42979612946510315, - "learning_rate": 7.175191437153616e-06, - "loss": 0.0403, - "step": 75785 - }, - { - "epoch": 1.923848204086813, - "grad_norm": 0.35735997557640076, - "learning_rate": 7.1743453060879134e-06, - "loss": 0.0322, - "step": 75790 - }, - { - "epoch": 1.9239751237466685, - "grad_norm": 0.5496389865875244, - "learning_rate": 7.173499175022212e-06, - "loss": 0.0369, - "step": 75795 - }, - { - "epoch": 1.9241020434065237, - "grad_norm": 0.40475916862487793, - "learning_rate": 7.1726530439565085e-06, - "loss": 0.0509, - "step": 75800 - }, - { - "epoch": 1.924228963066379, - "grad_norm": 0.598719596862793, - "learning_rate": 7.171806912890808e-06, - "loss": 0.0385, - "step": 75805 - }, - { - "epoch": 1.9243558827262341, - "grad_norm": 0.5183451175689697, - "learning_rate": 7.1709607818251045e-06, - "loss": 0.0438, - "step": 75810 - }, - { - "epoch": 1.9244828023860896, - "grad_norm": 0.45705702900886536, - "learning_rate": 7.170114650759403e-06, - "loss": 0.0334, - "step": 75815 - }, - { - "epoch": 1.924609722045945, - "grad_norm": 0.4469811022281647, - "learning_rate": 7.1692685196937005e-06, - "loss": 0.0471, - "step": 75820 - }, - { - "epoch": 1.9247366417058003, - "grad_norm": 0.31337565183639526, - "learning_rate": 7.168422388627999e-06, - "loss": 0.0239, - "step": 75825 - }, - { - "epoch": 1.9248635613656555, - "grad_norm": 0.47421351075172424, - "learning_rate": 7.167576257562296e-06, - "loss": 0.0327, - "step": 75830 - }, - { - "epoch": 1.9249904810255107, - "grad_norm": 0.2837108373641968, - "learning_rate": 7.166730126496595e-06, - "loss": 0.0365, - "step": 75835 - }, - { - "epoch": 1.9251174006853662, - "grad_norm": 0.5110825896263123, - "learning_rate": 7.165883995430892e-06, - "loss": 0.0386, - "step": 75840 - }, - { - "epoch": 1.9252443203452216, - "grad_norm": 0.5507407188415527, - "learning_rate": 7.165037864365191e-06, - "loss": 0.0385, - "step": 75845 - }, - { - "epoch": 1.9253712400050769, - "grad_norm": 0.3873836398124695, - "learning_rate": 7.164191733299488e-06, - "loss": 0.0407, - "step": 75850 - }, - { - "epoch": 1.925498159664932, - "grad_norm": 0.5788750648498535, - "learning_rate": 7.163345602233787e-06, - "loss": 0.0312, - "step": 75855 - }, - { - "epoch": 1.9256250793247873, - "grad_norm": 0.4188635051250458, - "learning_rate": 7.162499471168084e-06, - "loss": 0.0366, - "step": 75860 - }, - { - "epoch": 1.9257519989846428, - "grad_norm": 0.5330942869186401, - "learning_rate": 7.161653340102383e-06, - "loss": 0.0537, - "step": 75865 - }, - { - "epoch": 1.925878918644498, - "grad_norm": 0.3621709644794464, - "learning_rate": 7.160807209036681e-06, - "loss": 0.0421, - "step": 75870 - }, - { - "epoch": 1.9260058383043535, - "grad_norm": 0.3498122990131378, - "learning_rate": 7.159961077970979e-06, - "loss": 0.0406, - "step": 75875 - }, - { - "epoch": 1.9261327579642087, - "grad_norm": 0.4365721344947815, - "learning_rate": 7.159114946905277e-06, - "loss": 0.0272, - "step": 75880 - }, - { - "epoch": 1.926259677624064, - "grad_norm": 0.5484693646430969, - "learning_rate": 7.158268815839574e-06, - "loss": 0.0529, - "step": 75885 - }, - { - "epoch": 1.9263865972839191, - "grad_norm": 0.6801542043685913, - "learning_rate": 7.157422684773873e-06, - "loss": 0.0437, - "step": 75890 - }, - { - "epoch": 1.9265135169437746, - "grad_norm": 0.6465049982070923, - "learning_rate": 7.15657655370817e-06, - "loss": 0.0275, - "step": 75895 - }, - { - "epoch": 1.92664043660363, - "grad_norm": 0.28661683201789856, - "learning_rate": 7.155730422642468e-06, - "loss": 0.0274, - "step": 75900 - }, - { - "epoch": 1.9267673562634853, - "grad_norm": 0.3927159607410431, - "learning_rate": 7.154884291576766e-06, - "loss": 0.042, - "step": 75905 - }, - { - "epoch": 1.9268942759233405, - "grad_norm": 0.4191802144050598, - "learning_rate": 7.154038160511064e-06, - "loss": 0.0449, - "step": 75910 - }, - { - "epoch": 1.9270211955831957, - "grad_norm": 0.32906389236450195, - "learning_rate": 7.153192029445362e-06, - "loss": 0.0377, - "step": 75915 - }, - { - "epoch": 1.9271481152430512, - "grad_norm": 0.5041166543960571, - "learning_rate": 7.15234589837966e-06, - "loss": 0.0414, - "step": 75920 - }, - { - "epoch": 1.9272750349029064, - "grad_norm": 0.504047691822052, - "learning_rate": 7.151499767313958e-06, - "loss": 0.0342, - "step": 75925 - }, - { - "epoch": 1.9274019545627619, - "grad_norm": 0.5813462734222412, - "learning_rate": 7.150653636248256e-06, - "loss": 0.0293, - "step": 75930 - }, - { - "epoch": 1.927528874222617, - "grad_norm": 0.6287632584571838, - "learning_rate": 7.1498075051825535e-06, - "loss": 0.0307, - "step": 75935 - }, - { - "epoch": 1.9276557938824723, - "grad_norm": 0.12771758437156677, - "learning_rate": 7.148961374116852e-06, - "loss": 0.0366, - "step": 75940 - }, - { - "epoch": 1.9277827135423276, - "grad_norm": 0.37826061248779297, - "learning_rate": 7.1481152430511495e-06, - "loss": 0.0282, - "step": 75945 - }, - { - "epoch": 1.927909633202183, - "grad_norm": 0.490369975566864, - "learning_rate": 7.147269111985448e-06, - "loss": 0.039, - "step": 75950 - }, - { - "epoch": 1.9280365528620385, - "grad_norm": 0.47782233357429504, - "learning_rate": 7.146422980919745e-06, - "loss": 0.0267, - "step": 75955 - }, - { - "epoch": 1.9281634725218937, - "grad_norm": 0.49769315123558044, - "learning_rate": 7.145576849854043e-06, - "loss": 0.0428, - "step": 75960 - }, - { - "epoch": 1.928290392181749, - "grad_norm": 0.23507779836654663, - "learning_rate": 7.1447307187883406e-06, - "loss": 0.033, - "step": 75965 - }, - { - "epoch": 1.9284173118416041, - "grad_norm": 0.4234825670719147, - "learning_rate": 7.143884587722639e-06, - "loss": 0.0344, - "step": 75970 - }, - { - "epoch": 1.9285442315014596, - "grad_norm": 0.2218647003173828, - "learning_rate": 7.1430384566569365e-06, - "loss": 0.0262, - "step": 75975 - }, - { - "epoch": 1.928671151161315, - "grad_norm": 0.3857664167881012, - "learning_rate": 7.142192325591235e-06, - "loss": 0.056, - "step": 75980 - }, - { - "epoch": 1.9287980708211703, - "grad_norm": 0.38719409704208374, - "learning_rate": 7.1413461945255325e-06, - "loss": 0.0332, - "step": 75985 - }, - { - "epoch": 1.9289249904810255, - "grad_norm": 0.4469052851200104, - "learning_rate": 7.140500063459831e-06, - "loss": 0.0204, - "step": 75990 - }, - { - "epoch": 1.9290519101408807, - "grad_norm": 0.36486920714378357, - "learning_rate": 7.1396539323941284e-06, - "loss": 0.024, - "step": 75995 - }, - { - "epoch": 1.9291788298007362, - "grad_norm": 0.48609447479248047, - "learning_rate": 7.138807801328427e-06, - "loss": 0.0366, - "step": 76000 - }, - { - "epoch": 1.9293057494605914, - "grad_norm": 0.5244938731193542, - "learning_rate": 7.137961670262724e-06, - "loss": 0.0313, - "step": 76005 - }, - { - "epoch": 1.9294326691204469, - "grad_norm": 0.4246518909931183, - "learning_rate": 7.137115539197023e-06, - "loss": 0.03, - "step": 76010 - }, - { - "epoch": 1.929559588780302, - "grad_norm": 0.5604687929153442, - "learning_rate": 7.13626940813132e-06, - "loss": 0.0335, - "step": 76015 - }, - { - "epoch": 1.9296865084401573, - "grad_norm": 0.3774871230125427, - "learning_rate": 7.135423277065619e-06, - "loss": 0.0208, - "step": 76020 - }, - { - "epoch": 1.9298134281000126, - "grad_norm": 0.3678208589553833, - "learning_rate": 7.1345771459999155e-06, - "loss": 0.0437, - "step": 76025 - }, - { - "epoch": 1.929940347759868, - "grad_norm": 0.28557851910591125, - "learning_rate": 7.133731014934214e-06, - "loss": 0.0296, - "step": 76030 - }, - { - "epoch": 1.9300672674197235, - "grad_norm": 0.46266910433769226, - "learning_rate": 7.132884883868511e-06, - "loss": 0.0278, - "step": 76035 - }, - { - "epoch": 1.9301941870795787, - "grad_norm": 0.6833282709121704, - "learning_rate": 7.13203875280281e-06, - "loss": 0.0418, - "step": 76040 - }, - { - "epoch": 1.930321106739434, - "grad_norm": 0.4469594359397888, - "learning_rate": 7.131192621737107e-06, - "loss": 0.0382, - "step": 76045 - }, - { - "epoch": 1.9304480263992891, - "grad_norm": 0.3545457422733307, - "learning_rate": 7.130346490671406e-06, - "loss": 0.0343, - "step": 76050 - }, - { - "epoch": 1.9305749460591446, - "grad_norm": 0.4654105305671692, - "learning_rate": 7.129500359605703e-06, - "loss": 0.0258, - "step": 76055 - }, - { - "epoch": 1.9307018657189998, - "grad_norm": 0.28722232580184937, - "learning_rate": 7.128654228540002e-06, - "loss": 0.0279, - "step": 76060 - }, - { - "epoch": 1.9308287853788553, - "grad_norm": 0.3402305245399475, - "learning_rate": 7.127808097474299e-06, - "loss": 0.0392, - "step": 76065 - }, - { - "epoch": 1.9309557050387105, - "grad_norm": 1.6457152366638184, - "learning_rate": 7.126961966408598e-06, - "loss": 0.0325, - "step": 76070 - }, - { - "epoch": 1.9310826246985657, - "grad_norm": 0.3457077145576477, - "learning_rate": 7.126115835342895e-06, - "loss": 0.0425, - "step": 76075 - }, - { - "epoch": 1.931209544358421, - "grad_norm": 0.21499846875667572, - "learning_rate": 7.125269704277194e-06, - "loss": 0.0356, - "step": 76080 - }, - { - "epoch": 1.9313364640182764, - "grad_norm": 0.48093101382255554, - "learning_rate": 7.124423573211491e-06, - "loss": 0.0226, - "step": 76085 - }, - { - "epoch": 1.9314633836781319, - "grad_norm": 0.6121925115585327, - "learning_rate": 7.12357744214579e-06, - "loss": 0.0344, - "step": 76090 - }, - { - "epoch": 1.931590303337987, - "grad_norm": 0.43517687916755676, - "learning_rate": 7.122731311080086e-06, - "loss": 0.0196, - "step": 76095 - }, - { - "epoch": 1.9317172229978423, - "grad_norm": 0.5077061653137207, - "learning_rate": 7.121885180014385e-06, - "loss": 0.0295, - "step": 76100 - }, - { - "epoch": 1.9318441426576975, - "grad_norm": 1.8057276010513306, - "learning_rate": 7.121039048948682e-06, - "loss": 0.0521, - "step": 76105 - }, - { - "epoch": 1.931971062317553, - "grad_norm": 0.40059366822242737, - "learning_rate": 7.120192917882981e-06, - "loss": 0.0395, - "step": 76110 - }, - { - "epoch": 1.9320979819774085, - "grad_norm": 0.5937473177909851, - "learning_rate": 7.119346786817278e-06, - "loss": 0.0377, - "step": 76115 - }, - { - "epoch": 1.9322249016372637, - "grad_norm": 0.3544454276561737, - "learning_rate": 7.118500655751577e-06, - "loss": 0.0452, - "step": 76120 - }, - { - "epoch": 1.932351821297119, - "grad_norm": 0.2757922112941742, - "learning_rate": 7.117654524685874e-06, - "loss": 0.0313, - "step": 76125 - }, - { - "epoch": 1.9324787409569741, - "grad_norm": 0.4326791763305664, - "learning_rate": 7.116808393620173e-06, - "loss": 0.0274, - "step": 76130 - }, - { - "epoch": 1.9326056606168296, - "grad_norm": 0.3109041750431061, - "learning_rate": 7.11596226255447e-06, - "loss": 0.0403, - "step": 76135 - }, - { - "epoch": 1.9327325802766848, - "grad_norm": 0.722010612487793, - "learning_rate": 7.1151161314887685e-06, - "loss": 0.0419, - "step": 76140 - }, - { - "epoch": 1.9328594999365403, - "grad_norm": 0.30984875559806824, - "learning_rate": 7.114270000423066e-06, - "loss": 0.0318, - "step": 76145 - }, - { - "epoch": 1.9329864195963955, - "grad_norm": 0.41902443766593933, - "learning_rate": 7.1134238693573645e-06, - "loss": 0.0258, - "step": 76150 - }, - { - "epoch": 1.9331133392562507, - "grad_norm": 0.6133266091346741, - "learning_rate": 7.112577738291662e-06, - "loss": 0.0376, - "step": 76155 - }, - { - "epoch": 1.933240258916106, - "grad_norm": 0.30004894733428955, - "learning_rate": 7.1117316072259605e-06, - "loss": 0.0504, - "step": 76160 - }, - { - "epoch": 1.9333671785759614, - "grad_norm": 0.3665798008441925, - "learning_rate": 7.110885476160257e-06, - "loss": 0.0279, - "step": 76165 - }, - { - "epoch": 1.9334940982358169, - "grad_norm": 0.519153892993927, - "learning_rate": 7.1100393450945556e-06, - "loss": 0.0279, - "step": 76170 - }, - { - "epoch": 1.933621017895672, - "grad_norm": 0.5616127252578735, - "learning_rate": 7.109193214028853e-06, - "loss": 0.0234, - "step": 76175 - }, - { - "epoch": 1.9337479375555273, - "grad_norm": 0.5150366425514221, - "learning_rate": 7.1083470829631515e-06, - "loss": 0.0271, - "step": 76180 - }, - { - "epoch": 1.9338748572153825, - "grad_norm": 0.6496875882148743, - "learning_rate": 7.107500951897449e-06, - "loss": 0.0417, - "step": 76185 - }, - { - "epoch": 1.934001776875238, - "grad_norm": 0.44037237763404846, - "learning_rate": 7.1066548208317475e-06, - "loss": 0.0321, - "step": 76190 - }, - { - "epoch": 1.9341286965350935, - "grad_norm": 0.5211234092712402, - "learning_rate": 7.105808689766045e-06, - "loss": 0.0421, - "step": 76195 - }, - { - "epoch": 1.9342556161949487, - "grad_norm": 0.32373496890068054, - "learning_rate": 7.1049625587003434e-06, - "loss": 0.036, - "step": 76200 - }, - { - "epoch": 1.934382535854804, - "grad_norm": 0.5014294385910034, - "learning_rate": 7.104116427634641e-06, - "loss": 0.0485, - "step": 76205 - }, - { - "epoch": 1.9345094555146591, - "grad_norm": 0.7195017337799072, - "learning_rate": 7.103270296568939e-06, - "loss": 0.0462, - "step": 76210 - }, - { - "epoch": 1.9346363751745144, - "grad_norm": 0.5866661667823792, - "learning_rate": 7.102424165503237e-06, - "loss": 0.0366, - "step": 76215 - }, - { - "epoch": 1.9347632948343698, - "grad_norm": 0.3421565294265747, - "learning_rate": 7.101578034437535e-06, - "loss": 0.0287, - "step": 76220 - }, - { - "epoch": 1.9348902144942253, - "grad_norm": 0.47282856702804565, - "learning_rate": 7.100731903371833e-06, - "loss": 0.0447, - "step": 76225 - }, - { - "epoch": 1.9350171341540805, - "grad_norm": 0.5156549215316772, - "learning_rate": 7.099885772306131e-06, - "loss": 0.0346, - "step": 76230 - }, - { - "epoch": 1.9351440538139357, - "grad_norm": 0.5033981800079346, - "learning_rate": 7.099039641240428e-06, - "loss": 0.03, - "step": 76235 - }, - { - "epoch": 1.935270973473791, - "grad_norm": 0.44116395711898804, - "learning_rate": 7.098193510174726e-06, - "loss": 0.0388, - "step": 76240 - }, - { - "epoch": 1.9353978931336464, - "grad_norm": 0.28346630930900574, - "learning_rate": 7.097347379109024e-06, - "loss": 0.035, - "step": 76245 - }, - { - "epoch": 1.9355248127935019, - "grad_norm": 0.4160240888595581, - "learning_rate": 7.096501248043322e-06, - "loss": 0.0454, - "step": 76250 - }, - { - "epoch": 1.935651732453357, - "grad_norm": 0.5933586955070496, - "learning_rate": 7.09565511697762e-06, - "loss": 0.0385, - "step": 76255 - }, - { - "epoch": 1.9357786521132123, - "grad_norm": 0.7181417942047119, - "learning_rate": 7.094808985911918e-06, - "loss": 0.0445, - "step": 76260 - }, - { - "epoch": 1.9359055717730675, - "grad_norm": 0.4105479121208191, - "learning_rate": 7.093962854846216e-06, - "loss": 0.0353, - "step": 76265 - }, - { - "epoch": 1.936032491432923, - "grad_norm": 0.6463963389396667, - "learning_rate": 7.093116723780514e-06, - "loss": 0.023, - "step": 76270 - }, - { - "epoch": 1.9361594110927782, - "grad_norm": 0.41625189781188965, - "learning_rate": 7.092270592714812e-06, - "loss": 0.0452, - "step": 76275 - }, - { - "epoch": 1.9362863307526337, - "grad_norm": 0.26056963205337524, - "learning_rate": 7.09142446164911e-06, - "loss": 0.0415, - "step": 76280 - }, - { - "epoch": 1.936413250412489, - "grad_norm": 0.6791638135910034, - "learning_rate": 7.090578330583408e-06, - "loss": 0.0398, - "step": 76285 - }, - { - "epoch": 1.9365401700723441, - "grad_norm": 0.35644057393074036, - "learning_rate": 7.089732199517706e-06, - "loss": 0.0479, - "step": 76290 - }, - { - "epoch": 1.9366670897321994, - "grad_norm": 0.5690775513648987, - "learning_rate": 7.088886068452004e-06, - "loss": 0.0379, - "step": 76295 - }, - { - "epoch": 1.9367940093920548, - "grad_norm": 0.5211341977119446, - "learning_rate": 7.088039937386302e-06, - "loss": 0.0487, - "step": 76300 - }, - { - "epoch": 1.9369209290519103, - "grad_norm": 0.8043155074119568, - "learning_rate": 7.087193806320599e-06, - "loss": 0.0366, - "step": 76305 - }, - { - "epoch": 1.9370478487117655, - "grad_norm": 0.5393942594528198, - "learning_rate": 7.086347675254897e-06, - "loss": 0.0312, - "step": 76310 - }, - { - "epoch": 1.9371747683716207, - "grad_norm": 0.37667909264564514, - "learning_rate": 7.085501544189195e-06, - "loss": 0.0466, - "step": 76315 - }, - { - "epoch": 1.937301688031476, - "grad_norm": 0.26116710901260376, - "learning_rate": 7.084655413123493e-06, - "loss": 0.0401, - "step": 76320 - }, - { - "epoch": 1.9374286076913314, - "grad_norm": 0.3941524028778076, - "learning_rate": 7.083809282057791e-06, - "loss": 0.0319, - "step": 76325 - }, - { - "epoch": 1.9375555273511869, - "grad_norm": 0.5203989148139954, - "learning_rate": 7.082963150992089e-06, - "loss": 0.0544, - "step": 76330 - }, - { - "epoch": 1.937682447011042, - "grad_norm": 0.3035343885421753, - "learning_rate": 7.082117019926387e-06, - "loss": 0.055, - "step": 76335 - }, - { - "epoch": 1.9378093666708973, - "grad_norm": 0.5659580826759338, - "learning_rate": 7.081270888860685e-06, - "loss": 0.0309, - "step": 76340 - }, - { - "epoch": 1.9379362863307525, - "grad_norm": 0.46452441811561584, - "learning_rate": 7.080424757794983e-06, - "loss": 0.0202, - "step": 76345 - }, - { - "epoch": 1.938063205990608, - "grad_norm": 0.6195676326751709, - "learning_rate": 7.079578626729281e-06, - "loss": 0.0265, - "step": 76350 - }, - { - "epoch": 1.9381901256504632, - "grad_norm": 0.49253004789352417, - "learning_rate": 7.078732495663579e-06, - "loss": 0.0345, - "step": 76355 - }, - { - "epoch": 1.9383170453103187, - "grad_norm": 0.24763882160186768, - "learning_rate": 7.077886364597877e-06, - "loss": 0.037, - "step": 76360 - }, - { - "epoch": 1.938443964970174, - "grad_norm": 0.7562581896781921, - "learning_rate": 7.077040233532174e-06, - "loss": 0.0332, - "step": 76365 - }, - { - "epoch": 1.9385708846300291, - "grad_norm": 0.25674426555633545, - "learning_rate": 7.076194102466473e-06, - "loss": 0.0339, - "step": 76370 - }, - { - "epoch": 1.9386978042898844, - "grad_norm": 0.2978982627391815, - "learning_rate": 7.07534797140077e-06, - "loss": 0.0386, - "step": 76375 - }, - { - "epoch": 1.9388247239497398, - "grad_norm": 0.5034118294715881, - "learning_rate": 7.074501840335068e-06, - "loss": 0.0386, - "step": 76380 - }, - { - "epoch": 1.9389516436095953, - "grad_norm": 0.8073704838752747, - "learning_rate": 7.073655709269367e-06, - "loss": 0.0332, - "step": 76385 - }, - { - "epoch": 1.9390785632694505, - "grad_norm": 0.35473546385765076, - "learning_rate": 7.072809578203664e-06, - "loss": 0.0249, - "step": 76390 - }, - { - "epoch": 1.9392054829293057, - "grad_norm": 0.4315463602542877, - "learning_rate": 7.0719634471379625e-06, - "loss": 0.0414, - "step": 76395 - }, - { - "epoch": 1.939332402589161, - "grad_norm": 0.4654196798801422, - "learning_rate": 7.07111731607226e-06, - "loss": 0.0403, - "step": 76400 - }, - { - "epoch": 1.9394593222490164, - "grad_norm": 0.32635796070098877, - "learning_rate": 7.0702711850065584e-06, - "loss": 0.0321, - "step": 76405 - }, - { - "epoch": 1.9395862419088716, - "grad_norm": 0.5171048641204834, - "learning_rate": 7.069425053940856e-06, - "loss": 0.029, - "step": 76410 - }, - { - "epoch": 1.939713161568727, - "grad_norm": 0.6194607019424438, - "learning_rate": 7.068578922875154e-06, - "loss": 0.0428, - "step": 76415 - }, - { - "epoch": 1.9398400812285823, - "grad_norm": 0.2626444697380066, - "learning_rate": 7.067732791809452e-06, - "loss": 0.0352, - "step": 76420 - }, - { - "epoch": 1.9399670008884375, - "grad_norm": 0.17465917766094208, - "learning_rate": 7.06688666074375e-06, - "loss": 0.0316, - "step": 76425 - }, - { - "epoch": 1.9400939205482928, - "grad_norm": 1.1290727853775024, - "learning_rate": 7.066040529678048e-06, - "loss": 0.0394, - "step": 76430 - }, - { - "epoch": 1.9402208402081482, - "grad_norm": 0.4971480667591095, - "learning_rate": 7.065194398612346e-06, - "loss": 0.0328, - "step": 76435 - }, - { - "epoch": 1.9403477598680037, - "grad_norm": 0.37379148602485657, - "learning_rate": 7.064348267546644e-06, - "loss": 0.0305, - "step": 76440 - }, - { - "epoch": 1.940474679527859, - "grad_norm": 0.23614241182804108, - "learning_rate": 7.063502136480942e-06, - "loss": 0.035, - "step": 76445 - }, - { - "epoch": 1.9406015991877141, - "grad_norm": 0.3476111590862274, - "learning_rate": 7.062656005415239e-06, - "loss": 0.0413, - "step": 76450 - }, - { - "epoch": 1.9407285188475694, - "grad_norm": 0.4918750822544098, - "learning_rate": 7.061809874349538e-06, - "loss": 0.0491, - "step": 76455 - }, - { - "epoch": 1.9408554385074248, - "grad_norm": 0.5243987441062927, - "learning_rate": 7.060963743283835e-06, - "loss": 0.019, - "step": 76460 - }, - { - "epoch": 1.9409823581672803, - "grad_norm": 0.5134018659591675, - "learning_rate": 7.060117612218133e-06, - "loss": 0.0355, - "step": 76465 - }, - { - "epoch": 1.9411092778271355, - "grad_norm": 0.38532575964927673, - "learning_rate": 7.059271481152431e-06, - "loss": 0.0324, - "step": 76470 - }, - { - "epoch": 1.9412361974869907, - "grad_norm": 0.28648775815963745, - "learning_rate": 7.058425350086729e-06, - "loss": 0.0335, - "step": 76475 - }, - { - "epoch": 1.941363117146846, - "grad_norm": 0.40612733364105225, - "learning_rate": 7.057579219021027e-06, - "loss": 0.038, - "step": 76480 - }, - { - "epoch": 1.9414900368067014, - "grad_norm": 0.6986648440361023, - "learning_rate": 7.056733087955325e-06, - "loss": 0.0449, - "step": 76485 - }, - { - "epoch": 1.9416169564665566, - "grad_norm": 0.6758559942245483, - "learning_rate": 7.055886956889623e-06, - "loss": 0.0276, - "step": 76490 - }, - { - "epoch": 1.941743876126412, - "grad_norm": 0.5901528596878052, - "learning_rate": 7.055040825823921e-06, - "loss": 0.0275, - "step": 76495 - }, - { - "epoch": 1.9418707957862673, - "grad_norm": 0.23658862709999084, - "learning_rate": 7.054194694758219e-06, - "loss": 0.0231, - "step": 76500 - }, - { - "epoch": 1.9419977154461225, - "grad_norm": 0.571203887462616, - "learning_rate": 7.053348563692517e-06, - "loss": 0.0246, - "step": 76505 - }, - { - "epoch": 1.9421246351059778, - "grad_norm": 0.5041667819023132, - "learning_rate": 7.052502432626815e-06, - "loss": 0.0393, - "step": 76510 - }, - { - "epoch": 1.9422515547658332, - "grad_norm": 0.5616449117660522, - "learning_rate": 7.051656301561113e-06, - "loss": 0.0428, - "step": 76515 - }, - { - "epoch": 1.9423784744256887, - "grad_norm": 0.8617438077926636, - "learning_rate": 7.05081017049541e-06, - "loss": 0.0466, - "step": 76520 - }, - { - "epoch": 1.942505394085544, - "grad_norm": 0.2925800681114197, - "learning_rate": 7.049964039429708e-06, - "loss": 0.0289, - "step": 76525 - }, - { - "epoch": 1.9426323137453991, - "grad_norm": 0.1983722746372223, - "learning_rate": 7.049117908364006e-06, - "loss": 0.038, - "step": 76530 - }, - { - "epoch": 1.9427592334052544, - "grad_norm": 0.5769413113594055, - "learning_rate": 7.048271777298304e-06, - "loss": 0.0433, - "step": 76535 - }, - { - "epoch": 1.9428861530651098, - "grad_norm": 0.19565634429454803, - "learning_rate": 7.047425646232602e-06, - "loss": 0.032, - "step": 76540 - }, - { - "epoch": 1.9430130727249653, - "grad_norm": 0.40866976976394653, - "learning_rate": 7.0465795151669e-06, - "loss": 0.0377, - "step": 76545 - }, - { - "epoch": 1.9431399923848205, - "grad_norm": 0.19998197257518768, - "learning_rate": 7.045733384101198e-06, - "loss": 0.0177, - "step": 76550 - }, - { - "epoch": 1.9432669120446757, - "grad_norm": 0.4610229730606079, - "learning_rate": 7.044887253035496e-06, - "loss": 0.0398, - "step": 76555 - }, - { - "epoch": 1.943393831704531, - "grad_norm": 0.5808234214782715, - "learning_rate": 7.044041121969794e-06, - "loss": 0.0519, - "step": 76560 - }, - { - "epoch": 1.9435207513643862, - "grad_norm": 0.26421213150024414, - "learning_rate": 7.043194990904092e-06, - "loss": 0.0246, - "step": 76565 - }, - { - "epoch": 1.9436476710242416, - "grad_norm": 0.37938588857650757, - "learning_rate": 7.04234885983839e-06, - "loss": 0.0213, - "step": 76570 - }, - { - "epoch": 1.943774590684097, - "grad_norm": 0.6170992255210876, - "learning_rate": 7.041502728772688e-06, - "loss": 0.0392, - "step": 76575 - }, - { - "epoch": 1.9439015103439523, - "grad_norm": 0.2558404207229614, - "learning_rate": 7.0406565977069856e-06, - "loss": 0.0319, - "step": 76580 - }, - { - "epoch": 1.9440284300038075, - "grad_norm": 0.4346565902233124, - "learning_rate": 7.039810466641284e-06, - "loss": 0.0451, - "step": 76585 - }, - { - "epoch": 1.9441553496636628, - "grad_norm": 0.31885990500450134, - "learning_rate": 7.038964335575581e-06, - "loss": 0.0372, - "step": 76590 - }, - { - "epoch": 1.9442822693235182, - "grad_norm": 0.3930191397666931, - "learning_rate": 7.038118204509879e-06, - "loss": 0.0354, - "step": 76595 - }, - { - "epoch": 1.9444091889833737, - "grad_norm": 0.430752694606781, - "learning_rate": 7.037272073444177e-06, - "loss": 0.0327, - "step": 76600 - }, - { - "epoch": 1.944536108643229, - "grad_norm": 0.33366283774375916, - "learning_rate": 7.036425942378475e-06, - "loss": 0.0341, - "step": 76605 - }, - { - "epoch": 1.9446630283030841, - "grad_norm": 0.47918394207954407, - "learning_rate": 7.035579811312773e-06, - "loss": 0.0475, - "step": 76610 - }, - { - "epoch": 1.9447899479629394, - "grad_norm": 0.6646750569343567, - "learning_rate": 7.034733680247071e-06, - "loss": 0.0591, - "step": 76615 - }, - { - "epoch": 1.9449168676227948, - "grad_norm": 0.3877485394477844, - "learning_rate": 7.0338875491813685e-06, - "loss": 0.038, - "step": 76620 - }, - { - "epoch": 1.94504378728265, - "grad_norm": 0.3318592607975006, - "learning_rate": 7.033041418115667e-06, - "loss": 0.052, - "step": 76625 - }, - { - "epoch": 1.9451707069425055, - "grad_norm": 0.6236529350280762, - "learning_rate": 7.0321952870499645e-06, - "loss": 0.0342, - "step": 76630 - }, - { - "epoch": 1.9452976266023607, - "grad_norm": 0.28444012999534607, - "learning_rate": 7.031349155984263e-06, - "loss": 0.0247, - "step": 76635 - }, - { - "epoch": 1.945424546262216, - "grad_norm": 0.559786856174469, - "learning_rate": 7.0305030249185605e-06, - "loss": 0.0396, - "step": 76640 - }, - { - "epoch": 1.9455514659220712, - "grad_norm": 0.2638790011405945, - "learning_rate": 7.029656893852859e-06, - "loss": 0.046, - "step": 76645 - }, - { - "epoch": 1.9456783855819266, - "grad_norm": 0.48012346029281616, - "learning_rate": 7.028810762787156e-06, - "loss": 0.0362, - "step": 76650 - }, - { - "epoch": 1.945805305241782, - "grad_norm": 0.6275193095207214, - "learning_rate": 7.027964631721455e-06, - "loss": 0.0238, - "step": 76655 - }, - { - "epoch": 1.9459322249016373, - "grad_norm": 0.6238193511962891, - "learning_rate": 7.0271185006557515e-06, - "loss": 0.0349, - "step": 76660 - }, - { - "epoch": 1.9460591445614925, - "grad_norm": 0.4011465907096863, - "learning_rate": 7.02627236959005e-06, - "loss": 0.0586, - "step": 76665 - }, - { - "epoch": 1.9461860642213478, - "grad_norm": 0.3258163630962372, - "learning_rate": 7.0254262385243475e-06, - "loss": 0.037, - "step": 76670 - }, - { - "epoch": 1.9463129838812032, - "grad_norm": 0.46073371171951294, - "learning_rate": 7.024580107458646e-06, - "loss": 0.0432, - "step": 76675 - }, - { - "epoch": 1.9464399035410587, - "grad_norm": 0.17141635715961456, - "learning_rate": 7.0237339763929434e-06, - "loss": 0.0269, - "step": 76680 - }, - { - "epoch": 1.946566823200914, - "grad_norm": 0.48326626420021057, - "learning_rate": 7.022887845327242e-06, - "loss": 0.038, - "step": 76685 - }, - { - "epoch": 1.9466937428607691, - "grad_norm": 0.40521806478500366, - "learning_rate": 7.022041714261539e-06, - "loss": 0.0488, - "step": 76690 - }, - { - "epoch": 1.9468206625206244, - "grad_norm": 0.45545467734336853, - "learning_rate": 7.021195583195838e-06, - "loss": 0.0347, - "step": 76695 - }, - { - "epoch": 1.9469475821804798, - "grad_norm": 0.5342467427253723, - "learning_rate": 7.020349452130135e-06, - "loss": 0.0377, - "step": 76700 - }, - { - "epoch": 1.947074501840335, - "grad_norm": 0.3173241913318634, - "learning_rate": 7.019503321064434e-06, - "loss": 0.0368, - "step": 76705 - }, - { - "epoch": 1.9472014215001905, - "grad_norm": 0.42605090141296387, - "learning_rate": 7.018657189998731e-06, - "loss": 0.0213, - "step": 76710 - }, - { - "epoch": 1.9473283411600457, - "grad_norm": 0.4713876247406006, - "learning_rate": 7.01781105893303e-06, - "loss": 0.025, - "step": 76715 - }, - { - "epoch": 1.947455260819901, - "grad_norm": 0.4525589942932129, - "learning_rate": 7.016964927867327e-06, - "loss": 0.0245, - "step": 76720 - }, - { - "epoch": 1.9475821804797562, - "grad_norm": 0.47588619589805603, - "learning_rate": 7.016118796801626e-06, - "loss": 0.0374, - "step": 76725 - }, - { - "epoch": 1.9477091001396116, - "grad_norm": 0.3890552222728729, - "learning_rate": 7.015272665735922e-06, - "loss": 0.0249, - "step": 76730 - }, - { - "epoch": 1.947836019799467, - "grad_norm": 0.40480685234069824, - "learning_rate": 7.014426534670221e-06, - "loss": 0.02, - "step": 76735 - }, - { - "epoch": 1.9479629394593223, - "grad_norm": 0.6729466319084167, - "learning_rate": 7.013580403604518e-06, - "loss": 0.0273, - "step": 76740 - }, - { - "epoch": 1.9480898591191775, - "grad_norm": 0.35462555289268494, - "learning_rate": 7.012734272538817e-06, - "loss": 0.0415, - "step": 76745 - }, - { - "epoch": 1.9482167787790328, - "grad_norm": 0.2717261016368866, - "learning_rate": 7.011888141473114e-06, - "loss": 0.0275, - "step": 76750 - }, - { - "epoch": 1.9483436984388882, - "grad_norm": 0.5360318422317505, - "learning_rate": 7.011042010407413e-06, - "loss": 0.034, - "step": 76755 - }, - { - "epoch": 1.9484706180987434, - "grad_norm": 0.7292970418930054, - "learning_rate": 7.01019587934171e-06, - "loss": 0.047, - "step": 76760 - }, - { - "epoch": 1.948597537758599, - "grad_norm": 0.6523832678794861, - "learning_rate": 7.009349748276009e-06, - "loss": 0.0423, - "step": 76765 - }, - { - "epoch": 1.9487244574184541, - "grad_norm": 0.5226122140884399, - "learning_rate": 7.008503617210306e-06, - "loss": 0.0413, - "step": 76770 - }, - { - "epoch": 1.9488513770783094, - "grad_norm": 0.28438401222229004, - "learning_rate": 7.007657486144605e-06, - "loss": 0.0408, - "step": 76775 - }, - { - "epoch": 1.9489782967381646, - "grad_norm": 0.42178279161453247, - "learning_rate": 7.006811355078902e-06, - "loss": 0.0302, - "step": 76780 - }, - { - "epoch": 1.94910521639802, - "grad_norm": 0.3031829297542572, - "learning_rate": 7.0059652240132006e-06, - "loss": 0.0355, - "step": 76785 - }, - { - "epoch": 1.9492321360578755, - "grad_norm": 0.1762445569038391, - "learning_rate": 7.005119092947498e-06, - "loss": 0.0284, - "step": 76790 - }, - { - "epoch": 1.9493590557177307, - "grad_norm": 0.3107685446739197, - "learning_rate": 7.0042729618817965e-06, - "loss": 0.0322, - "step": 76795 - }, - { - "epoch": 1.949485975377586, - "grad_norm": 0.8929114937782288, - "learning_rate": 7.003426830816093e-06, - "loss": 0.0329, - "step": 76800 - }, - { - "epoch": 1.9496128950374412, - "grad_norm": 0.6421287655830383, - "learning_rate": 7.002580699750392e-06, - "loss": 0.0399, - "step": 76805 - }, - { - "epoch": 1.9497398146972966, - "grad_norm": 0.4847058951854706, - "learning_rate": 7.001734568684689e-06, - "loss": 0.0502, - "step": 76810 - }, - { - "epoch": 1.949866734357152, - "grad_norm": 0.28297531604766846, - "learning_rate": 7.000888437618988e-06, - "loss": 0.0419, - "step": 76815 - }, - { - "epoch": 1.9499936540170073, - "grad_norm": 1.4537732601165771, - "learning_rate": 7.000042306553285e-06, - "loss": 0.0238, - "step": 76820 - }, - { - "epoch": 1.9501205736768625, - "grad_norm": 0.4005100429058075, - "learning_rate": 6.9991961754875835e-06, - "loss": 0.0475, - "step": 76825 - }, - { - "epoch": 1.9502474933367178, - "grad_norm": 0.5635502338409424, - "learning_rate": 6.998350044421881e-06, - "loss": 0.0491, - "step": 76830 - }, - { - "epoch": 1.9503744129965732, - "grad_norm": 0.4310988783836365, - "learning_rate": 6.9975039133561795e-06, - "loss": 0.0426, - "step": 76835 - }, - { - "epoch": 1.9505013326564284, - "grad_norm": 0.5304768085479736, - "learning_rate": 6.996657782290477e-06, - "loss": 0.0431, - "step": 76840 - }, - { - "epoch": 1.950628252316284, - "grad_norm": 0.22497929632663727, - "learning_rate": 6.9958116512247755e-06, - "loss": 0.0323, - "step": 76845 - }, - { - "epoch": 1.9507551719761391, - "grad_norm": 0.46571019291877747, - "learning_rate": 6.994965520159073e-06, - "loss": 0.0426, - "step": 76850 - }, - { - "epoch": 1.9508820916359944, - "grad_norm": 0.2974998652935028, - "learning_rate": 6.994119389093371e-06, - "loss": 0.0226, - "step": 76855 - }, - { - "epoch": 1.9510090112958496, - "grad_norm": 0.3088513910770416, - "learning_rate": 6.993273258027668e-06, - "loss": 0.0284, - "step": 76860 - }, - { - "epoch": 1.951135930955705, - "grad_norm": 0.49352848529815674, - "learning_rate": 6.992427126961967e-06, - "loss": 0.0386, - "step": 76865 - }, - { - "epoch": 1.9512628506155605, - "grad_norm": 0.8068318963050842, - "learning_rate": 6.991580995896264e-06, - "loss": 0.0383, - "step": 76870 - }, - { - "epoch": 1.9513897702754157, - "grad_norm": 0.4470415413379669, - "learning_rate": 6.9907348648305625e-06, - "loss": 0.0493, - "step": 76875 - }, - { - "epoch": 1.951516689935271, - "grad_norm": 0.5609257221221924, - "learning_rate": 6.98988873376486e-06, - "loss": 0.0438, - "step": 76880 - }, - { - "epoch": 1.9516436095951262, - "grad_norm": 0.26469704508781433, - "learning_rate": 6.9890426026991584e-06, - "loss": 0.0237, - "step": 76885 - }, - { - "epoch": 1.9517705292549816, - "grad_norm": 0.32592323422431946, - "learning_rate": 6.988196471633456e-06, - "loss": 0.0446, - "step": 76890 - }, - { - "epoch": 1.951897448914837, - "grad_norm": 0.4591796100139618, - "learning_rate": 6.987350340567754e-06, - "loss": 0.0343, - "step": 76895 - }, - { - "epoch": 1.9520243685746923, - "grad_norm": 0.36999961733818054, - "learning_rate": 6.986504209502053e-06, - "loss": 0.0326, - "step": 76900 - }, - { - "epoch": 1.9521512882345475, - "grad_norm": 0.4045880138874054, - "learning_rate": 6.98565807843635e-06, - "loss": 0.0208, - "step": 76905 - }, - { - "epoch": 1.9522782078944028, - "grad_norm": 0.4518730044364929, - "learning_rate": 6.984811947370649e-06, - "loss": 0.031, - "step": 76910 - }, - { - "epoch": 1.952405127554258, - "grad_norm": 0.4573778212070465, - "learning_rate": 6.983965816304946e-06, - "loss": 0.0568, - "step": 76915 - }, - { - "epoch": 1.9525320472141134, - "grad_norm": 0.33253157138824463, - "learning_rate": 6.983119685239245e-06, - "loss": 0.0447, - "step": 76920 - }, - { - "epoch": 1.952658966873969, - "grad_norm": 0.2776288688182831, - "learning_rate": 6.982273554173542e-06, - "loss": 0.0389, - "step": 76925 - }, - { - "epoch": 1.9527858865338241, - "grad_norm": 0.17647047340869904, - "learning_rate": 6.981427423107841e-06, - "loss": 0.0493, - "step": 76930 - }, - { - "epoch": 1.9529128061936794, - "grad_norm": 0.6416774392127991, - "learning_rate": 6.980581292042138e-06, - "loss": 0.0476, - "step": 76935 - }, - { - "epoch": 1.9530397258535346, - "grad_norm": 0.7624486088752747, - "learning_rate": 6.979735160976437e-06, - "loss": 0.0248, - "step": 76940 - }, - { - "epoch": 1.95316664551339, - "grad_norm": 0.37712669372558594, - "learning_rate": 6.978889029910733e-06, - "loss": 0.0361, - "step": 76945 - }, - { - "epoch": 1.9532935651732455, - "grad_norm": 0.3226088583469391, - "learning_rate": 6.978042898845033e-06, - "loss": 0.0336, - "step": 76950 - }, - { - "epoch": 1.9534204848331007, - "grad_norm": 0.40120208263397217, - "learning_rate": 6.977196767779329e-06, - "loss": 0.0341, - "step": 76955 - }, - { - "epoch": 1.953547404492956, - "grad_norm": 0.5031260251998901, - "learning_rate": 6.976350636713628e-06, - "loss": 0.0532, - "step": 76960 - }, - { - "epoch": 1.9536743241528112, - "grad_norm": 0.3247651159763336, - "learning_rate": 6.975504505647925e-06, - "loss": 0.0298, - "step": 76965 - }, - { - "epoch": 1.9538012438126666, - "grad_norm": 0.3089047074317932, - "learning_rate": 6.974658374582224e-06, - "loss": 0.0241, - "step": 76970 - }, - { - "epoch": 1.9539281634725219, - "grad_norm": 0.4457257091999054, - "learning_rate": 6.973812243516521e-06, - "loss": 0.0321, - "step": 76975 - }, - { - "epoch": 1.9540550831323773, - "grad_norm": 0.6031153202056885, - "learning_rate": 6.97296611245082e-06, - "loss": 0.0421, - "step": 76980 - }, - { - "epoch": 1.9541820027922325, - "grad_norm": 0.5066336989402771, - "learning_rate": 6.972119981385117e-06, - "loss": 0.032, - "step": 76985 - }, - { - "epoch": 1.9543089224520878, - "grad_norm": 0.41243216395378113, - "learning_rate": 6.9712738503194156e-06, - "loss": 0.0384, - "step": 76990 - }, - { - "epoch": 1.954435842111943, - "grad_norm": 0.47830286622047424, - "learning_rate": 6.970427719253713e-06, - "loss": 0.0362, - "step": 76995 - }, - { - "epoch": 1.9545627617717984, - "grad_norm": 0.302984356880188, - "learning_rate": 6.9695815881880115e-06, - "loss": 0.045, - "step": 77000 - }, - { - "epoch": 1.954689681431654, - "grad_norm": 0.38966426253318787, - "learning_rate": 6.968735457122309e-06, - "loss": 0.0311, - "step": 77005 - }, - { - "epoch": 1.9548166010915091, - "grad_norm": 0.28358331322669983, - "learning_rate": 6.9678893260566075e-06, - "loss": 0.0228, - "step": 77010 - }, - { - "epoch": 1.9549435207513643, - "grad_norm": 0.6244962811470032, - "learning_rate": 6.967043194990904e-06, - "loss": 0.0381, - "step": 77015 - }, - { - "epoch": 1.9550704404112196, - "grad_norm": 0.5895915031433105, - "learning_rate": 6.966197063925203e-06, - "loss": 0.05, - "step": 77020 - }, - { - "epoch": 1.955197360071075, - "grad_norm": 0.5586034655570984, - "learning_rate": 6.9653509328595e-06, - "loss": 0.0368, - "step": 77025 - }, - { - "epoch": 1.9553242797309305, - "grad_norm": 0.3131779432296753, - "learning_rate": 6.9645048017937985e-06, - "loss": 0.038, - "step": 77030 - }, - { - "epoch": 1.9554511993907857, - "grad_norm": 0.5053142309188843, - "learning_rate": 6.963658670728096e-06, - "loss": 0.0259, - "step": 77035 - }, - { - "epoch": 1.955578119050641, - "grad_norm": 1.3042948246002197, - "learning_rate": 6.9628125396623945e-06, - "loss": 0.0524, - "step": 77040 - }, - { - "epoch": 1.9557050387104962, - "grad_norm": 0.3366584777832031, - "learning_rate": 6.961966408596692e-06, - "loss": 0.0264, - "step": 77045 - }, - { - "epoch": 1.9558319583703516, - "grad_norm": 0.5185450315475464, - "learning_rate": 6.9611202775309905e-06, - "loss": 0.0232, - "step": 77050 - }, - { - "epoch": 1.9559588780302068, - "grad_norm": 0.4044863283634186, - "learning_rate": 6.960274146465288e-06, - "loss": 0.0409, - "step": 77055 - }, - { - "epoch": 1.9560857976900623, - "grad_norm": 0.33047229051589966, - "learning_rate": 6.959428015399586e-06, - "loss": 0.0404, - "step": 77060 - }, - { - "epoch": 1.9562127173499175, - "grad_norm": 0.24141333997249603, - "learning_rate": 6.958581884333884e-06, - "loss": 0.0248, - "step": 77065 - }, - { - "epoch": 1.9563396370097728, - "grad_norm": 0.43195632100105286, - "learning_rate": 6.957735753268182e-06, - "loss": 0.0243, - "step": 77070 - }, - { - "epoch": 1.956466556669628, - "grad_norm": 0.5419920086860657, - "learning_rate": 6.95688962220248e-06, - "loss": 0.0402, - "step": 77075 - }, - { - "epoch": 1.9565934763294834, - "grad_norm": 0.4786190390586853, - "learning_rate": 6.956043491136778e-06, - "loss": 0.0375, - "step": 77080 - }, - { - "epoch": 1.956720395989339, - "grad_norm": 0.377890020608902, - "learning_rate": 6.955197360071075e-06, - "loss": 0.0357, - "step": 77085 - }, - { - "epoch": 1.9568473156491941, - "grad_norm": 0.3752479553222656, - "learning_rate": 6.9543512290053734e-06, - "loss": 0.044, - "step": 77090 - }, - { - "epoch": 1.9569742353090493, - "grad_norm": 0.40580594539642334, - "learning_rate": 6.953505097939671e-06, - "loss": 0.0326, - "step": 77095 - }, - { - "epoch": 1.9571011549689046, - "grad_norm": 0.35719430446624756, - "learning_rate": 6.952658966873969e-06, - "loss": 0.0341, - "step": 77100 - }, - { - "epoch": 1.95722807462876, - "grad_norm": 0.6974915266036987, - "learning_rate": 6.951812835808267e-06, - "loss": 0.038, - "step": 77105 - }, - { - "epoch": 1.9573549942886153, - "grad_norm": 0.672092616558075, - "learning_rate": 6.950966704742565e-06, - "loss": 0.0489, - "step": 77110 - }, - { - "epoch": 1.9574819139484707, - "grad_norm": 0.2070678174495697, - "learning_rate": 6.950120573676863e-06, - "loss": 0.014, - "step": 77115 - }, - { - "epoch": 1.957608833608326, - "grad_norm": 0.49349793791770935, - "learning_rate": 6.949274442611161e-06, - "loss": 0.0356, - "step": 77120 - }, - { - "epoch": 1.9577357532681812, - "grad_norm": 0.6309827566146851, - "learning_rate": 6.948428311545459e-06, - "loss": 0.0318, - "step": 77125 - }, - { - "epoch": 1.9578626729280364, - "grad_norm": 0.4166468679904938, - "learning_rate": 6.947582180479757e-06, - "loss": 0.0365, - "step": 77130 - }, - { - "epoch": 1.9579895925878918, - "grad_norm": 0.8753578066825867, - "learning_rate": 6.946736049414055e-06, - "loss": 0.0406, - "step": 77135 - }, - { - "epoch": 1.9581165122477473, - "grad_norm": 0.22016304731369019, - "learning_rate": 6.945889918348353e-06, - "loss": 0.0268, - "step": 77140 - }, - { - "epoch": 1.9582434319076025, - "grad_norm": 0.6496444344520569, - "learning_rate": 6.945043787282651e-06, - "loss": 0.0454, - "step": 77145 - }, - { - "epoch": 1.9583703515674578, - "grad_norm": 0.38155320286750793, - "learning_rate": 6.944197656216949e-06, - "loss": 0.029, - "step": 77150 - }, - { - "epoch": 1.958497271227313, - "grad_norm": 0.40794140100479126, - "learning_rate": 6.943351525151246e-06, - "loss": 0.0398, - "step": 77155 - }, - { - "epoch": 1.9586241908871684, - "grad_norm": 0.2668152451515198, - "learning_rate": 6.942505394085544e-06, - "loss": 0.0275, - "step": 77160 - }, - { - "epoch": 1.9587511105470239, - "grad_norm": 0.3501046299934387, - "learning_rate": 6.941659263019842e-06, - "loss": 0.0229, - "step": 77165 - }, - { - "epoch": 1.9588780302068791, - "grad_norm": 0.43572574853897095, - "learning_rate": 6.94081313195414e-06, - "loss": 0.0299, - "step": 77170 - }, - { - "epoch": 1.9590049498667343, - "grad_norm": 0.33204033970832825, - "learning_rate": 6.939967000888438e-06, - "loss": 0.0301, - "step": 77175 - }, - { - "epoch": 1.9591318695265896, - "grad_norm": 0.3122084438800812, - "learning_rate": 6.939120869822736e-06, - "loss": 0.0312, - "step": 77180 - }, - { - "epoch": 1.959258789186445, - "grad_norm": 0.4941205084323883, - "learning_rate": 6.938274738757034e-06, - "loss": 0.0286, - "step": 77185 - }, - { - "epoch": 1.9593857088463003, - "grad_norm": 0.4943714141845703, - "learning_rate": 6.937428607691332e-06, - "loss": 0.0388, - "step": 77190 - }, - { - "epoch": 1.9595126285061557, - "grad_norm": 0.670510470867157, - "learning_rate": 6.93658247662563e-06, - "loss": 0.0243, - "step": 77195 - }, - { - "epoch": 1.959639548166011, - "grad_norm": 0.45588093996047974, - "learning_rate": 6.935736345559928e-06, - "loss": 0.0408, - "step": 77200 - }, - { - "epoch": 1.9597664678258662, - "grad_norm": 0.39563360810279846, - "learning_rate": 6.934890214494226e-06, - "loss": 0.0267, - "step": 77205 - }, - { - "epoch": 1.9598933874857214, - "grad_norm": 0.3009461760520935, - "learning_rate": 6.934044083428524e-06, - "loss": 0.0409, - "step": 77210 - }, - { - "epoch": 1.9600203071455768, - "grad_norm": 0.2689210772514343, - "learning_rate": 6.933197952362822e-06, - "loss": 0.0573, - "step": 77215 - }, - { - "epoch": 1.9601472268054323, - "grad_norm": 0.43744969367980957, - "learning_rate": 6.93235182129712e-06, - "loss": 0.032, - "step": 77220 - }, - { - "epoch": 1.9602741464652875, - "grad_norm": 0.46960392594337463, - "learning_rate": 6.931505690231417e-06, - "loss": 0.0418, - "step": 77225 - }, - { - "epoch": 1.9604010661251428, - "grad_norm": 0.6082571148872375, - "learning_rate": 6.930659559165715e-06, - "loss": 0.034, - "step": 77230 - }, - { - "epoch": 1.960527985784998, - "grad_norm": 0.3082669973373413, - "learning_rate": 6.929813428100013e-06, - "loss": 0.035, - "step": 77235 - }, - { - "epoch": 1.9606549054448534, - "grad_norm": 0.2281263768672943, - "learning_rate": 6.928967297034311e-06, - "loss": 0.0231, - "step": 77240 - }, - { - "epoch": 1.9607818251047087, - "grad_norm": 0.40201336145401, - "learning_rate": 6.928121165968609e-06, - "loss": 0.0291, - "step": 77245 - }, - { - "epoch": 1.9609087447645641, - "grad_norm": 0.1701175570487976, - "learning_rate": 6.927275034902907e-06, - "loss": 0.0507, - "step": 77250 - }, - { - "epoch": 1.9610356644244193, - "grad_norm": 0.4639756679534912, - "learning_rate": 6.926428903837205e-06, - "loss": 0.0331, - "step": 77255 - }, - { - "epoch": 1.9611625840842746, - "grad_norm": 0.2804156541824341, - "learning_rate": 6.925582772771503e-06, - "loss": 0.0586, - "step": 77260 - }, - { - "epoch": 1.9612895037441298, - "grad_norm": 0.448494017124176, - "learning_rate": 6.9247366417058006e-06, - "loss": 0.0425, - "step": 77265 - }, - { - "epoch": 1.9614164234039853, - "grad_norm": 0.6430150866508484, - "learning_rate": 6.923890510640099e-06, - "loss": 0.0511, - "step": 77270 - }, - { - "epoch": 1.9615433430638407, - "grad_norm": 0.33439165353775024, - "learning_rate": 6.9230443795743965e-06, - "loss": 0.0288, - "step": 77275 - }, - { - "epoch": 1.961670262723696, - "grad_norm": 0.4962165951728821, - "learning_rate": 6.922198248508695e-06, - "loss": 0.039, - "step": 77280 - }, - { - "epoch": 1.9617971823835512, - "grad_norm": 0.7094833850860596, - "learning_rate": 6.9213521174429925e-06, - "loss": 0.0411, - "step": 77285 - }, - { - "epoch": 1.9619241020434064, - "grad_norm": 0.21808743476867676, - "learning_rate": 6.920505986377291e-06, - "loss": 0.0241, - "step": 77290 - }, - { - "epoch": 1.9620510217032618, - "grad_norm": 0.2844283878803253, - "learning_rate": 6.919659855311588e-06, - "loss": 0.0261, - "step": 77295 - }, - { - "epoch": 1.9621779413631173, - "grad_norm": 0.5544857978820801, - "learning_rate": 6.918813724245886e-06, - "loss": 0.0323, - "step": 77300 - }, - { - "epoch": 1.9623048610229725, - "grad_norm": 0.3592235743999481, - "learning_rate": 6.9179675931801836e-06, - "loss": 0.0367, - "step": 77305 - }, - { - "epoch": 1.9624317806828278, - "grad_norm": 0.6550743579864502, - "learning_rate": 6.917121462114482e-06, - "loss": 0.066, - "step": 77310 - }, - { - "epoch": 1.962558700342683, - "grad_norm": 0.2931695282459259, - "learning_rate": 6.9162753310487795e-06, - "loss": 0.0364, - "step": 77315 - }, - { - "epoch": 1.9626856200025384, - "grad_norm": 0.3883216381072998, - "learning_rate": 6.915429199983078e-06, - "loss": 0.0277, - "step": 77320 - }, - { - "epoch": 1.9628125396623937, - "grad_norm": 0.3585388660430908, - "learning_rate": 6.9145830689173755e-06, - "loss": 0.0371, - "step": 77325 - }, - { - "epoch": 1.9629394593222491, - "grad_norm": 0.35655301809310913, - "learning_rate": 6.913736937851674e-06, - "loss": 0.0391, - "step": 77330 - }, - { - "epoch": 1.9630663789821043, - "grad_norm": 0.775990903377533, - "learning_rate": 6.912890806785971e-06, - "loss": 0.0465, - "step": 77335 - }, - { - "epoch": 1.9631932986419596, - "grad_norm": 0.5534085035324097, - "learning_rate": 6.91204467572027e-06, - "loss": 0.0362, - "step": 77340 - }, - { - "epoch": 1.9633202183018148, - "grad_norm": 0.6283255815505981, - "learning_rate": 6.911198544654567e-06, - "loss": 0.0316, - "step": 77345 - }, - { - "epoch": 1.9634471379616703, - "grad_norm": 0.5085233449935913, - "learning_rate": 6.910352413588866e-06, - "loss": 0.0239, - "step": 77350 - }, - { - "epoch": 1.9635740576215257, - "grad_norm": 0.34132814407348633, - "learning_rate": 6.9095062825231625e-06, - "loss": 0.0254, - "step": 77355 - }, - { - "epoch": 1.963700977281381, - "grad_norm": 0.6846427321434021, - "learning_rate": 6.908660151457462e-06, - "loss": 0.0314, - "step": 77360 - }, - { - "epoch": 1.9638278969412362, - "grad_norm": 0.4783858358860016, - "learning_rate": 6.9078140203917584e-06, - "loss": 0.0366, - "step": 77365 - }, - { - "epoch": 1.9639548166010914, - "grad_norm": 0.5292547345161438, - "learning_rate": 6.906967889326057e-06, - "loss": 0.0398, - "step": 77370 - }, - { - "epoch": 1.9640817362609468, - "grad_norm": 0.45682135224342346, - "learning_rate": 6.906121758260354e-06, - "loss": 0.0257, - "step": 77375 - }, - { - "epoch": 1.9642086559208023, - "grad_norm": 0.2864401936531067, - "learning_rate": 6.905275627194653e-06, - "loss": 0.0225, - "step": 77380 - }, - { - "epoch": 1.9643355755806575, - "grad_norm": 0.37517836689949036, - "learning_rate": 6.90442949612895e-06, - "loss": 0.0265, - "step": 77385 - }, - { - "epoch": 1.9644624952405128, - "grad_norm": 0.4553063213825226, - "learning_rate": 6.903583365063249e-06, - "loss": 0.0219, - "step": 77390 - }, - { - "epoch": 1.964589414900368, - "grad_norm": 1.0911295413970947, - "learning_rate": 6.902737233997546e-06, - "loss": 0.036, - "step": 77395 - }, - { - "epoch": 1.9647163345602234, - "grad_norm": 0.395443856716156, - "learning_rate": 6.901891102931845e-06, - "loss": 0.0257, - "step": 77400 - }, - { - "epoch": 1.9648432542200787, - "grad_norm": 0.4179092347621918, - "learning_rate": 6.901044971866142e-06, - "loss": 0.0395, - "step": 77405 - }, - { - "epoch": 1.9649701738799341, - "grad_norm": 0.4386545419692993, - "learning_rate": 6.900198840800441e-06, - "loss": 0.029, - "step": 77410 - }, - { - "epoch": 1.9650970935397893, - "grad_norm": 0.3148868978023529, - "learning_rate": 6.899352709734739e-06, - "loss": 0.0337, - "step": 77415 - }, - { - "epoch": 1.9652240131996446, - "grad_norm": 0.5130661725997925, - "learning_rate": 6.898506578669037e-06, - "loss": 0.0362, - "step": 77420 - }, - { - "epoch": 1.9653509328594998, - "grad_norm": 0.31969889998435974, - "learning_rate": 6.897660447603335e-06, - "loss": 0.0346, - "step": 77425 - }, - { - "epoch": 1.9654778525193553, - "grad_norm": 0.693089485168457, - "learning_rate": 6.896814316537633e-06, - "loss": 0.0589, - "step": 77430 - }, - { - "epoch": 1.9656047721792107, - "grad_norm": 0.4845138192176819, - "learning_rate": 6.895968185471931e-06, - "loss": 0.0352, - "step": 77435 - }, - { - "epoch": 1.965731691839066, - "grad_norm": 0.4698927700519562, - "learning_rate": 6.895122054406228e-06, - "loss": 0.028, - "step": 77440 - }, - { - "epoch": 1.9658586114989212, - "grad_norm": 0.4023277759552002, - "learning_rate": 6.894275923340527e-06, - "loss": 0.0582, - "step": 77445 - }, - { - "epoch": 1.9659855311587764, - "grad_norm": 0.38884156942367554, - "learning_rate": 6.893429792274824e-06, - "loss": 0.0257, - "step": 77450 - }, - { - "epoch": 1.9661124508186318, - "grad_norm": 0.5911761522293091, - "learning_rate": 6.892583661209122e-06, - "loss": 0.0411, - "step": 77455 - }, - { - "epoch": 1.966239370478487, - "grad_norm": 0.6749811172485352, - "learning_rate": 6.89173753014342e-06, - "loss": 0.0528, - "step": 77460 - }, - { - "epoch": 1.9663662901383425, - "grad_norm": 0.8019583821296692, - "learning_rate": 6.890891399077718e-06, - "loss": 0.0391, - "step": 77465 - }, - { - "epoch": 1.9664932097981977, - "grad_norm": 0.3851868808269501, - "learning_rate": 6.8900452680120156e-06, - "loss": 0.0461, - "step": 77470 - }, - { - "epoch": 1.966620129458053, - "grad_norm": 0.4007853865623474, - "learning_rate": 6.889199136946314e-06, - "loss": 0.0435, - "step": 77475 - }, - { - "epoch": 1.9667470491179082, - "grad_norm": 0.3810976445674896, - "learning_rate": 6.8883530058806115e-06, - "loss": 0.037, - "step": 77480 - }, - { - "epoch": 1.9668739687777637, - "grad_norm": 0.5003877878189087, - "learning_rate": 6.88750687481491e-06, - "loss": 0.0279, - "step": 77485 - }, - { - "epoch": 1.967000888437619, - "grad_norm": 0.42285171151161194, - "learning_rate": 6.8866607437492075e-06, - "loss": 0.0278, - "step": 77490 - }, - { - "epoch": 1.9671278080974743, - "grad_norm": 0.47383370995521545, - "learning_rate": 6.885814612683506e-06, - "loss": 0.0239, - "step": 77495 - }, - { - "epoch": 1.9672547277573296, - "grad_norm": 0.5083811283111572, - "learning_rate": 6.8849684816178034e-06, - "loss": 0.0357, - "step": 77500 - }, - { - "epoch": 1.9673816474171848, - "grad_norm": 0.4648616313934326, - "learning_rate": 6.884122350552102e-06, - "loss": 0.0324, - "step": 77505 - }, - { - "epoch": 1.9675085670770402, - "grad_norm": 0.42434000968933105, - "learning_rate": 6.8832762194863986e-06, - "loss": 0.0504, - "step": 77510 - }, - { - "epoch": 1.9676354867368957, - "grad_norm": 0.39278072118759155, - "learning_rate": 6.882430088420697e-06, - "loss": 0.0465, - "step": 77515 - }, - { - "epoch": 1.967762406396751, - "grad_norm": 0.41664397716522217, - "learning_rate": 6.8815839573549945e-06, - "loss": 0.0328, - "step": 77520 - }, - { - "epoch": 1.9678893260566062, - "grad_norm": 1.0113558769226074, - "learning_rate": 6.880737826289293e-06, - "loss": 0.0291, - "step": 77525 - }, - { - "epoch": 1.9680162457164614, - "grad_norm": 0.45184245705604553, - "learning_rate": 6.8798916952235905e-06, - "loss": 0.0304, - "step": 77530 - }, - { - "epoch": 1.9681431653763168, - "grad_norm": 0.4482358694076538, - "learning_rate": 6.879045564157889e-06, - "loss": 0.0341, - "step": 77535 - }, - { - "epoch": 1.968270085036172, - "grad_norm": 0.3893332779407501, - "learning_rate": 6.8781994330921864e-06, - "loss": 0.0414, - "step": 77540 - }, - { - "epoch": 1.9683970046960275, - "grad_norm": 0.5228439569473267, - "learning_rate": 6.877353302026485e-06, - "loss": 0.04, - "step": 77545 - }, - { - "epoch": 1.9685239243558827, - "grad_norm": 0.2589626908302307, - "learning_rate": 6.876507170960782e-06, - "loss": 0.0217, - "step": 77550 - }, - { - "epoch": 1.968650844015738, - "grad_norm": 0.5864951610565186, - "learning_rate": 6.875661039895081e-06, - "loss": 0.0514, - "step": 77555 - }, - { - "epoch": 1.9687777636755932, - "grad_norm": 0.5819805860519409, - "learning_rate": 6.874814908829378e-06, - "loss": 0.0294, - "step": 77560 - }, - { - "epoch": 1.9689046833354487, - "grad_norm": 1.075878381729126, - "learning_rate": 6.873968777763677e-06, - "loss": 0.0368, - "step": 77565 - }, - { - "epoch": 1.969031602995304, - "grad_norm": 0.3995381295681, - "learning_rate": 6.873122646697974e-06, - "loss": 0.0299, - "step": 77570 - }, - { - "epoch": 1.9691585226551593, - "grad_norm": 0.49363958835601807, - "learning_rate": 6.872276515632273e-06, - "loss": 0.0323, - "step": 77575 - }, - { - "epoch": 1.9692854423150146, - "grad_norm": 0.3372837007045746, - "learning_rate": 6.871430384566569e-06, - "loss": 0.034, - "step": 77580 - }, - { - "epoch": 1.9694123619748698, - "grad_norm": 0.46875953674316406, - "learning_rate": 6.870584253500868e-06, - "loss": 0.032, - "step": 77585 - }, - { - "epoch": 1.9695392816347252, - "grad_norm": 0.32090193033218384, - "learning_rate": 6.869738122435165e-06, - "loss": 0.0406, - "step": 77590 - }, - { - "epoch": 1.9696662012945805, - "grad_norm": 0.41627171635627747, - "learning_rate": 6.868891991369464e-06, - "loss": 0.026, - "step": 77595 - }, - { - "epoch": 1.969793120954436, - "grad_norm": 0.3795778751373291, - "learning_rate": 6.868045860303761e-06, - "loss": 0.0402, - "step": 77600 - }, - { - "epoch": 1.9699200406142912, - "grad_norm": 0.32386553287506104, - "learning_rate": 6.86719972923806e-06, - "loss": 0.0435, - "step": 77605 - }, - { - "epoch": 1.9700469602741464, - "grad_norm": 0.5795367360115051, - "learning_rate": 6.866353598172357e-06, - "loss": 0.0179, - "step": 77610 - }, - { - "epoch": 1.9701738799340016, - "grad_norm": 0.3629811704158783, - "learning_rate": 6.865507467106656e-06, - "loss": 0.0386, - "step": 77615 - }, - { - "epoch": 1.970300799593857, - "grad_norm": 0.7190067172050476, - "learning_rate": 6.864661336040953e-06, - "loss": 0.038, - "step": 77620 - }, - { - "epoch": 1.9704277192537125, - "grad_norm": 0.39813172817230225, - "learning_rate": 6.863815204975252e-06, - "loss": 0.0273, - "step": 77625 - }, - { - "epoch": 1.9705546389135677, - "grad_norm": 0.22270257771015167, - "learning_rate": 6.862969073909549e-06, - "loss": 0.041, - "step": 77630 - }, - { - "epoch": 1.970681558573423, - "grad_norm": 0.449643075466156, - "learning_rate": 6.862122942843848e-06, - "loss": 0.0349, - "step": 77635 - }, - { - "epoch": 1.9708084782332782, - "grad_norm": 0.35184094309806824, - "learning_rate": 6.861276811778145e-06, - "loss": 0.0391, - "step": 77640 - }, - { - "epoch": 1.9709353978931337, - "grad_norm": 0.5204915404319763, - "learning_rate": 6.8604306807124435e-06, - "loss": 0.0261, - "step": 77645 - }, - { - "epoch": 1.971062317552989, - "grad_norm": 0.5338562726974487, - "learning_rate": 6.85958454964674e-06, - "loss": 0.0405, - "step": 77650 - }, - { - "epoch": 1.9711892372128443, - "grad_norm": 0.49335014820098877, - "learning_rate": 6.858738418581039e-06, - "loss": 0.0446, - "step": 77655 - }, - { - "epoch": 1.9713161568726996, - "grad_norm": 0.3632381856441498, - "learning_rate": 6.857892287515336e-06, - "loss": 0.0263, - "step": 77660 - }, - { - "epoch": 1.9714430765325548, - "grad_norm": 0.4274775981903076, - "learning_rate": 6.857046156449635e-06, - "loss": 0.0251, - "step": 77665 - }, - { - "epoch": 1.9715699961924102, - "grad_norm": 0.558635950088501, - "learning_rate": 6.856200025383932e-06, - "loss": 0.0383, - "step": 77670 - }, - { - "epoch": 1.9716969158522655, - "grad_norm": 0.3300378620624542, - "learning_rate": 6.8553538943182306e-06, - "loss": 0.0374, - "step": 77675 - }, - { - "epoch": 1.971823835512121, - "grad_norm": 0.3963364064693451, - "learning_rate": 6.854507763252528e-06, - "loss": 0.0325, - "step": 77680 - }, - { - "epoch": 1.9719507551719762, - "grad_norm": 0.567065417766571, - "learning_rate": 6.8536616321868265e-06, - "loss": 0.0248, - "step": 77685 - }, - { - "epoch": 1.9720776748318314, - "grad_norm": 0.38106024265289307, - "learning_rate": 6.852815501121124e-06, - "loss": 0.0366, - "step": 77690 - }, - { - "epoch": 1.9722045944916866, - "grad_norm": 0.3531460165977478, - "learning_rate": 6.8519693700554225e-06, - "loss": 0.0574, - "step": 77695 - }, - { - "epoch": 1.972331514151542, - "grad_norm": 0.47668689489364624, - "learning_rate": 6.85112323898972e-06, - "loss": 0.0396, - "step": 77700 - }, - { - "epoch": 1.9724584338113975, - "grad_norm": 0.9229491353034973, - "learning_rate": 6.8502771079240184e-06, - "loss": 0.027, - "step": 77705 - }, - { - "epoch": 1.9725853534712527, - "grad_norm": 0.34047937393188477, - "learning_rate": 6.849430976858316e-06, - "loss": 0.0361, - "step": 77710 - }, - { - "epoch": 1.972712273131108, - "grad_norm": 1.6522669792175293, - "learning_rate": 6.848584845792614e-06, - "loss": 0.0429, - "step": 77715 - }, - { - "epoch": 1.9728391927909632, - "grad_norm": 0.5117645263671875, - "learning_rate": 6.847738714726911e-06, - "loss": 0.0338, - "step": 77720 - }, - { - "epoch": 1.9729661124508187, - "grad_norm": 0.7546731233596802, - "learning_rate": 6.8468925836612095e-06, - "loss": 0.0375, - "step": 77725 - }, - { - "epoch": 1.973093032110674, - "grad_norm": 0.27045369148254395, - "learning_rate": 6.846046452595507e-06, - "loss": 0.0312, - "step": 77730 - }, - { - "epoch": 1.9732199517705293, - "grad_norm": 0.2871149182319641, - "learning_rate": 6.8452003215298055e-06, - "loss": 0.0347, - "step": 77735 - }, - { - "epoch": 1.9733468714303846, - "grad_norm": 0.4255141317844391, - "learning_rate": 6.844354190464103e-06, - "loss": 0.03, - "step": 77740 - }, - { - "epoch": 1.9734737910902398, - "grad_norm": 0.42251965403556824, - "learning_rate": 6.8435080593984014e-06, - "loss": 0.0237, - "step": 77745 - }, - { - "epoch": 1.9736007107500952, - "grad_norm": 0.32614415884017944, - "learning_rate": 6.842661928332699e-06, - "loss": 0.0338, - "step": 77750 - }, - { - "epoch": 1.9737276304099505, - "grad_norm": 0.443033903837204, - "learning_rate": 6.841815797266997e-06, - "loss": 0.0332, - "step": 77755 - }, - { - "epoch": 1.973854550069806, - "grad_norm": 0.5601297616958618, - "learning_rate": 6.840969666201295e-06, - "loss": 0.021, - "step": 77760 - }, - { - "epoch": 1.9739814697296612, - "grad_norm": 0.30559495091438293, - "learning_rate": 6.840123535135593e-06, - "loss": 0.0407, - "step": 77765 - }, - { - "epoch": 1.9741083893895164, - "grad_norm": 0.5053434371948242, - "learning_rate": 6.839277404069891e-06, - "loss": 0.0257, - "step": 77770 - }, - { - "epoch": 1.9742353090493716, - "grad_norm": 0.37901198863983154, - "learning_rate": 6.838431273004189e-06, - "loss": 0.0271, - "step": 77775 - }, - { - "epoch": 1.974362228709227, - "grad_norm": 0.549964189529419, - "learning_rate": 6.837585141938487e-06, - "loss": 0.0425, - "step": 77780 - }, - { - "epoch": 1.9744891483690825, - "grad_norm": 0.5751891136169434, - "learning_rate": 6.836739010872785e-06, - "loss": 0.0517, - "step": 77785 - }, - { - "epoch": 1.9746160680289377, - "grad_norm": 0.5101956725120544, - "learning_rate": 6.835892879807082e-06, - "loss": 0.0319, - "step": 77790 - }, - { - "epoch": 1.974742987688793, - "grad_norm": 0.3804854452610016, - "learning_rate": 6.83504674874138e-06, - "loss": 0.0372, - "step": 77795 - }, - { - "epoch": 1.9748699073486482, - "grad_norm": 0.4531211853027344, - "learning_rate": 6.834200617675678e-06, - "loss": 0.0379, - "step": 77800 - }, - { - "epoch": 1.9749968270085037, - "grad_norm": 0.657293438911438, - "learning_rate": 6.833354486609976e-06, - "loss": 0.0302, - "step": 77805 - }, - { - "epoch": 1.9751237466683589, - "grad_norm": 0.35873255133628845, - "learning_rate": 6.832508355544274e-06, - "loss": 0.0316, - "step": 77810 - }, - { - "epoch": 1.9752506663282143, - "grad_norm": 0.5848709344863892, - "learning_rate": 6.831662224478572e-06, - "loss": 0.0402, - "step": 77815 - }, - { - "epoch": 1.9753775859880696, - "grad_norm": 0.47982874512672424, - "learning_rate": 6.83081609341287e-06, - "loss": 0.0527, - "step": 77820 - }, - { - "epoch": 1.9755045056479248, - "grad_norm": 0.5342438817024231, - "learning_rate": 6.829969962347168e-06, - "loss": 0.0471, - "step": 77825 - }, - { - "epoch": 1.97563142530778, - "grad_norm": 0.4084111452102661, - "learning_rate": 6.829123831281466e-06, - "loss": 0.031, - "step": 77830 - }, - { - "epoch": 1.9757583449676355, - "grad_norm": 0.9967249631881714, - "learning_rate": 6.828277700215764e-06, - "loss": 0.0316, - "step": 77835 - }, - { - "epoch": 1.975885264627491, - "grad_norm": 0.6059869527816772, - "learning_rate": 6.827431569150062e-06, - "loss": 0.033, - "step": 77840 - }, - { - "epoch": 1.9760121842873462, - "grad_norm": 0.8854791522026062, - "learning_rate": 6.82658543808436e-06, - "loss": 0.0491, - "step": 77845 - }, - { - "epoch": 1.9761391039472014, - "grad_norm": 0.31030791997909546, - "learning_rate": 6.825739307018658e-06, - "loss": 0.0257, - "step": 77850 - }, - { - "epoch": 1.9762660236070566, - "grad_norm": 0.6608283519744873, - "learning_rate": 6.824893175952956e-06, - "loss": 0.0379, - "step": 77855 - }, - { - "epoch": 1.976392943266912, - "grad_norm": 0.3494052588939667, - "learning_rate": 6.824047044887253e-06, - "loss": 0.0469, - "step": 77860 - }, - { - "epoch": 1.9765198629267675, - "grad_norm": 0.21526604890823364, - "learning_rate": 6.823200913821551e-06, - "loss": 0.0365, - "step": 77865 - }, - { - "epoch": 1.9766467825866227, - "grad_norm": 0.41578879952430725, - "learning_rate": 6.822354782755849e-06, - "loss": 0.0507, - "step": 77870 - }, - { - "epoch": 1.976773702246478, - "grad_norm": 0.25978773832321167, - "learning_rate": 6.821508651690147e-06, - "loss": 0.0252, - "step": 77875 - }, - { - "epoch": 1.9769006219063332, - "grad_norm": 0.21272599697113037, - "learning_rate": 6.820662520624445e-06, - "loss": 0.031, - "step": 77880 - }, - { - "epoch": 1.9770275415661887, - "grad_norm": 0.6931493282318115, - "learning_rate": 6.819816389558743e-06, - "loss": 0.0348, - "step": 77885 - }, - { - "epoch": 1.9771544612260439, - "grad_norm": 0.520466148853302, - "learning_rate": 6.818970258493041e-06, - "loss": 0.0384, - "step": 77890 - }, - { - "epoch": 1.9772813808858993, - "grad_norm": 0.43951207399368286, - "learning_rate": 6.818124127427339e-06, - "loss": 0.0247, - "step": 77895 - }, - { - "epoch": 1.9774083005457546, - "grad_norm": 0.40167978405952454, - "learning_rate": 6.817277996361637e-06, - "loss": 0.0269, - "step": 77900 - }, - { - "epoch": 1.9775352202056098, - "grad_norm": 0.19395919144153595, - "learning_rate": 6.816431865295935e-06, - "loss": 0.0368, - "step": 77905 - }, - { - "epoch": 1.977662139865465, - "grad_norm": 0.2743365466594696, - "learning_rate": 6.815585734230233e-06, - "loss": 0.0267, - "step": 77910 - }, - { - "epoch": 1.9777890595253205, - "grad_norm": 0.38004812598228455, - "learning_rate": 6.814739603164531e-06, - "loss": 0.0435, - "step": 77915 - }, - { - "epoch": 1.977915979185176, - "grad_norm": 0.8107854127883911, - "learning_rate": 6.813893472098828e-06, - "loss": 0.0277, - "step": 77920 - }, - { - "epoch": 1.9780428988450312, - "grad_norm": 0.3155663311481476, - "learning_rate": 6.813047341033127e-06, - "loss": 0.0297, - "step": 77925 - }, - { - "epoch": 1.9781698185048864, - "grad_norm": 0.4185563921928406, - "learning_rate": 6.812201209967425e-06, - "loss": 0.0344, - "step": 77930 - }, - { - "epoch": 1.9782967381647416, - "grad_norm": 0.5681030750274658, - "learning_rate": 6.811355078901722e-06, - "loss": 0.0353, - "step": 77935 - }, - { - "epoch": 1.978423657824597, - "grad_norm": 0.9423580765724182, - "learning_rate": 6.810508947836021e-06, - "loss": 0.0484, - "step": 77940 - }, - { - "epoch": 1.9785505774844523, - "grad_norm": 0.45057958364486694, - "learning_rate": 6.809662816770318e-06, - "loss": 0.0404, - "step": 77945 - }, - { - "epoch": 1.9786774971443077, - "grad_norm": 0.21904955804347992, - "learning_rate": 6.8088166857046164e-06, - "loss": 0.0215, - "step": 77950 - }, - { - "epoch": 1.978804416804163, - "grad_norm": 0.4004300832748413, - "learning_rate": 6.807970554638914e-06, - "loss": 0.0319, - "step": 77955 - }, - { - "epoch": 1.9789313364640182, - "grad_norm": 0.49162185192108154, - "learning_rate": 6.807124423573212e-06, - "loss": 0.0469, - "step": 77960 - }, - { - "epoch": 1.9790582561238734, - "grad_norm": 0.4727995693683624, - "learning_rate": 6.80627829250751e-06, - "loss": 0.0313, - "step": 77965 - }, - { - "epoch": 1.9791851757837289, - "grad_norm": 0.31196776032447815, - "learning_rate": 6.805432161441808e-06, - "loss": 0.0358, - "step": 77970 - }, - { - "epoch": 1.9793120954435843, - "grad_norm": 0.6643561720848083, - "learning_rate": 6.804586030376106e-06, - "loss": 0.0321, - "step": 77975 - }, - { - "epoch": 1.9794390151034396, - "grad_norm": 0.3291933238506317, - "learning_rate": 6.803739899310404e-06, - "loss": 0.0455, - "step": 77980 - }, - { - "epoch": 1.9795659347632948, - "grad_norm": 0.5622776746749878, - "learning_rate": 6.802893768244702e-06, - "loss": 0.0338, - "step": 77985 - }, - { - "epoch": 1.97969285442315, - "grad_norm": 0.3872607946395874, - "learning_rate": 6.802047637179e-06, - "loss": 0.0313, - "step": 77990 - }, - { - "epoch": 1.9798197740830055, - "grad_norm": 0.39400097727775574, - "learning_rate": 6.801201506113298e-06, - "loss": 0.0498, - "step": 77995 - }, - { - "epoch": 1.979946693742861, - "grad_norm": 0.42889243364334106, - "learning_rate": 6.800355375047596e-06, - "loss": 0.0275, - "step": 78000 - }, - { - "epoch": 1.9800736134027161, - "grad_norm": 0.584221363067627, - "learning_rate": 6.799509243981893e-06, - "loss": 0.0266, - "step": 78005 - }, - { - "epoch": 1.9802005330625714, - "grad_norm": 1.1989383697509766, - "learning_rate": 6.798663112916191e-06, - "loss": 0.0298, - "step": 78010 - }, - { - "epoch": 1.9803274527224266, - "grad_norm": 0.4037652611732483, - "learning_rate": 6.797816981850489e-06, - "loss": 0.029, - "step": 78015 - }, - { - "epoch": 1.980454372382282, - "grad_norm": 0.3232622742652893, - "learning_rate": 6.796970850784787e-06, - "loss": 0.0269, - "step": 78020 - }, - { - "epoch": 1.9805812920421373, - "grad_norm": 0.5277105569839478, - "learning_rate": 6.796124719719085e-06, - "loss": 0.0547, - "step": 78025 - }, - { - "epoch": 1.9807082117019927, - "grad_norm": 1.044446349143982, - "learning_rate": 6.795278588653383e-06, - "loss": 0.0498, - "step": 78030 - }, - { - "epoch": 1.980835131361848, - "grad_norm": 0.5080462098121643, - "learning_rate": 6.794432457587681e-06, - "loss": 0.0477, - "step": 78035 - }, - { - "epoch": 1.9809620510217032, - "grad_norm": 0.40579652786254883, - "learning_rate": 6.793586326521979e-06, - "loss": 0.0333, - "step": 78040 - }, - { - "epoch": 1.9810889706815584, - "grad_norm": 1.5842682123184204, - "learning_rate": 6.792740195456277e-06, - "loss": 0.0538, - "step": 78045 - }, - { - "epoch": 1.9812158903414139, - "grad_norm": 0.30790379643440247, - "learning_rate": 6.791894064390575e-06, - "loss": 0.0291, - "step": 78050 - }, - { - "epoch": 1.9813428100012693, - "grad_norm": 0.4236738681793213, - "learning_rate": 6.791047933324873e-06, - "loss": 0.0365, - "step": 78055 - }, - { - "epoch": 1.9814697296611246, - "grad_norm": 0.5662766098976135, - "learning_rate": 6.790201802259171e-06, - "loss": 0.0243, - "step": 78060 - }, - { - "epoch": 1.9815966493209798, - "grad_norm": 0.5012952089309692, - "learning_rate": 6.789355671193469e-06, - "loss": 0.0427, - "step": 78065 - }, - { - "epoch": 1.981723568980835, - "grad_norm": 0.34518319368362427, - "learning_rate": 6.788509540127767e-06, - "loss": 0.0351, - "step": 78070 - }, - { - "epoch": 1.9818504886406905, - "grad_norm": 0.9171311855316162, - "learning_rate": 6.787663409062064e-06, - "loss": 0.0435, - "step": 78075 - }, - { - "epoch": 1.981977408300546, - "grad_norm": 0.4412439167499542, - "learning_rate": 6.786817277996362e-06, - "loss": 0.0242, - "step": 78080 - }, - { - "epoch": 1.9821043279604011, - "grad_norm": 0.5514691472053528, - "learning_rate": 6.78597114693066e-06, - "loss": 0.0448, - "step": 78085 - }, - { - "epoch": 1.9822312476202564, - "grad_norm": 0.4731886088848114, - "learning_rate": 6.785125015864958e-06, - "loss": 0.0327, - "step": 78090 - }, - { - "epoch": 1.9823581672801116, - "grad_norm": 0.4285946190357208, - "learning_rate": 6.784278884799256e-06, - "loss": 0.0235, - "step": 78095 - }, - { - "epoch": 1.982485086939967, - "grad_norm": 0.29571640491485596, - "learning_rate": 6.783432753733554e-06, - "loss": 0.0237, - "step": 78100 - }, - { - "epoch": 1.9826120065998223, - "grad_norm": 0.5239272117614746, - "learning_rate": 6.782586622667852e-06, - "loss": 0.039, - "step": 78105 - }, - { - "epoch": 1.9827389262596777, - "grad_norm": 0.455178827047348, - "learning_rate": 6.78174049160215e-06, - "loss": 0.0352, - "step": 78110 - }, - { - "epoch": 1.982865845919533, - "grad_norm": 0.4374772906303406, - "learning_rate": 6.780894360536448e-06, - "loss": 0.0365, - "step": 78115 - }, - { - "epoch": 1.9829927655793882, - "grad_norm": 1.0961962938308716, - "learning_rate": 6.780048229470746e-06, - "loss": 0.0467, - "step": 78120 - }, - { - "epoch": 1.9831196852392434, - "grad_norm": 0.5485217571258545, - "learning_rate": 6.7792020984050436e-06, - "loss": 0.0458, - "step": 78125 - }, - { - "epoch": 1.9832466048990989, - "grad_norm": 0.3530927002429962, - "learning_rate": 6.778355967339342e-06, - "loss": 0.0385, - "step": 78130 - }, - { - "epoch": 1.9833735245589543, - "grad_norm": 0.39299023151397705, - "learning_rate": 6.7775098362736395e-06, - "loss": 0.0357, - "step": 78135 - }, - { - "epoch": 1.9835004442188096, - "grad_norm": 0.3092808425426483, - "learning_rate": 6.776663705207938e-06, - "loss": 0.0355, - "step": 78140 - }, - { - "epoch": 1.9836273638786648, - "grad_norm": 0.24532845616340637, - "learning_rate": 6.775817574142235e-06, - "loss": 0.021, - "step": 78145 - }, - { - "epoch": 1.98375428353852, - "grad_norm": 0.4050248861312866, - "learning_rate": 6.774971443076533e-06, - "loss": 0.0242, - "step": 78150 - }, - { - "epoch": 1.9838812031983755, - "grad_norm": 0.5188576579093933, - "learning_rate": 6.774125312010831e-06, - "loss": 0.0385, - "step": 78155 - }, - { - "epoch": 1.9840081228582307, - "grad_norm": 0.5920747518539429, - "learning_rate": 6.773279180945129e-06, - "loss": 0.0422, - "step": 78160 - }, - { - "epoch": 1.9841350425180861, - "grad_norm": 0.33228474855422974, - "learning_rate": 6.7724330498794265e-06, - "loss": 0.0314, - "step": 78165 - }, - { - "epoch": 1.9842619621779414, - "grad_norm": 0.4705577790737152, - "learning_rate": 6.771586918813725e-06, - "loss": 0.0372, - "step": 78170 - }, - { - "epoch": 1.9843888818377966, - "grad_norm": 0.28733178973197937, - "learning_rate": 6.7707407877480225e-06, - "loss": 0.0339, - "step": 78175 - }, - { - "epoch": 1.9845158014976518, - "grad_norm": 0.2695188522338867, - "learning_rate": 6.769894656682321e-06, - "loss": 0.0233, - "step": 78180 - }, - { - "epoch": 1.9846427211575073, - "grad_norm": 0.384003609418869, - "learning_rate": 6.7690485256166184e-06, - "loss": 0.0327, - "step": 78185 - }, - { - "epoch": 1.9847696408173627, - "grad_norm": 0.31456390023231506, - "learning_rate": 6.768202394550917e-06, - "loss": 0.0276, - "step": 78190 - }, - { - "epoch": 1.984896560477218, - "grad_norm": 0.3452562987804413, - "learning_rate": 6.767356263485214e-06, - "loss": 0.0257, - "step": 78195 - }, - { - "epoch": 1.9850234801370732, - "grad_norm": 0.1721457839012146, - "learning_rate": 6.766510132419513e-06, - "loss": 0.0419, - "step": 78200 - }, - { - "epoch": 1.9851503997969284, - "grad_norm": 0.4614481031894684, - "learning_rate": 6.76566400135381e-06, - "loss": 0.0415, - "step": 78205 - }, - { - "epoch": 1.9852773194567839, - "grad_norm": 0.5380414724349976, - "learning_rate": 6.764817870288109e-06, - "loss": 0.0696, - "step": 78210 - }, - { - "epoch": 1.9854042391166393, - "grad_norm": 0.13993018865585327, - "learning_rate": 6.7639717392224055e-06, - "loss": 0.0217, - "step": 78215 - }, - { - "epoch": 1.9855311587764946, - "grad_norm": 0.5560346841812134, - "learning_rate": 6.763125608156704e-06, - "loss": 0.038, - "step": 78220 - }, - { - "epoch": 1.9856580784363498, - "grad_norm": 0.6346275210380554, - "learning_rate": 6.7622794770910014e-06, - "loss": 0.028, - "step": 78225 - }, - { - "epoch": 1.985784998096205, - "grad_norm": 0.4192825257778168, - "learning_rate": 6.7614333460253e-06, - "loss": 0.0286, - "step": 78230 - }, - { - "epoch": 1.9859119177560605, - "grad_norm": 0.4012502133846283, - "learning_rate": 6.760587214959597e-06, - "loss": 0.0247, - "step": 78235 - }, - { - "epoch": 1.9860388374159157, - "grad_norm": 0.36419352889060974, - "learning_rate": 6.759741083893896e-06, - "loss": 0.0352, - "step": 78240 - }, - { - "epoch": 1.9861657570757711, - "grad_norm": 0.3872445225715637, - "learning_rate": 6.758894952828193e-06, - "loss": 0.0406, - "step": 78245 - }, - { - "epoch": 1.9862926767356264, - "grad_norm": 0.7594440579414368, - "learning_rate": 6.758048821762492e-06, - "loss": 0.044, - "step": 78250 - }, - { - "epoch": 1.9864195963954816, - "grad_norm": 1.2207934856414795, - "learning_rate": 6.757202690696789e-06, - "loss": 0.0479, - "step": 78255 - }, - { - "epoch": 1.9865465160553368, - "grad_norm": 0.6385036110877991, - "learning_rate": 6.756356559631088e-06, - "loss": 0.0477, - "step": 78260 - }, - { - "epoch": 1.9866734357151923, - "grad_norm": 0.2912749946117401, - "learning_rate": 6.755510428565385e-06, - "loss": 0.0339, - "step": 78265 - }, - { - "epoch": 1.9868003553750477, - "grad_norm": 0.21846599876880646, - "learning_rate": 6.754664297499684e-06, - "loss": 0.0362, - "step": 78270 - }, - { - "epoch": 1.986927275034903, - "grad_norm": 0.5382324457168579, - "learning_rate": 6.753818166433981e-06, - "loss": 0.0329, - "step": 78275 - }, - { - "epoch": 1.9870541946947582, - "grad_norm": 0.49905943870544434, - "learning_rate": 6.75297203536828e-06, - "loss": 0.0401, - "step": 78280 - }, - { - "epoch": 1.9871811143546134, - "grad_norm": 0.3911565840244293, - "learning_rate": 6.752125904302576e-06, - "loss": 0.0379, - "step": 78285 - }, - { - "epoch": 1.9873080340144689, - "grad_norm": 0.5789175629615784, - "learning_rate": 6.751279773236875e-06, - "loss": 0.0235, - "step": 78290 - }, - { - "epoch": 1.987434953674324, - "grad_norm": 0.48774775862693787, - "learning_rate": 6.750433642171172e-06, - "loss": 0.0345, - "step": 78295 - }, - { - "epoch": 1.9875618733341796, - "grad_norm": 0.3353675603866577, - "learning_rate": 6.749587511105471e-06, - "loss": 0.0216, - "step": 78300 - }, - { - "epoch": 1.9876887929940348, - "grad_norm": 0.4038142263889313, - "learning_rate": 6.748741380039768e-06, - "loss": 0.0213, - "step": 78305 - }, - { - "epoch": 1.98781571265389, - "grad_norm": 0.34350791573524475, - "learning_rate": 6.747895248974067e-06, - "loss": 0.0336, - "step": 78310 - }, - { - "epoch": 1.9879426323137452, - "grad_norm": 0.6062818169593811, - "learning_rate": 6.747049117908364e-06, - "loss": 0.0273, - "step": 78315 - }, - { - "epoch": 1.9880695519736007, - "grad_norm": 0.42715391516685486, - "learning_rate": 6.746202986842663e-06, - "loss": 0.0438, - "step": 78320 - }, - { - "epoch": 1.9881964716334561, - "grad_norm": 0.27827414870262146, - "learning_rate": 6.74535685577696e-06, - "loss": 0.0307, - "step": 78325 - }, - { - "epoch": 1.9883233912933114, - "grad_norm": 0.3199903964996338, - "learning_rate": 6.7445107247112586e-06, - "loss": 0.0361, - "step": 78330 - }, - { - "epoch": 1.9884503109531666, - "grad_norm": 0.6843271851539612, - "learning_rate": 6.743664593645556e-06, - "loss": 0.031, - "step": 78335 - }, - { - "epoch": 1.9885772306130218, - "grad_norm": 1.2801871299743652, - "learning_rate": 6.7428184625798545e-06, - "loss": 0.0409, - "step": 78340 - }, - { - "epoch": 1.9887041502728773, - "grad_norm": 0.5976167321205139, - "learning_rate": 6.741972331514152e-06, - "loss": 0.0558, - "step": 78345 - }, - { - "epoch": 1.9888310699327327, - "grad_norm": 0.7439396381378174, - "learning_rate": 6.7411262004484505e-06, - "loss": 0.0423, - "step": 78350 - }, - { - "epoch": 1.988957989592588, - "grad_norm": 0.516546905040741, - "learning_rate": 6.740280069382747e-06, - "loss": 0.0574, - "step": 78355 - }, - { - "epoch": 1.9890849092524432, - "grad_norm": 0.36054179072380066, - "learning_rate": 6.739433938317046e-06, - "loss": 0.027, - "step": 78360 - }, - { - "epoch": 1.9892118289122984, - "grad_norm": 0.3667360842227936, - "learning_rate": 6.738587807251343e-06, - "loss": 0.0365, - "step": 78365 - }, - { - "epoch": 1.9893387485721539, - "grad_norm": 0.6744925379753113, - "learning_rate": 6.7377416761856415e-06, - "loss": 0.0375, - "step": 78370 - }, - { - "epoch": 1.989465668232009, - "grad_norm": 1.3269729614257812, - "learning_rate": 6.736895545119939e-06, - "loss": 0.0356, - "step": 78375 - }, - { - "epoch": 1.9895925878918646, - "grad_norm": 0.5019115209579468, - "learning_rate": 6.7360494140542375e-06, - "loss": 0.0217, - "step": 78380 - }, - { - "epoch": 1.9897195075517198, - "grad_norm": 0.2592547535896301, - "learning_rate": 6.735203282988535e-06, - "loss": 0.0263, - "step": 78385 - }, - { - "epoch": 1.989846427211575, - "grad_norm": 0.7031739950180054, - "learning_rate": 6.7343571519228335e-06, - "loss": 0.0507, - "step": 78390 - }, - { - "epoch": 1.9899733468714302, - "grad_norm": 0.43913406133651733, - "learning_rate": 6.733511020857131e-06, - "loss": 0.0327, - "step": 78395 - }, - { - "epoch": 1.9901002665312857, - "grad_norm": 0.41462963819503784, - "learning_rate": 6.732664889791429e-06, - "loss": 0.0224, - "step": 78400 - }, - { - "epoch": 1.9902271861911411, - "grad_norm": 0.5339758992195129, - "learning_rate": 6.731818758725727e-06, - "loss": 0.0432, - "step": 78405 - }, - { - "epoch": 1.9903541058509964, - "grad_norm": 1.2943477630615234, - "learning_rate": 6.730972627660025e-06, - "loss": 0.0312, - "step": 78410 - }, - { - "epoch": 1.9904810255108516, - "grad_norm": 0.598518431186676, - "learning_rate": 6.730126496594322e-06, - "loss": 0.0386, - "step": 78415 - }, - { - "epoch": 1.9906079451707068, - "grad_norm": 0.4834861159324646, - "learning_rate": 6.729280365528621e-06, - "loss": 0.0175, - "step": 78420 - }, - { - "epoch": 1.9907348648305623, - "grad_norm": 0.3435346186161041, - "learning_rate": 6.728434234462918e-06, - "loss": 0.0312, - "step": 78425 - }, - { - "epoch": 1.9908617844904177, - "grad_norm": 0.2544908821582794, - "learning_rate": 6.7275881033972164e-06, - "loss": 0.0332, - "step": 78430 - }, - { - "epoch": 1.990988704150273, - "grad_norm": 0.4005286991596222, - "learning_rate": 6.726741972331514e-06, - "loss": 0.0298, - "step": 78435 - }, - { - "epoch": 1.9911156238101282, - "grad_norm": 0.4323800504207611, - "learning_rate": 6.725895841265812e-06, - "loss": 0.0496, - "step": 78440 - }, - { - "epoch": 1.9912425434699834, - "grad_norm": 0.22347351908683777, - "learning_rate": 6.72504971020011e-06, - "loss": 0.0363, - "step": 78445 - }, - { - "epoch": 1.9913694631298386, - "grad_norm": 0.4490489065647125, - "learning_rate": 6.724203579134408e-06, - "loss": 0.0256, - "step": 78450 - }, - { - "epoch": 1.991496382789694, - "grad_norm": 0.46909549832344055, - "learning_rate": 6.723357448068707e-06, - "loss": 0.0288, - "step": 78455 - }, - { - "epoch": 1.9916233024495495, - "grad_norm": 0.37474843859672546, - "learning_rate": 6.722511317003004e-06, - "loss": 0.0377, - "step": 78460 - }, - { - "epoch": 1.9917502221094048, - "grad_norm": 0.6801578402519226, - "learning_rate": 6.721665185937303e-06, - "loss": 0.0361, - "step": 78465 - }, - { - "epoch": 1.99187714176926, - "grad_norm": 0.4860595464706421, - "learning_rate": 6.7208190548716e-06, - "loss": 0.0445, - "step": 78470 - }, - { - "epoch": 1.9920040614291152, - "grad_norm": 0.3573508560657501, - "learning_rate": 6.719972923805899e-06, - "loss": 0.042, - "step": 78475 - }, - { - "epoch": 1.9921309810889707, - "grad_norm": 0.34712719917297363, - "learning_rate": 6.719126792740196e-06, - "loss": 0.0388, - "step": 78480 - }, - { - "epoch": 1.9922579007488261, - "grad_norm": 0.6539756655693054, - "learning_rate": 6.718280661674495e-06, - "loss": 0.0487, - "step": 78485 - }, - { - "epoch": 1.9923848204086814, - "grad_norm": 0.504552960395813, - "learning_rate": 6.717434530608792e-06, - "loss": 0.0266, - "step": 78490 - }, - { - "epoch": 1.9925117400685366, - "grad_norm": 0.5531230568885803, - "learning_rate": 6.7165883995430906e-06, - "loss": 0.0405, - "step": 78495 - }, - { - "epoch": 1.9926386597283918, - "grad_norm": 0.3999694585800171, - "learning_rate": 6.715742268477387e-06, - "loss": 0.0308, - "step": 78500 - }, - { - "epoch": 1.9927655793882473, - "grad_norm": 0.5262141823768616, - "learning_rate": 6.7148961374116865e-06, - "loss": 0.0335, - "step": 78505 - }, - { - "epoch": 1.9928924990481025, - "grad_norm": 0.3030613660812378, - "learning_rate": 6.714050006345983e-06, - "loss": 0.0424, - "step": 78510 - }, - { - "epoch": 1.993019418707958, - "grad_norm": 0.6238569021224976, - "learning_rate": 6.713203875280282e-06, - "loss": 0.0411, - "step": 78515 - }, - { - "epoch": 1.9931463383678132, - "grad_norm": 0.43626508116722107, - "learning_rate": 6.712357744214579e-06, - "loss": 0.0448, - "step": 78520 - }, - { - "epoch": 1.9932732580276684, - "grad_norm": 0.288548082113266, - "learning_rate": 6.711511613148878e-06, - "loss": 0.0288, - "step": 78525 - }, - { - "epoch": 1.9934001776875236, - "grad_norm": 0.4548340439796448, - "learning_rate": 6.710665482083175e-06, - "loss": 0.0507, - "step": 78530 - }, - { - "epoch": 1.993527097347379, - "grad_norm": 0.3028653860092163, - "learning_rate": 6.7098193510174736e-06, - "loss": 0.0358, - "step": 78535 - }, - { - "epoch": 1.9936540170072345, - "grad_norm": 0.36247673630714417, - "learning_rate": 6.708973219951771e-06, - "loss": 0.0354, - "step": 78540 - }, - { - "epoch": 1.9937809366670898, - "grad_norm": 0.15340466797351837, - "learning_rate": 6.7081270888860695e-06, - "loss": 0.0154, - "step": 78545 - }, - { - "epoch": 1.993907856326945, - "grad_norm": 0.4914100766181946, - "learning_rate": 6.707280957820367e-06, - "loss": 0.0313, - "step": 78550 - }, - { - "epoch": 1.9940347759868002, - "grad_norm": 0.40737587213516235, - "learning_rate": 6.7064348267546655e-06, - "loss": 0.044, - "step": 78555 - }, - { - "epoch": 1.9941616956466557, - "grad_norm": 0.7601488828659058, - "learning_rate": 6.705588695688963e-06, - "loss": 0.0415, - "step": 78560 - }, - { - "epoch": 1.9942886153065111, - "grad_norm": 0.29816675186157227, - "learning_rate": 6.7047425646232614e-06, - "loss": 0.0251, - "step": 78565 - }, - { - "epoch": 1.9944155349663664, - "grad_norm": 0.45963940024375916, - "learning_rate": 6.703896433557558e-06, - "loss": 0.0499, - "step": 78570 - }, - { - "epoch": 1.9945424546262216, - "grad_norm": 0.32008254528045654, - "learning_rate": 6.7030503024918565e-06, - "loss": 0.0195, - "step": 78575 - }, - { - "epoch": 1.9946693742860768, - "grad_norm": 0.7205250263214111, - "learning_rate": 6.702204171426154e-06, - "loss": 0.0392, - "step": 78580 - }, - { - "epoch": 1.9947962939459323, - "grad_norm": 0.3339502215385437, - "learning_rate": 6.7013580403604525e-06, - "loss": 0.0177, - "step": 78585 - }, - { - "epoch": 1.9949232136057875, - "grad_norm": 0.5905521512031555, - "learning_rate": 6.70051190929475e-06, - "loss": 0.0429, - "step": 78590 - }, - { - "epoch": 1.995050133265643, - "grad_norm": 0.5579733848571777, - "learning_rate": 6.6996657782290485e-06, - "loss": 0.0282, - "step": 78595 - }, - { - "epoch": 1.9951770529254982, - "grad_norm": 0.519138753414154, - "learning_rate": 6.698819647163346e-06, - "loss": 0.032, - "step": 78600 - }, - { - "epoch": 1.9953039725853534, - "grad_norm": 0.2945477068424225, - "learning_rate": 6.697973516097644e-06, - "loss": 0.035, - "step": 78605 - }, - { - "epoch": 1.9954308922452086, - "grad_norm": 0.4025100767612457, - "learning_rate": 6.697127385031942e-06, - "loss": 0.0547, - "step": 78610 - }, - { - "epoch": 1.995557811905064, - "grad_norm": 0.39372217655181885, - "learning_rate": 6.69628125396624e-06, - "loss": 0.0306, - "step": 78615 - }, - { - "epoch": 1.9956847315649195, - "grad_norm": 0.5421264171600342, - "learning_rate": 6.695435122900538e-06, - "loss": 0.0581, - "step": 78620 - }, - { - "epoch": 1.9958116512247748, - "grad_norm": 0.40098100900650024, - "learning_rate": 6.694588991834836e-06, - "loss": 0.0388, - "step": 78625 - }, - { - "epoch": 1.99593857088463, - "grad_norm": 0.31377682089805603, - "learning_rate": 6.693742860769134e-06, - "loss": 0.0302, - "step": 78630 - }, - { - "epoch": 1.9960654905444852, - "grad_norm": 0.3539169430732727, - "learning_rate": 6.692896729703432e-06, - "loss": 0.0237, - "step": 78635 - }, - { - "epoch": 1.9961924102043407, - "grad_norm": 0.5098527073860168, - "learning_rate": 6.692050598637729e-06, - "loss": 0.0333, - "step": 78640 - }, - { - "epoch": 1.996319329864196, - "grad_norm": 0.6698564291000366, - "learning_rate": 6.691204467572027e-06, - "loss": 0.0407, - "step": 78645 - }, - { - "epoch": 1.9964462495240514, - "grad_norm": 0.582912266254425, - "learning_rate": 6.690358336506325e-06, - "loss": 0.0216, - "step": 78650 - }, - { - "epoch": 1.9965731691839066, - "grad_norm": 0.47408366203308105, - "learning_rate": 6.689512205440623e-06, - "loss": 0.0415, - "step": 78655 - }, - { - "epoch": 1.9967000888437618, - "grad_norm": 0.4388082027435303, - "learning_rate": 6.688666074374921e-06, - "loss": 0.0353, - "step": 78660 - }, - { - "epoch": 1.996827008503617, - "grad_norm": 0.4226011633872986, - "learning_rate": 6.687819943309219e-06, - "loss": 0.0254, - "step": 78665 - }, - { - "epoch": 1.9969539281634725, - "grad_norm": 0.285794734954834, - "learning_rate": 6.686973812243517e-06, - "loss": 0.0226, - "step": 78670 - }, - { - "epoch": 1.997080847823328, - "grad_norm": 0.43846040964126587, - "learning_rate": 6.686127681177815e-06, - "loss": 0.029, - "step": 78675 - }, - { - "epoch": 1.9972077674831832, - "grad_norm": 0.5071697235107422, - "learning_rate": 6.685281550112113e-06, - "loss": 0.0341, - "step": 78680 - }, - { - "epoch": 1.9973346871430384, - "grad_norm": 0.7531111836433411, - "learning_rate": 6.684435419046411e-06, - "loss": 0.0379, - "step": 78685 - }, - { - "epoch": 1.9974616068028936, - "grad_norm": 0.27982428669929504, - "learning_rate": 6.683589287980709e-06, - "loss": 0.0325, - "step": 78690 - }, - { - "epoch": 1.997588526462749, - "grad_norm": 0.5837037563323975, - "learning_rate": 6.682743156915007e-06, - "loss": 0.0298, - "step": 78695 - }, - { - "epoch": 1.9977154461226045, - "grad_norm": 0.47883695363998413, - "learning_rate": 6.681897025849305e-06, - "loss": 0.0633, - "step": 78700 - }, - { - "epoch": 1.9978423657824598, - "grad_norm": 0.42451727390289307, - "learning_rate": 6.681050894783603e-06, - "loss": 0.0402, - "step": 78705 - }, - { - "epoch": 1.997969285442315, - "grad_norm": 0.5400234460830688, - "learning_rate": 6.6802047637179e-06, - "loss": 0.0299, - "step": 78710 - }, - { - "epoch": 1.9980962051021702, - "grad_norm": 0.6590980887413025, - "learning_rate": 6.679358632652198e-06, - "loss": 0.0332, - "step": 78715 - }, - { - "epoch": 1.9982231247620257, - "grad_norm": 0.4216865599155426, - "learning_rate": 6.678512501586496e-06, - "loss": 0.0234, - "step": 78720 - }, - { - "epoch": 1.998350044421881, - "grad_norm": 0.6257888078689575, - "learning_rate": 6.677666370520794e-06, - "loss": 0.0624, - "step": 78725 - }, - { - "epoch": 1.9984769640817364, - "grad_norm": 0.21655352413654327, - "learning_rate": 6.676820239455092e-06, - "loss": 0.025, - "step": 78730 - }, - { - "epoch": 1.9986038837415916, - "grad_norm": 0.5635204911231995, - "learning_rate": 6.67597410838939e-06, - "loss": 0.0384, - "step": 78735 - }, - { - "epoch": 1.9987308034014468, - "grad_norm": 0.5117113590240479, - "learning_rate": 6.675127977323688e-06, - "loss": 0.0434, - "step": 78740 - }, - { - "epoch": 1.998857723061302, - "grad_norm": 0.35583871603012085, - "learning_rate": 6.674281846257986e-06, - "loss": 0.0347, - "step": 78745 - }, - { - "epoch": 1.9989846427211575, - "grad_norm": 0.36839020252227783, - "learning_rate": 6.673435715192284e-06, - "loss": 0.0197, - "step": 78750 - }, - { - "epoch": 1.999111562381013, - "grad_norm": 0.40340954065322876, - "learning_rate": 6.672589584126582e-06, - "loss": 0.0345, - "step": 78755 - }, - { - "epoch": 1.9992384820408682, - "grad_norm": 0.7574951648712158, - "learning_rate": 6.67174345306088e-06, - "loss": 0.0289, - "step": 78760 - }, - { - "epoch": 1.9993654017007234, - "grad_norm": 0.5881302952766418, - "learning_rate": 6.670897321995178e-06, - "loss": 0.0307, - "step": 78765 - }, - { - "epoch": 1.9994923213605786, - "grad_norm": 0.23537391424179077, - "learning_rate": 6.670051190929476e-06, - "loss": 0.0331, - "step": 78770 - }, - { - "epoch": 1.999619241020434, - "grad_norm": 0.26379498839378357, - "learning_rate": 6.669205059863774e-06, - "loss": 0.0198, - "step": 78775 - }, - { - "epoch": 1.9997461606802895, - "grad_norm": 1.0597769021987915, - "learning_rate": 6.668358928798071e-06, - "loss": 0.0393, - "step": 78780 - }, - { - "epoch": 1.9998730803401448, - "grad_norm": 0.4701576828956604, - "learning_rate": 6.667512797732369e-06, - "loss": 0.029, - "step": 78785 - }, - { - "epoch": 2.0, - "grad_norm": 0.6298708915710449, - "learning_rate": 6.666666666666667e-06, - "loss": 0.0346, - "step": 78790 - }, - { - "epoch": 2.0, - "eval_loss": 0.22474703192710876, - "eval_runtime": 1244.7903, - "eval_samples_per_second": 100.419, - "eval_steps_per_second": 6.277, - "step": 78790 + "epoch": 1.0, + "eval_loss": 0.160739004611969, + "eval_runtime": 1784.4225, + "eval_samples_per_second": 70.051, + "eval_steps_per_second": 2.19, + "step": 19699 } ], "logging_steps": 5, - "max_steps": 118185, + "max_steps": 59097, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, @@ -110348,8 +27607,8 @@ "attributes": {} } }, - "total_flos": 3.778913788703539e+17, - "train_batch_size": 16, + "total_flos": 3.7791296170072474e+17, + "train_batch_size": 32, "trial_name": null, "trial_params": null }