{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.964806124969128, "eval_steps": 500, "global_step": 10120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000395159298592245, "grad_norm": 45.00149749159182, "learning_rate": 1.0638297872340427e-08, "loss": 5.3324, "step": 1 }, { "epoch": 0.00079031859718449, "grad_norm": 47.65956613810862, "learning_rate": 2.1276595744680853e-08, "loss": 5.4979, "step": 2 }, { "epoch": 0.001185477895776735, "grad_norm": 44.18618860656718, "learning_rate": 3.191489361702128e-08, "loss": 5.1904, "step": 3 }, { "epoch": 0.00158063719436898, "grad_norm": 46.48562838932194, "learning_rate": 4.2553191489361707e-08, "loss": 5.3019, "step": 4 }, { "epoch": 0.001975796492961225, "grad_norm": 46.390295089186544, "learning_rate": 5.319148936170213e-08, "loss": 5.3445, "step": 5 }, { "epoch": 0.00237095579155347, "grad_norm": 46.76761442399584, "learning_rate": 6.382978723404255e-08, "loss": 5.3375, "step": 6 }, { "epoch": 0.002766115090145715, "grad_norm": 48.279218165562085, "learning_rate": 7.446808510638299e-08, "loss": 5.3938, "step": 7 }, { "epoch": 0.00316127438873796, "grad_norm": 49.205312835168, "learning_rate": 8.510638297872341e-08, "loss": 5.4594, "step": 8 }, { "epoch": 0.003556433687330205, "grad_norm": 46.56040077093359, "learning_rate": 9.574468085106384e-08, "loss": 5.3369, "step": 9 }, { "epoch": 0.00395159298592245, "grad_norm": 45.73670653221075, "learning_rate": 1.0638297872340426e-07, "loss": 5.28, "step": 10 }, { "epoch": 0.004346752284514695, "grad_norm": 45.65613647161846, "learning_rate": 1.1702127659574468e-07, "loss": 5.4326, "step": 11 }, { "epoch": 0.00474191158310694, "grad_norm": 48.50884423593061, "learning_rate": 1.276595744680851e-07, "loss": 5.4455, "step": 12 }, { "epoch": 0.005137070881699185, "grad_norm": 48.08180573773191, "learning_rate": 1.3829787234042553e-07, "loss": 5.5208, "step": 13 }, { "epoch": 0.00553223018029143, "grad_norm": 45.43412625685486, "learning_rate": 1.4893617021276598e-07, "loss": 5.3535, "step": 14 }, { "epoch": 0.005927389478883675, "grad_norm": 46.86389365193829, "learning_rate": 1.5957446808510638e-07, "loss": 5.3197, "step": 15 }, { "epoch": 0.00632254877747592, "grad_norm": 44.6253221018194, "learning_rate": 1.7021276595744683e-07, "loss": 5.4598, "step": 16 }, { "epoch": 0.006717708076068165, "grad_norm": 45.02802440676611, "learning_rate": 1.8085106382978722e-07, "loss": 5.1813, "step": 17 }, { "epoch": 0.00711286737466041, "grad_norm": 44.458003741695514, "learning_rate": 1.9148936170212767e-07, "loss": 5.299, "step": 18 }, { "epoch": 0.007508026673252655, "grad_norm": 42.134533089902625, "learning_rate": 2.0212765957446812e-07, "loss": 5.0162, "step": 19 }, { "epoch": 0.0079031859718449, "grad_norm": 45.3211377446349, "learning_rate": 2.1276595744680852e-07, "loss": 5.3317, "step": 20 }, { "epoch": 0.008298345270437145, "grad_norm": 39.99349721590117, "learning_rate": 2.2340425531914897e-07, "loss": 5.1135, "step": 21 }, { "epoch": 0.00869350456902939, "grad_norm": 38.01803171664136, "learning_rate": 2.3404255319148937e-07, "loss": 5.1482, "step": 22 }, { "epoch": 0.009088663867621635, "grad_norm": 39.354061184796315, "learning_rate": 2.446808510638298e-07, "loss": 5.2229, "step": 23 }, { "epoch": 0.00948382316621388, "grad_norm": 40.80324954040139, "learning_rate": 2.553191489361702e-07, "loss": 5.2, "step": 24 }, { "epoch": 0.009878982464806126, "grad_norm": 38.10588841551195, "learning_rate": 2.6595744680851066e-07, "loss": 5.0376, "step": 25 }, { "epoch": 0.01027414176339837, "grad_norm": 38.040044463686115, "learning_rate": 2.7659574468085106e-07, "loss": 5.234, "step": 26 }, { "epoch": 0.010669301061990615, "grad_norm": 38.93658315055219, "learning_rate": 2.872340425531915e-07, "loss": 5.2858, "step": 27 }, { "epoch": 0.01106446036058286, "grad_norm": 27.7819822121004, "learning_rate": 2.9787234042553196e-07, "loss": 4.8375, "step": 28 }, { "epoch": 0.011459619659175104, "grad_norm": 27.368335853965625, "learning_rate": 3.0851063829787236e-07, "loss": 4.9516, "step": 29 }, { "epoch": 0.01185477895776735, "grad_norm": 27.250102842852968, "learning_rate": 3.1914893617021275e-07, "loss": 4.926, "step": 30 }, { "epoch": 0.012249938256359595, "grad_norm": 26.50459159867008, "learning_rate": 3.297872340425532e-07, "loss": 4.8223, "step": 31 }, { "epoch": 0.01264509755495184, "grad_norm": 27.513101530119382, "learning_rate": 3.4042553191489365e-07, "loss": 5.0109, "step": 32 }, { "epoch": 0.013040256853544084, "grad_norm": 26.6602907635092, "learning_rate": 3.510638297872341e-07, "loss": 4.8947, "step": 33 }, { "epoch": 0.01343541615213633, "grad_norm": 23.204176582249318, "learning_rate": 3.6170212765957445e-07, "loss": 4.6691, "step": 34 }, { "epoch": 0.013830575450728575, "grad_norm": 24.66170575750984, "learning_rate": 3.723404255319149e-07, "loss": 4.9299, "step": 35 }, { "epoch": 0.01422573474932082, "grad_norm": 24.263834716366237, "learning_rate": 3.8297872340425535e-07, "loss": 4.9347, "step": 36 }, { "epoch": 0.014620894047913065, "grad_norm": 23.965936128051734, "learning_rate": 3.936170212765958e-07, "loss": 4.8474, "step": 37 }, { "epoch": 0.01501605334650531, "grad_norm": 18.157336850120906, "learning_rate": 4.0425531914893625e-07, "loss": 4.6366, "step": 38 }, { "epoch": 0.015411212645097556, "grad_norm": 17.163104051613317, "learning_rate": 4.148936170212766e-07, "loss": 4.5056, "step": 39 }, { "epoch": 0.0158063719436898, "grad_norm": 18.14853573769075, "learning_rate": 4.2553191489361704e-07, "loss": 4.5394, "step": 40 }, { "epoch": 0.016201531242282043, "grad_norm": 17.031326941596244, "learning_rate": 4.361702127659575e-07, "loss": 4.5494, "step": 41 }, { "epoch": 0.01659669054087429, "grad_norm": 16.530898546137706, "learning_rate": 4.4680851063829794e-07, "loss": 4.3963, "step": 42 }, { "epoch": 0.016991849839466534, "grad_norm": 15.10305267365516, "learning_rate": 4.574468085106383e-07, "loss": 4.3983, "step": 43 }, { "epoch": 0.01738700913805878, "grad_norm": 15.265493803692115, "learning_rate": 4.6808510638297873e-07, "loss": 4.3438, "step": 44 }, { "epoch": 0.017782168436651025, "grad_norm": 14.891772118131469, "learning_rate": 4.787234042553192e-07, "loss": 4.356, "step": 45 }, { "epoch": 0.01817732773524327, "grad_norm": 14.448417003857594, "learning_rate": 4.893617021276596e-07, "loss": 4.3505, "step": 46 }, { "epoch": 0.018572487033835516, "grad_norm": 14.013807338161678, "learning_rate": 5.000000000000001e-07, "loss": 4.3902, "step": 47 }, { "epoch": 0.01896764633242776, "grad_norm": 12.903706951798439, "learning_rate": 5.106382978723404e-07, "loss": 4.2129, "step": 48 }, { "epoch": 0.019362805631020004, "grad_norm": 12.74392330179218, "learning_rate": 5.212765957446809e-07, "loss": 4.2245, "step": 49 }, { "epoch": 0.01975796492961225, "grad_norm": 11.409859418409809, "learning_rate": 5.319148936170213e-07, "loss": 4.0668, "step": 50 }, { "epoch": 0.020153124228204495, "grad_norm": 12.382991509917876, "learning_rate": 5.425531914893618e-07, "loss": 4.0922, "step": 51 }, { "epoch": 0.02054828352679674, "grad_norm": 11.858955825560276, "learning_rate": 5.531914893617021e-07, "loss": 4.0067, "step": 52 }, { "epoch": 0.020943442825388986, "grad_norm": 12.68072624432379, "learning_rate": 5.638297872340426e-07, "loss": 4.0186, "step": 53 }, { "epoch": 0.02133860212398123, "grad_norm": 10.958508064800348, "learning_rate": 5.74468085106383e-07, "loss": 3.873, "step": 54 }, { "epoch": 0.021733761422573473, "grad_norm": 10.842426212742, "learning_rate": 5.851063829787235e-07, "loss": 3.9817, "step": 55 }, { "epoch": 0.02212892072116572, "grad_norm": 10.471356929993854, "learning_rate": 5.957446808510639e-07, "loss": 3.9189, "step": 56 }, { "epoch": 0.022524080019757965, "grad_norm": 9.522689670857774, "learning_rate": 6.063829787234043e-07, "loss": 3.7485, "step": 57 }, { "epoch": 0.02291923931835021, "grad_norm": 9.89336870076738, "learning_rate": 6.170212765957447e-07, "loss": 3.7502, "step": 58 }, { "epoch": 0.023314398616942456, "grad_norm": 10.12284131423492, "learning_rate": 6.276595744680851e-07, "loss": 3.784, "step": 59 }, { "epoch": 0.0237095579155347, "grad_norm": 9.549611583907577, "learning_rate": 6.382978723404255e-07, "loss": 3.7651, "step": 60 }, { "epoch": 0.024104717214126947, "grad_norm": 9.025126475730096, "learning_rate": 6.48936170212766e-07, "loss": 3.6512, "step": 61 }, { "epoch": 0.02449987651271919, "grad_norm": 8.597629129461852, "learning_rate": 6.595744680851064e-07, "loss": 3.7376, "step": 62 }, { "epoch": 0.024895035811311434, "grad_norm": 8.345445781598853, "learning_rate": 6.702127659574469e-07, "loss": 3.4983, "step": 63 }, { "epoch": 0.02529019510990368, "grad_norm": 7.990649600345105, "learning_rate": 6.808510638297873e-07, "loss": 3.5068, "step": 64 }, { "epoch": 0.025685354408495925, "grad_norm": 7.513578806398596, "learning_rate": 6.914893617021278e-07, "loss": 3.5573, "step": 65 }, { "epoch": 0.02608051370708817, "grad_norm": 7.9623181239477105, "learning_rate": 7.021276595744682e-07, "loss": 3.5052, "step": 66 }, { "epoch": 0.026475673005680416, "grad_norm": 7.637547791371253, "learning_rate": 7.127659574468087e-07, "loss": 3.4709, "step": 67 }, { "epoch": 0.02687083230427266, "grad_norm": 8.558533087051865, "learning_rate": 7.234042553191489e-07, "loss": 3.3483, "step": 68 }, { "epoch": 0.027265991602864904, "grad_norm": 8.199113740225883, "learning_rate": 7.340425531914893e-07, "loss": 3.2996, "step": 69 }, { "epoch": 0.02766115090145715, "grad_norm": 8.450241754459654, "learning_rate": 7.446808510638298e-07, "loss": 3.1916, "step": 70 }, { "epoch": 0.028056310200049395, "grad_norm": 7.103052632657081, "learning_rate": 7.553191489361702e-07, "loss": 3.165, "step": 71 }, { "epoch": 0.02845146949864164, "grad_norm": 7.397343059451777, "learning_rate": 7.659574468085107e-07, "loss": 3.1549, "step": 72 }, { "epoch": 0.028846628797233886, "grad_norm": 7.532992807551275, "learning_rate": 7.765957446808511e-07, "loss": 3.0944, "step": 73 }, { "epoch": 0.02924178809582613, "grad_norm": 6.784491281435092, "learning_rate": 7.872340425531916e-07, "loss": 3.0294, "step": 74 }, { "epoch": 0.029636947394418373, "grad_norm": 7.083810212959109, "learning_rate": 7.97872340425532e-07, "loss": 3.0021, "step": 75 }, { "epoch": 0.03003210669301062, "grad_norm": 6.443669804960258, "learning_rate": 8.085106382978725e-07, "loss": 2.9301, "step": 76 }, { "epoch": 0.030427265991602864, "grad_norm": 6.650880145536403, "learning_rate": 8.191489361702127e-07, "loss": 2.9397, "step": 77 }, { "epoch": 0.03082242529019511, "grad_norm": 6.36715615120205, "learning_rate": 8.297872340425532e-07, "loss": 2.8843, "step": 78 }, { "epoch": 0.031217584588787355, "grad_norm": 6.238954181171991, "learning_rate": 8.404255319148936e-07, "loss": 2.7895, "step": 79 }, { "epoch": 0.0316127438873796, "grad_norm": 6.232900546667891, "learning_rate": 8.510638297872341e-07, "loss": 2.8601, "step": 80 }, { "epoch": 0.032007903185971846, "grad_norm": 5.726543181847973, "learning_rate": 8.617021276595745e-07, "loss": 2.6892, "step": 81 }, { "epoch": 0.03240306248456409, "grad_norm": 5.502730599732169, "learning_rate": 8.72340425531915e-07, "loss": 2.6352, "step": 82 }, { "epoch": 0.032798221783156334, "grad_norm": 5.739593573701677, "learning_rate": 8.829787234042554e-07, "loss": 2.6015, "step": 83 }, { "epoch": 0.03319338108174858, "grad_norm": 5.6507901978379955, "learning_rate": 8.936170212765959e-07, "loss": 2.5505, "step": 84 }, { "epoch": 0.03358854038034083, "grad_norm": 5.183242056909587, "learning_rate": 9.042553191489363e-07, "loss": 2.5001, "step": 85 }, { "epoch": 0.03398369967893307, "grad_norm": 5.362205769595081, "learning_rate": 9.148936170212766e-07, "loss": 2.5267, "step": 86 }, { "epoch": 0.034378858977525316, "grad_norm": 4.811942843640751, "learning_rate": 9.25531914893617e-07, "loss": 2.3826, "step": 87 }, { "epoch": 0.03477401827611756, "grad_norm": 4.904542737672584, "learning_rate": 9.361702127659575e-07, "loss": 2.4234, "step": 88 }, { "epoch": 0.0351691775747098, "grad_norm": 4.870107420594597, "learning_rate": 9.468085106382979e-07, "loss": 2.321, "step": 89 }, { "epoch": 0.03556433687330205, "grad_norm": 4.581596683493078, "learning_rate": 9.574468085106384e-07, "loss": 2.2948, "step": 90 }, { "epoch": 0.0359594961718943, "grad_norm": 4.661567248806833, "learning_rate": 9.680851063829788e-07, "loss": 2.2851, "step": 91 }, { "epoch": 0.03635465547048654, "grad_norm": 4.1499647842616385, "learning_rate": 9.787234042553193e-07, "loss": 2.2358, "step": 92 }, { "epoch": 0.036749814769078785, "grad_norm": 4.064054364491757, "learning_rate": 9.893617021276597e-07, "loss": 2.2021, "step": 93 }, { "epoch": 0.03714497406767103, "grad_norm": 4.019497224170159, "learning_rate": 1.0000000000000002e-06, "loss": 2.1558, "step": 94 }, { "epoch": 0.03754013336626327, "grad_norm": 3.7739064116455254, "learning_rate": 1.0106382978723404e-06, "loss": 2.145, "step": 95 }, { "epoch": 0.03793529266485552, "grad_norm": 3.587043554533132, "learning_rate": 1.0212765957446809e-06, "loss": 2.0649, "step": 96 }, { "epoch": 0.03833045196344777, "grad_norm": 3.5425961325072732, "learning_rate": 1.0319148936170213e-06, "loss": 2.0494, "step": 97 }, { "epoch": 0.03872561126204001, "grad_norm": 3.4793118680889914, "learning_rate": 1.0425531914893618e-06, "loss": 1.9564, "step": 98 }, { "epoch": 0.039120770560632255, "grad_norm": 3.480928663874862, "learning_rate": 1.0531914893617022e-06, "loss": 1.9516, "step": 99 }, { "epoch": 0.0395159298592245, "grad_norm": 3.4270001249424427, "learning_rate": 1.0638297872340427e-06, "loss": 1.943, "step": 100 }, { "epoch": 0.03991108915781674, "grad_norm": 3.142501786484995, "learning_rate": 1.074468085106383e-06, "loss": 1.858, "step": 101 }, { "epoch": 0.04030624845640899, "grad_norm": 3.2413282091965376, "learning_rate": 1.0851063829787236e-06, "loss": 1.8773, "step": 102 }, { "epoch": 0.04070140775500124, "grad_norm": 3.2180251637134223, "learning_rate": 1.095744680851064e-06, "loss": 1.7858, "step": 103 }, { "epoch": 0.04109656705359348, "grad_norm": 3.0827158001171715, "learning_rate": 1.1063829787234042e-06, "loss": 1.8103, "step": 104 }, { "epoch": 0.041491726352185725, "grad_norm": 3.0156067280676284, "learning_rate": 1.1170212765957447e-06, "loss": 1.7703, "step": 105 }, { "epoch": 0.04188688565077797, "grad_norm": 3.1564655772648536, "learning_rate": 1.1276595744680851e-06, "loss": 1.7584, "step": 106 }, { "epoch": 0.04228204494937021, "grad_norm": 2.8464417493034384, "learning_rate": 1.1382978723404256e-06, "loss": 1.7098, "step": 107 }, { "epoch": 0.04267720424796246, "grad_norm": 2.979579920610495, "learning_rate": 1.148936170212766e-06, "loss": 1.6593, "step": 108 }, { "epoch": 0.04307236354655471, "grad_norm": 2.7758712165539996, "learning_rate": 1.1595744680851065e-06, "loss": 1.6563, "step": 109 }, { "epoch": 0.04346752284514695, "grad_norm": 2.5891620159779016, "learning_rate": 1.170212765957447e-06, "loss": 1.6248, "step": 110 }, { "epoch": 0.043862682143739194, "grad_norm": 2.4867507899845473, "learning_rate": 1.1808510638297874e-06, "loss": 1.5999, "step": 111 }, { "epoch": 0.04425784144233144, "grad_norm": 2.411241253335211, "learning_rate": 1.1914893617021278e-06, "loss": 1.6233, "step": 112 }, { "epoch": 0.04465300074092368, "grad_norm": 2.675802892730842, "learning_rate": 1.202127659574468e-06, "loss": 1.5911, "step": 113 }, { "epoch": 0.04504816003951593, "grad_norm": 2.4484713846419783, "learning_rate": 1.2127659574468085e-06, "loss": 1.552, "step": 114 }, { "epoch": 0.045443319338108176, "grad_norm": 2.126895225481791, "learning_rate": 1.223404255319149e-06, "loss": 1.5116, "step": 115 }, { "epoch": 0.04583847863670042, "grad_norm": 2.108035496774095, "learning_rate": 1.2340425531914894e-06, "loss": 1.5332, "step": 116 }, { "epoch": 0.046233637935292664, "grad_norm": 2.170204737476494, "learning_rate": 1.2446808510638299e-06, "loss": 1.5099, "step": 117 }, { "epoch": 0.04662879723388491, "grad_norm": 1.9897269791458945, "learning_rate": 1.2553191489361701e-06, "loss": 1.4747, "step": 118 }, { "epoch": 0.04702395653247716, "grad_norm": 1.924882132114436, "learning_rate": 1.2659574468085106e-06, "loss": 1.4877, "step": 119 }, { "epoch": 0.0474191158310694, "grad_norm": 2.1236023483825934, "learning_rate": 1.276595744680851e-06, "loss": 1.4593, "step": 120 }, { "epoch": 0.047814275129661646, "grad_norm": 1.8555875106493023, "learning_rate": 1.2872340425531915e-06, "loss": 1.3795, "step": 121 }, { "epoch": 0.04820943442825389, "grad_norm": 1.9400614964548064, "learning_rate": 1.297872340425532e-06, "loss": 1.404, "step": 122 }, { "epoch": 0.04860459372684613, "grad_norm": 1.6699536554778585, "learning_rate": 1.3085106382978724e-06, "loss": 1.3696, "step": 123 }, { "epoch": 0.04899975302543838, "grad_norm": 1.689620342600094, "learning_rate": 1.3191489361702128e-06, "loss": 1.344, "step": 124 }, { "epoch": 0.04939491232403063, "grad_norm": 1.7509004836307809, "learning_rate": 1.3297872340425533e-06, "loss": 1.3462, "step": 125 }, { "epoch": 0.04979007162262287, "grad_norm": 1.7664945294015308, "learning_rate": 1.3404255319148937e-06, "loss": 1.3477, "step": 126 }, { "epoch": 0.050185230921215115, "grad_norm": 2.084264973245835, "learning_rate": 1.3510638297872342e-06, "loss": 1.3218, "step": 127 }, { "epoch": 0.05058039021980736, "grad_norm": 1.5965337608217485, "learning_rate": 1.3617021276595746e-06, "loss": 1.3084, "step": 128 }, { "epoch": 0.0509755495183996, "grad_norm": 1.619614456507732, "learning_rate": 1.372340425531915e-06, "loss": 1.2938, "step": 129 }, { "epoch": 0.05137070881699185, "grad_norm": 1.8247931156223327, "learning_rate": 1.3829787234042555e-06, "loss": 1.2944, "step": 130 }, { "epoch": 0.0517658681155841, "grad_norm": 1.580039976604606, "learning_rate": 1.393617021276596e-06, "loss": 1.2777, "step": 131 }, { "epoch": 0.05216102741417634, "grad_norm": 1.509843143895781, "learning_rate": 1.4042553191489364e-06, "loss": 1.2841, "step": 132 }, { "epoch": 0.052556186712768585, "grad_norm": 1.6756920394271753, "learning_rate": 1.4148936170212769e-06, "loss": 1.2419, "step": 133 }, { "epoch": 0.05295134601136083, "grad_norm": 1.4623629043957789, "learning_rate": 1.4255319148936173e-06, "loss": 1.2077, "step": 134 }, { "epoch": 0.05334650530995307, "grad_norm": 1.383626312375799, "learning_rate": 1.4361702127659578e-06, "loss": 1.2265, "step": 135 }, { "epoch": 0.05374166460854532, "grad_norm": 1.7925919805177952, "learning_rate": 1.4468085106382978e-06, "loss": 1.2186, "step": 136 }, { "epoch": 0.05413682390713757, "grad_norm": 1.3738854790506727, "learning_rate": 1.4574468085106382e-06, "loss": 1.2159, "step": 137 }, { "epoch": 0.05453198320572981, "grad_norm": 1.4567831484040439, "learning_rate": 1.4680851063829787e-06, "loss": 1.1814, "step": 138 }, { "epoch": 0.054927142504322055, "grad_norm": 1.3546122882454086, "learning_rate": 1.4787234042553191e-06, "loss": 1.1519, "step": 139 }, { "epoch": 0.0553223018029143, "grad_norm": 1.3541293229496851, "learning_rate": 1.4893617021276596e-06, "loss": 1.1625, "step": 140 }, { "epoch": 0.05571746110150654, "grad_norm": 1.5328490619353512, "learning_rate": 1.5e-06, "loss": 1.1564, "step": 141 }, { "epoch": 0.05611262040009879, "grad_norm": 1.2335978296031311, "learning_rate": 1.5106382978723405e-06, "loss": 1.1294, "step": 142 }, { "epoch": 0.05650777969869104, "grad_norm": 1.281543201298622, "learning_rate": 1.521276595744681e-06, "loss": 1.1539, "step": 143 }, { "epoch": 0.05690293899728328, "grad_norm": 1.1347832342130402, "learning_rate": 1.5319148936170214e-06, "loss": 1.1475, "step": 144 }, { "epoch": 0.057298098295875524, "grad_norm": 1.272401514207382, "learning_rate": 1.5425531914893618e-06, "loss": 1.1062, "step": 145 }, { "epoch": 0.05769325759446777, "grad_norm": 1.1113360919213993, "learning_rate": 1.5531914893617023e-06, "loss": 1.0904, "step": 146 }, { "epoch": 0.05808841689306001, "grad_norm": 1.2143209476879704, "learning_rate": 1.5638297872340427e-06, "loss": 1.1221, "step": 147 }, { "epoch": 0.05848357619165226, "grad_norm": 1.1975123411456277, "learning_rate": 1.5744680851063832e-06, "loss": 1.1134, "step": 148 }, { "epoch": 0.058878735490244506, "grad_norm": 1.1752282768767688, "learning_rate": 1.5851063829787236e-06, "loss": 1.1073, "step": 149 }, { "epoch": 0.059273894788836747, "grad_norm": 1.1221206874706884, "learning_rate": 1.595744680851064e-06, "loss": 1.0863, "step": 150 }, { "epoch": 0.059669054087428994, "grad_norm": 1.0719072351198553, "learning_rate": 1.6063829787234045e-06, "loss": 1.087, "step": 151 }, { "epoch": 0.06006421338602124, "grad_norm": 1.0618749015123967, "learning_rate": 1.617021276595745e-06, "loss": 1.0724, "step": 152 }, { "epoch": 0.06045937268461349, "grad_norm": 1.289175259046802, "learning_rate": 1.6276595744680854e-06, "loss": 1.0913, "step": 153 }, { "epoch": 0.06085453198320573, "grad_norm": 1.1077238693214941, "learning_rate": 1.6382978723404255e-06, "loss": 1.0408, "step": 154 }, { "epoch": 0.061249691281797976, "grad_norm": 1.3121363094248382, "learning_rate": 1.648936170212766e-06, "loss": 1.0649, "step": 155 }, { "epoch": 0.06164485058039022, "grad_norm": 1.0047858271903711, "learning_rate": 1.6595744680851064e-06, "loss": 1.0339, "step": 156 }, { "epoch": 0.06204000987898246, "grad_norm": 1.222322248701329, "learning_rate": 1.6702127659574468e-06, "loss": 1.0367, "step": 157 }, { "epoch": 0.06243516917757471, "grad_norm": 0.9790824508239406, "learning_rate": 1.6808510638297873e-06, "loss": 1.0221, "step": 158 }, { "epoch": 0.06283032847616696, "grad_norm": 1.067740944312297, "learning_rate": 1.6914893617021277e-06, "loss": 1.0334, "step": 159 }, { "epoch": 0.0632254877747592, "grad_norm": 0.896625566421198, "learning_rate": 1.7021276595744682e-06, "loss": 1.0092, "step": 160 }, { "epoch": 0.06362064707335144, "grad_norm": 0.9627618743848442, "learning_rate": 1.7127659574468086e-06, "loss": 1.0463, "step": 161 }, { "epoch": 0.06401580637194369, "grad_norm": 1.0734207189542146, "learning_rate": 1.723404255319149e-06, "loss": 1.0392, "step": 162 }, { "epoch": 0.06441096567053593, "grad_norm": 0.9349753594761937, "learning_rate": 1.7340425531914895e-06, "loss": 0.9892, "step": 163 }, { "epoch": 0.06480612496912817, "grad_norm": 0.9980254538111385, "learning_rate": 1.74468085106383e-06, "loss": 0.9982, "step": 164 }, { "epoch": 0.06520128426772043, "grad_norm": 1.0811827248370265, "learning_rate": 1.7553191489361704e-06, "loss": 1.0152, "step": 165 }, { "epoch": 0.06559644356631267, "grad_norm": 0.9331981369264469, "learning_rate": 1.7659574468085109e-06, "loss": 0.98, "step": 166 }, { "epoch": 0.06599160286490491, "grad_norm": 0.9206315543562278, "learning_rate": 1.7765957446808513e-06, "loss": 1.0212, "step": 167 }, { "epoch": 0.06638676216349716, "grad_norm": 1.1010365241697404, "learning_rate": 1.7872340425531918e-06, "loss": 0.9817, "step": 168 }, { "epoch": 0.0667819214620894, "grad_norm": 0.9070688042861361, "learning_rate": 1.7978723404255322e-06, "loss": 0.9974, "step": 169 }, { "epoch": 0.06717708076068166, "grad_norm": 0.9259977243442546, "learning_rate": 1.8085106382978727e-06, "loss": 0.9979, "step": 170 }, { "epoch": 0.0675722400592739, "grad_norm": 0.9016076582389854, "learning_rate": 1.8191489361702131e-06, "loss": 0.9735, "step": 171 }, { "epoch": 0.06796739935786614, "grad_norm": 0.9114028424001606, "learning_rate": 1.8297872340425531e-06, "loss": 0.9806, "step": 172 }, { "epoch": 0.06836255865645839, "grad_norm": 0.9002141700632048, "learning_rate": 1.8404255319148936e-06, "loss": 0.9922, "step": 173 }, { "epoch": 0.06875771795505063, "grad_norm": 0.9028305681256402, "learning_rate": 1.851063829787234e-06, "loss": 0.9698, "step": 174 }, { "epoch": 0.06915287725364287, "grad_norm": 0.8797232554167207, "learning_rate": 1.8617021276595745e-06, "loss": 0.9627, "step": 175 }, { "epoch": 0.06954803655223513, "grad_norm": 0.8502350286865529, "learning_rate": 1.872340425531915e-06, "loss": 0.9788, "step": 176 }, { "epoch": 0.06994319585082737, "grad_norm": 0.9168330903333247, "learning_rate": 1.8829787234042554e-06, "loss": 0.9566, "step": 177 }, { "epoch": 0.0703383551494196, "grad_norm": 0.8633709197803816, "learning_rate": 1.8936170212765958e-06, "loss": 0.9403, "step": 178 }, { "epoch": 0.07073351444801186, "grad_norm": 0.9428773416078605, "learning_rate": 1.9042553191489363e-06, "loss": 0.973, "step": 179 }, { "epoch": 0.0711286737466041, "grad_norm": 0.8934209230197848, "learning_rate": 1.9148936170212767e-06, "loss": 0.9531, "step": 180 }, { "epoch": 0.07152383304519634, "grad_norm": 0.891758559309114, "learning_rate": 1.925531914893617e-06, "loss": 0.9379, "step": 181 }, { "epoch": 0.0719189923437886, "grad_norm": 0.8289117111620914, "learning_rate": 1.9361702127659576e-06, "loss": 0.9594, "step": 182 }, { "epoch": 0.07231415164238084, "grad_norm": 1.0538786469036632, "learning_rate": 1.946808510638298e-06, "loss": 0.9784, "step": 183 }, { "epoch": 0.07270931094097308, "grad_norm": 0.8224300832472103, "learning_rate": 1.9574468085106385e-06, "loss": 0.9571, "step": 184 }, { "epoch": 0.07310447023956533, "grad_norm": 0.8493893795427978, "learning_rate": 1.968085106382979e-06, "loss": 0.9436, "step": 185 }, { "epoch": 0.07349962953815757, "grad_norm": 0.7674702264899148, "learning_rate": 1.9787234042553194e-06, "loss": 0.9391, "step": 186 }, { "epoch": 0.07389478883674981, "grad_norm": 0.7756609871194469, "learning_rate": 1.98936170212766e-06, "loss": 0.9097, "step": 187 }, { "epoch": 0.07428994813534207, "grad_norm": 0.8401464399948625, "learning_rate": 2.0000000000000003e-06, "loss": 0.9084, "step": 188 }, { "epoch": 0.0746851074339343, "grad_norm": 0.800464792845907, "learning_rate": 2.0106382978723408e-06, "loss": 0.955, "step": 189 }, { "epoch": 0.07508026673252655, "grad_norm": 0.7963237467038586, "learning_rate": 2.021276595744681e-06, "loss": 0.9077, "step": 190 }, { "epoch": 0.0754754260311188, "grad_norm": 0.8597621463693865, "learning_rate": 2.0319148936170213e-06, "loss": 0.9145, "step": 191 }, { "epoch": 0.07587058532971104, "grad_norm": 0.8089654571622219, "learning_rate": 2.0425531914893617e-06, "loss": 0.896, "step": 192 }, { "epoch": 0.07626574462830328, "grad_norm": 0.8373533262314947, "learning_rate": 2.053191489361702e-06, "loss": 0.9412, "step": 193 }, { "epoch": 0.07666090392689554, "grad_norm": 0.7388812923284674, "learning_rate": 2.0638297872340426e-06, "loss": 0.9099, "step": 194 }, { "epoch": 0.07705606322548778, "grad_norm": 0.7430489194577857, "learning_rate": 2.074468085106383e-06, "loss": 0.8875, "step": 195 }, { "epoch": 0.07745122252408002, "grad_norm": 0.9205612468817582, "learning_rate": 2.0851063829787235e-06, "loss": 0.8999, "step": 196 }, { "epoch": 0.07784638182267227, "grad_norm": 0.7981256350175032, "learning_rate": 2.095744680851064e-06, "loss": 0.906, "step": 197 }, { "epoch": 0.07824154112126451, "grad_norm": 0.934795287215565, "learning_rate": 2.1063829787234044e-06, "loss": 0.8908, "step": 198 }, { "epoch": 0.07863670041985675, "grad_norm": 0.8225561510286254, "learning_rate": 2.117021276595745e-06, "loss": 0.901, "step": 199 }, { "epoch": 0.079031859718449, "grad_norm": 0.814892859258292, "learning_rate": 2.1276595744680853e-06, "loss": 0.897, "step": 200 }, { "epoch": 0.07942701901704124, "grad_norm": 0.9225501505784203, "learning_rate": 2.1382978723404258e-06, "loss": 0.9174, "step": 201 }, { "epoch": 0.07982217831563349, "grad_norm": 0.7007925087166256, "learning_rate": 2.148936170212766e-06, "loss": 0.9093, "step": 202 }, { "epoch": 0.08021733761422574, "grad_norm": 0.7613541888010653, "learning_rate": 2.1595744680851067e-06, "loss": 0.8826, "step": 203 }, { "epoch": 0.08061249691281798, "grad_norm": 0.726817783841255, "learning_rate": 2.170212765957447e-06, "loss": 0.8738, "step": 204 }, { "epoch": 0.08100765621141022, "grad_norm": 0.7037687083978766, "learning_rate": 2.1808510638297876e-06, "loss": 0.8989, "step": 205 }, { "epoch": 0.08140281551000247, "grad_norm": 0.7363960547281249, "learning_rate": 2.191489361702128e-06, "loss": 0.8902, "step": 206 }, { "epoch": 0.08179797480859471, "grad_norm": 0.7580280652504182, "learning_rate": 2.2021276595744685e-06, "loss": 0.9102, "step": 207 }, { "epoch": 0.08219313410718695, "grad_norm": 0.7757175348061099, "learning_rate": 2.2127659574468085e-06, "loss": 0.8779, "step": 208 }, { "epoch": 0.08258829340577921, "grad_norm": 0.7482271345832553, "learning_rate": 2.223404255319149e-06, "loss": 0.8865, "step": 209 }, { "epoch": 0.08298345270437145, "grad_norm": 0.7398985474265629, "learning_rate": 2.2340425531914894e-06, "loss": 0.8801, "step": 210 }, { "epoch": 0.08337861200296369, "grad_norm": 0.7323608656730606, "learning_rate": 2.24468085106383e-06, "loss": 0.9002, "step": 211 }, { "epoch": 0.08377377130155594, "grad_norm": 0.8148667910929218, "learning_rate": 2.2553191489361703e-06, "loss": 0.8647, "step": 212 }, { "epoch": 0.08416893060014818, "grad_norm": 0.7262927203241404, "learning_rate": 2.2659574468085107e-06, "loss": 0.8741, "step": 213 }, { "epoch": 0.08456408989874042, "grad_norm": 0.8201240188887061, "learning_rate": 2.276595744680851e-06, "loss": 0.9041, "step": 214 }, { "epoch": 0.08495924919733268, "grad_norm": 0.7492499292809485, "learning_rate": 2.2872340425531916e-06, "loss": 0.853, "step": 215 }, { "epoch": 0.08535440849592492, "grad_norm": 0.7916341013954181, "learning_rate": 2.297872340425532e-06, "loss": 0.8871, "step": 216 }, { "epoch": 0.08574956779451716, "grad_norm": 0.7520394996291526, "learning_rate": 2.3085106382978725e-06, "loss": 0.8562, "step": 217 }, { "epoch": 0.08614472709310941, "grad_norm": 1.1281408516742564, "learning_rate": 2.319148936170213e-06, "loss": 0.8645, "step": 218 }, { "epoch": 0.08653988639170165, "grad_norm": 0.8788573403694457, "learning_rate": 2.3297872340425534e-06, "loss": 0.8623, "step": 219 }, { "epoch": 0.0869350456902939, "grad_norm": 0.867944142979568, "learning_rate": 2.340425531914894e-06, "loss": 0.8572, "step": 220 }, { "epoch": 0.08733020498888615, "grad_norm": 0.7926936676598336, "learning_rate": 2.3510638297872343e-06, "loss": 0.8713, "step": 221 }, { "epoch": 0.08772536428747839, "grad_norm": 0.76255556176971, "learning_rate": 2.3617021276595748e-06, "loss": 0.8494, "step": 222 }, { "epoch": 0.08812052358607063, "grad_norm": 0.7600673269310736, "learning_rate": 2.3723404255319152e-06, "loss": 0.8482, "step": 223 }, { "epoch": 0.08851568288466288, "grad_norm": 0.7112814516322407, "learning_rate": 2.3829787234042557e-06, "loss": 0.8533, "step": 224 }, { "epoch": 0.08891084218325512, "grad_norm": 0.7036773729937497, "learning_rate": 2.393617021276596e-06, "loss": 0.847, "step": 225 }, { "epoch": 0.08930600148184736, "grad_norm": 0.6590954821985295, "learning_rate": 2.404255319148936e-06, "loss": 0.8561, "step": 226 }, { "epoch": 0.08970116078043962, "grad_norm": 0.7847232653490934, "learning_rate": 2.4148936170212766e-06, "loss": 0.8536, "step": 227 }, { "epoch": 0.09009632007903186, "grad_norm": 0.6509851787949374, "learning_rate": 2.425531914893617e-06, "loss": 0.85, "step": 228 }, { "epoch": 0.0904914793776241, "grad_norm": 0.648445158504499, "learning_rate": 2.4361702127659575e-06, "loss": 0.8012, "step": 229 }, { "epoch": 0.09088663867621635, "grad_norm": 0.736497309836382, "learning_rate": 2.446808510638298e-06, "loss": 0.8639, "step": 230 }, { "epoch": 0.09128179797480859, "grad_norm": 0.645898512592661, "learning_rate": 2.4574468085106384e-06, "loss": 0.8243, "step": 231 }, { "epoch": 0.09167695727340083, "grad_norm": 0.67481419189577, "learning_rate": 2.468085106382979e-06, "loss": 0.8682, "step": 232 }, { "epoch": 0.09207211657199309, "grad_norm": 0.7664263353306855, "learning_rate": 2.4787234042553193e-06, "loss": 0.8623, "step": 233 }, { "epoch": 0.09246727587058533, "grad_norm": 0.6372201971565544, "learning_rate": 2.4893617021276598e-06, "loss": 0.8153, "step": 234 }, { "epoch": 0.09286243516917757, "grad_norm": 0.7305559386754572, "learning_rate": 2.5e-06, "loss": 0.85, "step": 235 }, { "epoch": 0.09325759446776982, "grad_norm": 0.8258720994595878, "learning_rate": 2.5106382978723402e-06, "loss": 0.8588, "step": 236 }, { "epoch": 0.09365275376636206, "grad_norm": 0.762878968342536, "learning_rate": 2.521276595744681e-06, "loss": 0.8606, "step": 237 }, { "epoch": 0.09404791306495432, "grad_norm": 0.7142096057042978, "learning_rate": 2.531914893617021e-06, "loss": 0.8337, "step": 238 }, { "epoch": 0.09444307236354656, "grad_norm": 0.6622420389390998, "learning_rate": 2.542553191489362e-06, "loss": 0.8475, "step": 239 }, { "epoch": 0.0948382316621388, "grad_norm": 0.7610481119780552, "learning_rate": 2.553191489361702e-06, "loss": 0.8637, "step": 240 }, { "epoch": 0.09523339096073105, "grad_norm": 0.8093851995765269, "learning_rate": 2.563829787234043e-06, "loss": 0.8521, "step": 241 }, { "epoch": 0.09562855025932329, "grad_norm": 0.6922332669648779, "learning_rate": 2.574468085106383e-06, "loss": 0.8318, "step": 242 }, { "epoch": 0.09602370955791553, "grad_norm": 0.7693846197783168, "learning_rate": 2.585106382978724e-06, "loss": 0.8344, "step": 243 }, { "epoch": 0.09641886885650779, "grad_norm": 0.827949068079929, "learning_rate": 2.595744680851064e-06, "loss": 0.8486, "step": 244 }, { "epoch": 0.09681402815510003, "grad_norm": 0.6650613101391282, "learning_rate": 2.6063829787234047e-06, "loss": 0.8562, "step": 245 }, { "epoch": 0.09720918745369227, "grad_norm": 0.8278434657863171, "learning_rate": 2.6170212765957447e-06, "loss": 0.8416, "step": 246 }, { "epoch": 0.09760434675228452, "grad_norm": 0.7938328023685507, "learning_rate": 2.6276595744680856e-06, "loss": 0.8311, "step": 247 }, { "epoch": 0.09799950605087676, "grad_norm": 0.6143610103260296, "learning_rate": 2.6382978723404256e-06, "loss": 0.8254, "step": 248 }, { "epoch": 0.098394665349469, "grad_norm": 0.7619119256406987, "learning_rate": 2.6489361702127665e-06, "loss": 0.839, "step": 249 }, { "epoch": 0.09878982464806126, "grad_norm": 0.7986196919326185, "learning_rate": 2.6595744680851065e-06, "loss": 0.8455, "step": 250 }, { "epoch": 0.0991849839466535, "grad_norm": 0.6717154185196351, "learning_rate": 2.6702127659574474e-06, "loss": 0.8336, "step": 251 }, { "epoch": 0.09958014324524574, "grad_norm": 0.7767129776930844, "learning_rate": 2.6808510638297874e-06, "loss": 0.8145, "step": 252 }, { "epoch": 0.09997530254383799, "grad_norm": 0.6439894121807275, "learning_rate": 2.6914893617021283e-06, "loss": 0.8111, "step": 253 }, { "epoch": 0.10037046184243023, "grad_norm": 0.607679779354355, "learning_rate": 2.7021276595744683e-06, "loss": 0.8291, "step": 254 }, { "epoch": 0.10076562114102247, "grad_norm": 0.7464563131206438, "learning_rate": 2.7127659574468084e-06, "loss": 0.8017, "step": 255 }, { "epoch": 0.10116078043961473, "grad_norm": 0.7095071407490194, "learning_rate": 2.7234042553191492e-06, "loss": 0.8497, "step": 256 }, { "epoch": 0.10155593973820697, "grad_norm": 0.6459106720980804, "learning_rate": 2.7340425531914893e-06, "loss": 0.8405, "step": 257 }, { "epoch": 0.1019510990367992, "grad_norm": 0.7520801305481896, "learning_rate": 2.74468085106383e-06, "loss": 0.8074, "step": 258 }, { "epoch": 0.10234625833539146, "grad_norm": 0.7352594748784674, "learning_rate": 2.75531914893617e-06, "loss": 0.8185, "step": 259 }, { "epoch": 0.1027414176339837, "grad_norm": 0.7017112677861721, "learning_rate": 2.765957446808511e-06, "loss": 0.8266, "step": 260 }, { "epoch": 0.10313657693257594, "grad_norm": 0.761095345544042, "learning_rate": 2.776595744680851e-06, "loss": 0.7901, "step": 261 }, { "epoch": 0.1035317362311682, "grad_norm": 0.7630856502083389, "learning_rate": 2.787234042553192e-06, "loss": 0.8038, "step": 262 }, { "epoch": 0.10392689552976044, "grad_norm": 0.7132952976277227, "learning_rate": 2.797872340425532e-06, "loss": 0.8157, "step": 263 }, { "epoch": 0.10432205482835268, "grad_norm": 0.900249895843058, "learning_rate": 2.808510638297873e-06, "loss": 0.8025, "step": 264 }, { "epoch": 0.10471721412694493, "grad_norm": 0.6280451084365367, "learning_rate": 2.819148936170213e-06, "loss": 0.7995, "step": 265 }, { "epoch": 0.10511237342553717, "grad_norm": 0.7227719924319213, "learning_rate": 2.8297872340425537e-06, "loss": 0.8428, "step": 266 }, { "epoch": 0.10550753272412941, "grad_norm": 0.7918390149442235, "learning_rate": 2.8404255319148938e-06, "loss": 0.7932, "step": 267 }, { "epoch": 0.10590269202272166, "grad_norm": 0.6180037290493834, "learning_rate": 2.8510638297872346e-06, "loss": 0.8234, "step": 268 }, { "epoch": 0.1062978513213139, "grad_norm": 0.6527196921600206, "learning_rate": 2.8617021276595747e-06, "loss": 0.8047, "step": 269 }, { "epoch": 0.10669301061990615, "grad_norm": 1.0085142638197158, "learning_rate": 2.8723404255319155e-06, "loss": 0.8164, "step": 270 }, { "epoch": 0.1070881699184984, "grad_norm": 0.9812147195881408, "learning_rate": 2.8829787234042556e-06, "loss": 0.7965, "step": 271 }, { "epoch": 0.10748332921709064, "grad_norm": 1.0448400314614967, "learning_rate": 2.8936170212765956e-06, "loss": 0.8107, "step": 272 }, { "epoch": 0.10787848851568288, "grad_norm": 0.6984652148632842, "learning_rate": 2.9042553191489365e-06, "loss": 0.7865, "step": 273 }, { "epoch": 0.10827364781427513, "grad_norm": 0.615065445787656, "learning_rate": 2.9148936170212765e-06, "loss": 0.7847, "step": 274 }, { "epoch": 0.10866880711286737, "grad_norm": 0.8697531900111486, "learning_rate": 2.9255319148936174e-06, "loss": 0.8037, "step": 275 }, { "epoch": 0.10906396641145961, "grad_norm": 0.6197520935871196, "learning_rate": 2.9361702127659574e-06, "loss": 0.8151, "step": 276 }, { "epoch": 0.10945912571005187, "grad_norm": 0.8060011131830561, "learning_rate": 2.9468085106382983e-06, "loss": 0.7873, "step": 277 }, { "epoch": 0.10985428500864411, "grad_norm": 0.6756833173956306, "learning_rate": 2.9574468085106383e-06, "loss": 0.8293, "step": 278 }, { "epoch": 0.11024944430723635, "grad_norm": 0.6099851620784188, "learning_rate": 2.968085106382979e-06, "loss": 0.8241, "step": 279 }, { "epoch": 0.1106446036058286, "grad_norm": 0.7283434437348605, "learning_rate": 2.978723404255319e-06, "loss": 0.7936, "step": 280 }, { "epoch": 0.11103976290442084, "grad_norm": 0.6267015873151124, "learning_rate": 2.98936170212766e-06, "loss": 0.7962, "step": 281 }, { "epoch": 0.11143492220301308, "grad_norm": 0.6116326742433275, "learning_rate": 3e-06, "loss": 0.8187, "step": 282 }, { "epoch": 0.11183008150160534, "grad_norm": 0.7049851223926784, "learning_rate": 3.010638297872341e-06, "loss": 0.7831, "step": 283 }, { "epoch": 0.11222524080019758, "grad_norm": 0.6257674496318532, "learning_rate": 3.021276595744681e-06, "loss": 0.7781, "step": 284 }, { "epoch": 0.11262040009878982, "grad_norm": 0.6190650007298025, "learning_rate": 3.031914893617022e-06, "loss": 0.7847, "step": 285 }, { "epoch": 0.11301555939738207, "grad_norm": 0.6089050965059565, "learning_rate": 3.042553191489362e-06, "loss": 0.7922, "step": 286 }, { "epoch": 0.11341071869597431, "grad_norm": 0.6557347874370106, "learning_rate": 3.0531914893617027e-06, "loss": 0.7963, "step": 287 }, { "epoch": 0.11380587799456655, "grad_norm": 0.605070002885156, "learning_rate": 3.0638297872340428e-06, "loss": 0.8013, "step": 288 }, { "epoch": 0.11420103729315881, "grad_norm": 0.6311761000607505, "learning_rate": 3.0744680851063836e-06, "loss": 0.8005, "step": 289 }, { "epoch": 0.11459619659175105, "grad_norm": 0.6053509981952528, "learning_rate": 3.0851063829787237e-06, "loss": 0.7998, "step": 290 }, { "epoch": 0.11499135589034329, "grad_norm": 0.6235992193861427, "learning_rate": 3.0957446808510637e-06, "loss": 0.7804, "step": 291 }, { "epoch": 0.11538651518893554, "grad_norm": 0.7433766960086654, "learning_rate": 3.1063829787234046e-06, "loss": 0.7867, "step": 292 }, { "epoch": 0.11578167448752778, "grad_norm": 1.272372503947218, "learning_rate": 3.1170212765957446e-06, "loss": 0.7929, "step": 293 }, { "epoch": 0.11617683378612002, "grad_norm": 0.8186585325087611, "learning_rate": 3.1276595744680855e-06, "loss": 0.7854, "step": 294 }, { "epoch": 0.11657199308471228, "grad_norm": 0.6739199686171928, "learning_rate": 3.1382978723404255e-06, "loss": 0.805, "step": 295 }, { "epoch": 0.11696715238330452, "grad_norm": 0.6306549835549222, "learning_rate": 3.1489361702127664e-06, "loss": 0.786, "step": 296 }, { "epoch": 0.11736231168189676, "grad_norm": 0.7237829920169022, "learning_rate": 3.1595744680851064e-06, "loss": 0.7921, "step": 297 }, { "epoch": 0.11775747098048901, "grad_norm": 0.6263525455488399, "learning_rate": 3.1702127659574473e-06, "loss": 0.7794, "step": 298 }, { "epoch": 0.11815263027908125, "grad_norm": 0.6260917117859388, "learning_rate": 3.1808510638297873e-06, "loss": 0.7821, "step": 299 }, { "epoch": 0.11854778957767349, "grad_norm": 0.6670782296472709, "learning_rate": 3.191489361702128e-06, "loss": 0.7889, "step": 300 }, { "epoch": 0.11894294887626575, "grad_norm": 0.7021296376843668, "learning_rate": 3.202127659574468e-06, "loss": 0.7879, "step": 301 }, { "epoch": 0.11933810817485799, "grad_norm": 0.6436206115473436, "learning_rate": 3.212765957446809e-06, "loss": 0.7463, "step": 302 }, { "epoch": 0.11973326747345023, "grad_norm": 0.6581559963727542, "learning_rate": 3.223404255319149e-06, "loss": 0.7833, "step": 303 }, { "epoch": 0.12012842677204248, "grad_norm": 0.6085378327556474, "learning_rate": 3.23404255319149e-06, "loss": 0.7761, "step": 304 }, { "epoch": 0.12052358607063472, "grad_norm": 0.6332080516803438, "learning_rate": 3.24468085106383e-06, "loss": 0.7687, "step": 305 }, { "epoch": 0.12091874536922698, "grad_norm": 0.5853426452014024, "learning_rate": 3.255319148936171e-06, "loss": 0.7878, "step": 306 }, { "epoch": 0.12131390466781922, "grad_norm": 0.6223748575158505, "learning_rate": 3.265957446808511e-06, "loss": 0.7739, "step": 307 }, { "epoch": 0.12170906396641146, "grad_norm": 0.6981138823006418, "learning_rate": 3.276595744680851e-06, "loss": 0.7695, "step": 308 }, { "epoch": 0.12210422326500371, "grad_norm": 0.6663161701946723, "learning_rate": 3.287234042553192e-06, "loss": 0.8053, "step": 309 }, { "epoch": 0.12249938256359595, "grad_norm": 0.5681680114078884, "learning_rate": 3.297872340425532e-06, "loss": 0.7752, "step": 310 }, { "epoch": 0.12289454186218819, "grad_norm": 0.5858906412963043, "learning_rate": 3.3085106382978727e-06, "loss": 0.7916, "step": 311 }, { "epoch": 0.12328970116078045, "grad_norm": 0.6534395144260531, "learning_rate": 3.3191489361702127e-06, "loss": 0.7912, "step": 312 }, { "epoch": 0.12368486045937269, "grad_norm": 0.586198108795242, "learning_rate": 3.3297872340425536e-06, "loss": 0.7813, "step": 313 }, { "epoch": 0.12408001975796493, "grad_norm": 0.647904927155932, "learning_rate": 3.3404255319148936e-06, "loss": 0.7936, "step": 314 }, { "epoch": 0.12447517905655718, "grad_norm": 0.58805476345561, "learning_rate": 3.3510638297872345e-06, "loss": 0.7973, "step": 315 }, { "epoch": 0.12487033835514942, "grad_norm": 0.5947946152867843, "learning_rate": 3.3617021276595745e-06, "loss": 0.7632, "step": 316 }, { "epoch": 0.12526549765374168, "grad_norm": 0.6193828087421168, "learning_rate": 3.3723404255319154e-06, "loss": 0.7848, "step": 317 }, { "epoch": 0.12566065695233392, "grad_norm": 0.6258831363958954, "learning_rate": 3.3829787234042554e-06, "loss": 0.7678, "step": 318 }, { "epoch": 0.12605581625092616, "grad_norm": 0.5747152594546233, "learning_rate": 3.3936170212765963e-06, "loss": 0.7817, "step": 319 }, { "epoch": 0.1264509755495184, "grad_norm": 0.731674333424822, "learning_rate": 3.4042553191489363e-06, "loss": 0.7643, "step": 320 }, { "epoch": 0.12684613484811064, "grad_norm": 0.6529010150169795, "learning_rate": 3.414893617021277e-06, "loss": 0.7667, "step": 321 }, { "epoch": 0.12724129414670288, "grad_norm": 0.7014365604795955, "learning_rate": 3.4255319148936172e-06, "loss": 0.7787, "step": 322 }, { "epoch": 0.12763645344529514, "grad_norm": 0.5834784417943027, "learning_rate": 3.436170212765958e-06, "loss": 0.779, "step": 323 }, { "epoch": 0.12803161274388739, "grad_norm": 0.7742370051056543, "learning_rate": 3.446808510638298e-06, "loss": 0.7733, "step": 324 }, { "epoch": 0.12842677204247963, "grad_norm": 0.5865654907094333, "learning_rate": 3.457446808510639e-06, "loss": 0.7456, "step": 325 }, { "epoch": 0.12882193134107187, "grad_norm": 0.682149646741674, "learning_rate": 3.468085106382979e-06, "loss": 0.7994, "step": 326 }, { "epoch": 0.1292170906396641, "grad_norm": 0.6701640838897713, "learning_rate": 3.478723404255319e-06, "loss": 0.7757, "step": 327 }, { "epoch": 0.12961224993825635, "grad_norm": 0.6614941446237965, "learning_rate": 3.48936170212766e-06, "loss": 0.7725, "step": 328 }, { "epoch": 0.13000740923684861, "grad_norm": 0.6095415461958933, "learning_rate": 3.5e-06, "loss": 0.7653, "step": 329 }, { "epoch": 0.13040256853544085, "grad_norm": 0.6190745963316048, "learning_rate": 3.510638297872341e-06, "loss": 0.7805, "step": 330 }, { "epoch": 0.1307977278340331, "grad_norm": 0.6147443464940033, "learning_rate": 3.521276595744681e-06, "loss": 0.7763, "step": 331 }, { "epoch": 0.13119288713262534, "grad_norm": 0.5866635120263384, "learning_rate": 3.5319148936170217e-06, "loss": 0.7698, "step": 332 }, { "epoch": 0.13158804643121758, "grad_norm": 0.6186148281750584, "learning_rate": 3.5425531914893617e-06, "loss": 0.7771, "step": 333 }, { "epoch": 0.13198320572980982, "grad_norm": 0.6620495658950574, "learning_rate": 3.5531914893617026e-06, "loss": 0.7622, "step": 334 }, { "epoch": 0.13237836502840208, "grad_norm": 0.6402051904915239, "learning_rate": 3.5638297872340426e-06, "loss": 0.7734, "step": 335 }, { "epoch": 0.13277352432699432, "grad_norm": 0.6309802907790648, "learning_rate": 3.5744680851063835e-06, "loss": 0.7665, "step": 336 }, { "epoch": 0.13316868362558656, "grad_norm": 0.6408680224306361, "learning_rate": 3.5851063829787235e-06, "loss": 0.7686, "step": 337 }, { "epoch": 0.1335638429241788, "grad_norm": 0.6160995866186305, "learning_rate": 3.5957446808510644e-06, "loss": 0.7373, "step": 338 }, { "epoch": 0.13395900222277105, "grad_norm": 0.6840190156980204, "learning_rate": 3.6063829787234044e-06, "loss": 0.7648, "step": 339 }, { "epoch": 0.1343541615213633, "grad_norm": 0.6581837356659743, "learning_rate": 3.6170212765957453e-06, "loss": 0.7531, "step": 340 }, { "epoch": 0.13474932081995555, "grad_norm": 0.7148764476179186, "learning_rate": 3.6276595744680853e-06, "loss": 0.7819, "step": 341 }, { "epoch": 0.1351444801185478, "grad_norm": 0.6600259458370601, "learning_rate": 3.6382978723404262e-06, "loss": 0.7842, "step": 342 }, { "epoch": 0.13553963941714003, "grad_norm": 0.6126556516156064, "learning_rate": 3.6489361702127662e-06, "loss": 0.7639, "step": 343 }, { "epoch": 0.13593479871573227, "grad_norm": 0.6672986551005967, "learning_rate": 3.6595744680851063e-06, "loss": 0.7592, "step": 344 }, { "epoch": 0.13632995801432451, "grad_norm": 0.6223043791589471, "learning_rate": 3.670212765957447e-06, "loss": 0.7564, "step": 345 }, { "epoch": 0.13672511731291678, "grad_norm": 0.8110409579655677, "learning_rate": 3.680851063829787e-06, "loss": 0.7651, "step": 346 }, { "epoch": 0.13712027661150902, "grad_norm": 0.6581211301407619, "learning_rate": 3.691489361702128e-06, "loss": 0.7611, "step": 347 }, { "epoch": 0.13751543591010126, "grad_norm": 0.6744339312700035, "learning_rate": 3.702127659574468e-06, "loss": 0.7613, "step": 348 }, { "epoch": 0.1379105952086935, "grad_norm": 0.605969858747699, "learning_rate": 3.712765957446809e-06, "loss": 0.7847, "step": 349 }, { "epoch": 0.13830575450728574, "grad_norm": 0.6796481089227777, "learning_rate": 3.723404255319149e-06, "loss": 0.7698, "step": 350 }, { "epoch": 0.13870091380587798, "grad_norm": 0.6445216487073453, "learning_rate": 3.73404255319149e-06, "loss": 0.7452, "step": 351 }, { "epoch": 0.13909607310447025, "grad_norm": 0.5848343681445489, "learning_rate": 3.74468085106383e-06, "loss": 0.7532, "step": 352 }, { "epoch": 0.1394912324030625, "grad_norm": 0.686482496408356, "learning_rate": 3.7553191489361707e-06, "loss": 0.745, "step": 353 }, { "epoch": 0.13988639170165473, "grad_norm": 0.6484269721297492, "learning_rate": 3.7659574468085108e-06, "loss": 0.7408, "step": 354 }, { "epoch": 0.14028155100024697, "grad_norm": 0.5809696958364097, "learning_rate": 3.7765957446808516e-06, "loss": 0.7471, "step": 355 }, { "epoch": 0.1406767102988392, "grad_norm": 0.6182389029108604, "learning_rate": 3.7872340425531917e-06, "loss": 0.7654, "step": 356 }, { "epoch": 0.14107186959743145, "grad_norm": 0.6745002737038325, "learning_rate": 3.7978723404255325e-06, "loss": 0.7604, "step": 357 }, { "epoch": 0.14146702889602372, "grad_norm": 0.7457299483196483, "learning_rate": 3.8085106382978726e-06, "loss": 0.771, "step": 358 }, { "epoch": 0.14186218819461596, "grad_norm": 0.626223558343114, "learning_rate": 3.819148936170213e-06, "loss": 0.7856, "step": 359 }, { "epoch": 0.1422573474932082, "grad_norm": 0.6297264134042032, "learning_rate": 3.8297872340425535e-06, "loss": 0.748, "step": 360 }, { "epoch": 0.14265250679180044, "grad_norm": 0.5951480073799157, "learning_rate": 3.840425531914894e-06, "loss": 0.7782, "step": 361 }, { "epoch": 0.14304766609039268, "grad_norm": 0.5886768213260241, "learning_rate": 3.851063829787234e-06, "loss": 0.7455, "step": 362 }, { "epoch": 0.14344282538898492, "grad_norm": 0.673440091158271, "learning_rate": 3.861702127659575e-06, "loss": 0.7419, "step": 363 }, { "epoch": 0.1438379846875772, "grad_norm": 0.6400671142527239, "learning_rate": 3.872340425531915e-06, "loss": 0.7648, "step": 364 }, { "epoch": 0.14423314398616943, "grad_norm": 0.6378848588398233, "learning_rate": 3.882978723404256e-06, "loss": 0.764, "step": 365 }, { "epoch": 0.14462830328476167, "grad_norm": 0.5855533735447821, "learning_rate": 3.893617021276596e-06, "loss": 0.755, "step": 366 }, { "epoch": 0.1450234625833539, "grad_norm": 0.5988399094182794, "learning_rate": 3.904255319148937e-06, "loss": 0.7717, "step": 367 }, { "epoch": 0.14541862188194615, "grad_norm": 0.5909689175761067, "learning_rate": 3.914893617021277e-06, "loss": 0.7305, "step": 368 }, { "epoch": 0.1458137811805384, "grad_norm": 0.6091829357239786, "learning_rate": 3.9255319148936175e-06, "loss": 0.7582, "step": 369 }, { "epoch": 0.14620894047913066, "grad_norm": 0.663665703959751, "learning_rate": 3.936170212765958e-06, "loss": 0.7634, "step": 370 }, { "epoch": 0.1466040997777229, "grad_norm": 0.6131032136468707, "learning_rate": 3.946808510638298e-06, "loss": 0.7352, "step": 371 }, { "epoch": 0.14699925907631514, "grad_norm": 0.6438458775790014, "learning_rate": 3.957446808510639e-06, "loss": 0.7558, "step": 372 }, { "epoch": 0.14739441837490738, "grad_norm": 0.6221026846089203, "learning_rate": 3.968085106382979e-06, "loss": 0.7483, "step": 373 }, { "epoch": 0.14778957767349962, "grad_norm": 0.718970949273493, "learning_rate": 3.97872340425532e-06, "loss": 0.7582, "step": 374 }, { "epoch": 0.14818473697209186, "grad_norm": 0.7632958625164173, "learning_rate": 3.98936170212766e-06, "loss": 0.7487, "step": 375 }, { "epoch": 0.14857989627068413, "grad_norm": 0.6975538226598186, "learning_rate": 4.000000000000001e-06, "loss": 0.7548, "step": 376 }, { "epoch": 0.14897505556927637, "grad_norm": 1.011413729830943, "learning_rate": 4.010638297872341e-06, "loss": 0.7355, "step": 377 }, { "epoch": 0.1493702148678686, "grad_norm": 0.6337564143926464, "learning_rate": 4.0212765957446816e-06, "loss": 0.7519, "step": 378 }, { "epoch": 0.14976537416646085, "grad_norm": 0.668831153796517, "learning_rate": 4.031914893617022e-06, "loss": 0.7624, "step": 379 }, { "epoch": 0.1501605334650531, "grad_norm": 0.6436926511990773, "learning_rate": 4.042553191489362e-06, "loss": 0.7474, "step": 380 }, { "epoch": 0.15055569276364533, "grad_norm": 0.6272202096101458, "learning_rate": 4.053191489361702e-06, "loss": 0.7602, "step": 381 }, { "epoch": 0.1509508520622376, "grad_norm": 0.6212797540013587, "learning_rate": 4.0638297872340425e-06, "loss": 0.7772, "step": 382 }, { "epoch": 0.15134601136082984, "grad_norm": 0.6289946916321798, "learning_rate": 4.074468085106383e-06, "loss": 0.7689, "step": 383 }, { "epoch": 0.15174117065942208, "grad_norm": 0.6380191041059411, "learning_rate": 4.085106382978723e-06, "loss": 0.7572, "step": 384 }, { "epoch": 0.15213632995801432, "grad_norm": 0.5779795688082269, "learning_rate": 4.095744680851064e-06, "loss": 0.7546, "step": 385 }, { "epoch": 0.15253148925660656, "grad_norm": 0.7842430533418746, "learning_rate": 4.106382978723404e-06, "loss": 0.7751, "step": 386 }, { "epoch": 0.1529266485551988, "grad_norm": 0.6729893760136558, "learning_rate": 4.117021276595745e-06, "loss": 0.7455, "step": 387 }, { "epoch": 0.15332180785379107, "grad_norm": 0.5882912759438818, "learning_rate": 4.127659574468085e-06, "loss": 0.7696, "step": 388 }, { "epoch": 0.1537169671523833, "grad_norm": 0.6227877790472663, "learning_rate": 4.138297872340426e-06, "loss": 0.7608, "step": 389 }, { "epoch": 0.15411212645097555, "grad_norm": 0.6619432056903835, "learning_rate": 4.148936170212766e-06, "loss": 0.7473, "step": 390 }, { "epoch": 0.1545072857495678, "grad_norm": 0.628215165860546, "learning_rate": 4.1595744680851066e-06, "loss": 0.7489, "step": 391 }, { "epoch": 0.15490244504816003, "grad_norm": 0.5841047919949067, "learning_rate": 4.170212765957447e-06, "loss": 0.7447, "step": 392 }, { "epoch": 0.15529760434675227, "grad_norm": 0.5825058321729848, "learning_rate": 4.1808510638297875e-06, "loss": 0.7518, "step": 393 }, { "epoch": 0.15569276364534454, "grad_norm": 0.5989180700052267, "learning_rate": 4.191489361702128e-06, "loss": 0.7463, "step": 394 }, { "epoch": 0.15608792294393678, "grad_norm": 0.5485054555998863, "learning_rate": 4.202127659574468e-06, "loss": 0.7254, "step": 395 }, { "epoch": 0.15648308224252902, "grad_norm": 0.646716189095031, "learning_rate": 4.212765957446809e-06, "loss": 0.7583, "step": 396 }, { "epoch": 0.15687824154112126, "grad_norm": 0.6866055568311499, "learning_rate": 4.223404255319149e-06, "loss": 0.7372, "step": 397 }, { "epoch": 0.1572734008397135, "grad_norm": 0.7140385676935366, "learning_rate": 4.23404255319149e-06, "loss": 0.739, "step": 398 }, { "epoch": 0.15766856013830574, "grad_norm": 0.643023278201698, "learning_rate": 4.24468085106383e-06, "loss": 0.7522, "step": 399 }, { "epoch": 0.158063719436898, "grad_norm": 0.6312967460011237, "learning_rate": 4.255319148936171e-06, "loss": 0.7676, "step": 400 }, { "epoch": 0.15845887873549025, "grad_norm": 0.6382138110089172, "learning_rate": 4.265957446808511e-06, "loss": 0.7629, "step": 401 }, { "epoch": 0.1588540380340825, "grad_norm": 0.6332459333263118, "learning_rate": 4.2765957446808515e-06, "loss": 0.7311, "step": 402 }, { "epoch": 0.15924919733267473, "grad_norm": 0.6412487468227186, "learning_rate": 4.287234042553192e-06, "loss": 0.7567, "step": 403 }, { "epoch": 0.15964435663126697, "grad_norm": 0.7012222995798256, "learning_rate": 4.297872340425532e-06, "loss": 0.7436, "step": 404 }, { "epoch": 0.1600395159298592, "grad_norm": 0.6181464501586335, "learning_rate": 4.308510638297873e-06, "loss": 0.7298, "step": 405 }, { "epoch": 0.16043467522845148, "grad_norm": 0.6207970587414706, "learning_rate": 4.319148936170213e-06, "loss": 0.7376, "step": 406 }, { "epoch": 0.16082983452704372, "grad_norm": 0.6106494887539161, "learning_rate": 4.329787234042554e-06, "loss": 0.7613, "step": 407 }, { "epoch": 0.16122499382563596, "grad_norm": 0.6219536291754221, "learning_rate": 4.340425531914894e-06, "loss": 0.7397, "step": 408 }, { "epoch": 0.1616201531242282, "grad_norm": 0.6668879597496847, "learning_rate": 4.351063829787235e-06, "loss": 0.7469, "step": 409 }, { "epoch": 0.16201531242282044, "grad_norm": 0.6048928125977155, "learning_rate": 4.361702127659575e-06, "loss": 0.7184, "step": 410 }, { "epoch": 0.1624104717214127, "grad_norm": 0.6204217080705178, "learning_rate": 4.3723404255319156e-06, "loss": 0.7433, "step": 411 }, { "epoch": 0.16280563102000495, "grad_norm": 0.6071870434011859, "learning_rate": 4.382978723404256e-06, "loss": 0.7522, "step": 412 }, { "epoch": 0.1632007903185972, "grad_norm": 0.6179635375795429, "learning_rate": 4.3936170212765965e-06, "loss": 0.7521, "step": 413 }, { "epoch": 0.16359594961718943, "grad_norm": 0.8065353676281373, "learning_rate": 4.404255319148937e-06, "loss": 0.7371, "step": 414 }, { "epoch": 0.16399110891578167, "grad_norm": 0.7223783073823268, "learning_rate": 4.414893617021277e-06, "loss": 0.749, "step": 415 }, { "epoch": 0.1643862682143739, "grad_norm": 0.6418436008798059, "learning_rate": 4.425531914893617e-06, "loss": 0.7074, "step": 416 }, { "epoch": 0.16478142751296618, "grad_norm": 0.649001786754692, "learning_rate": 4.436170212765957e-06, "loss": 0.7409, "step": 417 }, { "epoch": 0.16517658681155842, "grad_norm": 0.6488082324591959, "learning_rate": 4.446808510638298e-06, "loss": 0.7294, "step": 418 }, { "epoch": 0.16557174611015066, "grad_norm": 0.6815680138497775, "learning_rate": 4.457446808510638e-06, "loss": 0.7516, "step": 419 }, { "epoch": 0.1659669054087429, "grad_norm": 0.627178650684025, "learning_rate": 4.468085106382979e-06, "loss": 0.7462, "step": 420 }, { "epoch": 0.16636206470733514, "grad_norm": 0.6847077790393925, "learning_rate": 4.478723404255319e-06, "loss": 0.7642, "step": 421 }, { "epoch": 0.16675722400592738, "grad_norm": 0.6677432032398735, "learning_rate": 4.48936170212766e-06, "loss": 0.7376, "step": 422 }, { "epoch": 0.16715238330451965, "grad_norm": 0.6622301887078457, "learning_rate": 4.5e-06, "loss": 0.7281, "step": 423 }, { "epoch": 0.1675475426031119, "grad_norm": 0.7032852662429785, "learning_rate": 4.5106382978723406e-06, "loss": 0.7295, "step": 424 }, { "epoch": 0.16794270190170413, "grad_norm": 0.9182307524223992, "learning_rate": 4.521276595744681e-06, "loss": 0.7251, "step": 425 }, { "epoch": 0.16833786120029637, "grad_norm": 0.6549474458937882, "learning_rate": 4.5319148936170215e-06, "loss": 0.7393, "step": 426 }, { "epoch": 0.1687330204988886, "grad_norm": 0.6260937965574087, "learning_rate": 4.542553191489362e-06, "loss": 0.7466, "step": 427 }, { "epoch": 0.16912817979748085, "grad_norm": 0.6098749910337613, "learning_rate": 4.553191489361702e-06, "loss": 0.7429, "step": 428 }, { "epoch": 0.16952333909607312, "grad_norm": 0.6258409309068056, "learning_rate": 4.563829787234043e-06, "loss": 0.7437, "step": 429 }, { "epoch": 0.16991849839466536, "grad_norm": 0.6998727053444407, "learning_rate": 4.574468085106383e-06, "loss": 0.7349, "step": 430 }, { "epoch": 0.1703136576932576, "grad_norm": 0.635839700174999, "learning_rate": 4.585106382978724e-06, "loss": 0.742, "step": 431 }, { "epoch": 0.17070881699184984, "grad_norm": 0.6881295298901814, "learning_rate": 4.595744680851064e-06, "loss": 0.7635, "step": 432 }, { "epoch": 0.17110397629044208, "grad_norm": 0.6138576739147359, "learning_rate": 4.606382978723405e-06, "loss": 0.7215, "step": 433 }, { "epoch": 0.17149913558903432, "grad_norm": 0.6786451341607185, "learning_rate": 4.617021276595745e-06, "loss": 0.746, "step": 434 }, { "epoch": 0.1718942948876266, "grad_norm": 0.6085640309923145, "learning_rate": 4.6276595744680855e-06, "loss": 0.7201, "step": 435 }, { "epoch": 0.17228945418621883, "grad_norm": 0.6543509049521442, "learning_rate": 4.638297872340426e-06, "loss": 0.7376, "step": 436 }, { "epoch": 0.17268461348481107, "grad_norm": 0.6281733392631065, "learning_rate": 4.648936170212766e-06, "loss": 0.7386, "step": 437 }, { "epoch": 0.1730797727834033, "grad_norm": 0.5766921427888596, "learning_rate": 4.659574468085107e-06, "loss": 0.7344, "step": 438 }, { "epoch": 0.17347493208199555, "grad_norm": 0.6238350960095946, "learning_rate": 4.670212765957447e-06, "loss": 0.7401, "step": 439 }, { "epoch": 0.1738700913805878, "grad_norm": 0.6308341725603104, "learning_rate": 4.680851063829788e-06, "loss": 0.7311, "step": 440 }, { "epoch": 0.17426525067918006, "grad_norm": 0.6418577607256782, "learning_rate": 4.691489361702128e-06, "loss": 0.7234, "step": 441 }, { "epoch": 0.1746604099777723, "grad_norm": 0.5854790033107359, "learning_rate": 4.702127659574469e-06, "loss": 0.7323, "step": 442 }, { "epoch": 0.17505556927636454, "grad_norm": 0.6823496981208785, "learning_rate": 4.712765957446809e-06, "loss": 0.7323, "step": 443 }, { "epoch": 0.17545072857495678, "grad_norm": 0.6175278963352572, "learning_rate": 4.7234042553191496e-06, "loss": 0.7147, "step": 444 }, { "epoch": 0.17584588787354902, "grad_norm": 0.7360888497887278, "learning_rate": 4.73404255319149e-06, "loss": 0.7348, "step": 445 }, { "epoch": 0.17624104717214126, "grad_norm": 0.6230101684392804, "learning_rate": 4.7446808510638305e-06, "loss": 0.7119, "step": 446 }, { "epoch": 0.17663620647073353, "grad_norm": 0.6554804476316878, "learning_rate": 4.755319148936171e-06, "loss": 0.7294, "step": 447 }, { "epoch": 0.17703136576932577, "grad_norm": 0.8564206801667128, "learning_rate": 4.765957446808511e-06, "loss": 0.7423, "step": 448 }, { "epoch": 0.177426525067918, "grad_norm": 1.3629336168086166, "learning_rate": 4.776595744680852e-06, "loss": 0.7484, "step": 449 }, { "epoch": 0.17782168436651025, "grad_norm": 0.7691195276316329, "learning_rate": 4.787234042553192e-06, "loss": 0.7097, "step": 450 }, { "epoch": 0.1782168436651025, "grad_norm": 0.6194923429303832, "learning_rate": 4.797872340425533e-06, "loss": 0.7285, "step": 451 }, { "epoch": 0.17861200296369473, "grad_norm": 0.673233699921991, "learning_rate": 4.808510638297872e-06, "loss": 0.7278, "step": 452 }, { "epoch": 0.179007162262287, "grad_norm": 0.6766336054718974, "learning_rate": 4.819148936170213e-06, "loss": 0.7356, "step": 453 }, { "epoch": 0.17940232156087924, "grad_norm": 0.57599098500566, "learning_rate": 4.829787234042553e-06, "loss": 0.7115, "step": 454 }, { "epoch": 0.17979748085947148, "grad_norm": 0.8080267118445753, "learning_rate": 4.840425531914894e-06, "loss": 0.7534, "step": 455 }, { "epoch": 0.18019264015806372, "grad_norm": 0.603929876457928, "learning_rate": 4.851063829787234e-06, "loss": 0.7105, "step": 456 }, { "epoch": 0.18058779945665596, "grad_norm": 0.643703372655632, "learning_rate": 4.8617021276595746e-06, "loss": 0.7279, "step": 457 }, { "epoch": 0.1809829587552482, "grad_norm": 0.8198396590432799, "learning_rate": 4.872340425531915e-06, "loss": 0.7198, "step": 458 }, { "epoch": 0.18137811805384046, "grad_norm": 0.65771945253315, "learning_rate": 4.8829787234042555e-06, "loss": 0.7113, "step": 459 }, { "epoch": 0.1817732773524327, "grad_norm": 0.6963361768452864, "learning_rate": 4.893617021276596e-06, "loss": 0.74, "step": 460 }, { "epoch": 0.18216843665102495, "grad_norm": 0.6261965671131124, "learning_rate": 4.904255319148936e-06, "loss": 0.7126, "step": 461 }, { "epoch": 0.18256359594961719, "grad_norm": 0.6148512179719411, "learning_rate": 4.914893617021277e-06, "loss": 0.726, "step": 462 }, { "epoch": 0.18295875524820943, "grad_norm": 0.676252763119563, "learning_rate": 4.925531914893617e-06, "loss": 0.73, "step": 463 }, { "epoch": 0.18335391454680167, "grad_norm": 0.7917693042214963, "learning_rate": 4.936170212765958e-06, "loss": 0.745, "step": 464 }, { "epoch": 0.18374907384539393, "grad_norm": 0.623944439707609, "learning_rate": 4.946808510638298e-06, "loss": 0.7178, "step": 465 }, { "epoch": 0.18414423314398617, "grad_norm": 0.7595206103625429, "learning_rate": 4.957446808510639e-06, "loss": 0.7511, "step": 466 }, { "epoch": 0.18453939244257841, "grad_norm": 0.6236195124431673, "learning_rate": 4.968085106382979e-06, "loss": 0.7443, "step": 467 }, { "epoch": 0.18493455174117066, "grad_norm": 0.6344774337895079, "learning_rate": 4.9787234042553195e-06, "loss": 0.7502, "step": 468 }, { "epoch": 0.1853297110397629, "grad_norm": 0.6180463187517735, "learning_rate": 4.98936170212766e-06, "loss": 0.7089, "step": 469 }, { "epoch": 0.18572487033835514, "grad_norm": 0.6437262535137148, "learning_rate": 5e-06, "loss": 0.7118, "step": 470 }, { "epoch": 0.1861200296369474, "grad_norm": 0.8247696667741479, "learning_rate": 4.999999995090759e-06, "loss": 0.7448, "step": 471 }, { "epoch": 0.18651518893553964, "grad_norm": 0.6821659327496776, "learning_rate": 4.9999999803630365e-06, "loss": 0.7096, "step": 472 }, { "epoch": 0.18691034823413188, "grad_norm": 0.6937300518785938, "learning_rate": 4.999999955816832e-06, "loss": 0.7505, "step": 473 }, { "epoch": 0.18730550753272412, "grad_norm": 0.64493393853509, "learning_rate": 4.999999921452146e-06, "loss": 0.7246, "step": 474 }, { "epoch": 0.18770066683131637, "grad_norm": 0.699335103430049, "learning_rate": 4.999999877268977e-06, "loss": 0.7362, "step": 475 }, { "epoch": 0.18809582612990863, "grad_norm": 0.6958675091974922, "learning_rate": 4.999999823267328e-06, "loss": 0.7425, "step": 476 }, { "epoch": 0.18849098542850087, "grad_norm": 0.6561148703214672, "learning_rate": 4.999999759447197e-06, "loss": 0.7193, "step": 477 }, { "epoch": 0.1888861447270931, "grad_norm": 0.7332659964684728, "learning_rate": 4.999999685808585e-06, "loss": 0.7367, "step": 478 }, { "epoch": 0.18928130402568535, "grad_norm": 0.6434935616449193, "learning_rate": 4.999999602351493e-06, "loss": 0.7241, "step": 479 }, { "epoch": 0.1896764633242776, "grad_norm": 0.7085133055543303, "learning_rate": 4.99999950907592e-06, "loss": 0.7294, "step": 480 }, { "epoch": 0.19007162262286983, "grad_norm": 0.6037858004944545, "learning_rate": 4.999999405981868e-06, "loss": 0.7169, "step": 481 }, { "epoch": 0.1904667819214621, "grad_norm": 0.6353596283356293, "learning_rate": 4.999999293069335e-06, "loss": 0.7255, "step": 482 }, { "epoch": 0.19086194122005434, "grad_norm": 0.6011323842788836, "learning_rate": 4.999999170338324e-06, "loss": 0.7098, "step": 483 }, { "epoch": 0.19125710051864658, "grad_norm": 0.685601090106948, "learning_rate": 4.999999037788834e-06, "loss": 0.7333, "step": 484 }, { "epoch": 0.19165225981723882, "grad_norm": 0.6377038979338291, "learning_rate": 4.999998895420866e-06, "loss": 0.7257, "step": 485 }, { "epoch": 0.19204741911583106, "grad_norm": 0.6192918261508922, "learning_rate": 4.9999987432344195e-06, "loss": 0.7275, "step": 486 }, { "epoch": 0.1924425784144233, "grad_norm": 0.6720726722815289, "learning_rate": 4.999998581229497e-06, "loss": 0.7209, "step": 487 }, { "epoch": 0.19283773771301557, "grad_norm": 0.5958748564691253, "learning_rate": 4.999998409406098e-06, "loss": 0.7182, "step": 488 }, { "epoch": 0.1932328970116078, "grad_norm": 0.6523018913824231, "learning_rate": 4.999998227764223e-06, "loss": 0.7223, "step": 489 }, { "epoch": 0.19362805631020005, "grad_norm": 0.6969026955713506, "learning_rate": 4.999998036303873e-06, "loss": 0.7017, "step": 490 }, { "epoch": 0.1940232156087923, "grad_norm": 0.6199690598086954, "learning_rate": 4.999997835025049e-06, "loss": 0.724, "step": 491 }, { "epoch": 0.19441837490738453, "grad_norm": 0.6899050040288978, "learning_rate": 4.999997623927752e-06, "loss": 0.702, "step": 492 }, { "epoch": 0.19481353420597677, "grad_norm": 0.6023461037422734, "learning_rate": 4.999997403011982e-06, "loss": 0.7192, "step": 493 }, { "epoch": 0.19520869350456904, "grad_norm": 0.7949013705151751, "learning_rate": 4.9999971722777395e-06, "loss": 0.7214, "step": 494 }, { "epoch": 0.19560385280316128, "grad_norm": 0.8406697228012115, "learning_rate": 4.9999969317250276e-06, "loss": 0.7511, "step": 495 }, { "epoch": 0.19599901210175352, "grad_norm": 0.6165479711982665, "learning_rate": 4.999996681353845e-06, "loss": 0.7129, "step": 496 }, { "epoch": 0.19639417140034576, "grad_norm": 0.6365598818327172, "learning_rate": 4.999996421164194e-06, "loss": 0.724, "step": 497 }, { "epoch": 0.196789330698938, "grad_norm": 0.5804862939924182, "learning_rate": 4.999996151156075e-06, "loss": 0.7208, "step": 498 }, { "epoch": 0.19718448999753024, "grad_norm": 0.8043069867513684, "learning_rate": 4.9999958713294886e-06, "loss": 0.7316, "step": 499 }, { "epoch": 0.1975796492961225, "grad_norm": 0.7039727180630397, "learning_rate": 4.999995581684437e-06, "loss": 0.7273, "step": 500 }, { "epoch": 0.19797480859471475, "grad_norm": 0.5796311427949719, "learning_rate": 4.999995282220921e-06, "loss": 0.7353, "step": 501 }, { "epoch": 0.198369967893307, "grad_norm": 0.640816004878865, "learning_rate": 4.999994972938941e-06, "loss": 0.718, "step": 502 }, { "epoch": 0.19876512719189923, "grad_norm": 0.6625901203049049, "learning_rate": 4.9999946538384995e-06, "loss": 0.717, "step": 503 }, { "epoch": 0.19916028649049147, "grad_norm": 0.731900514664347, "learning_rate": 4.999994324919596e-06, "loss": 0.7154, "step": 504 }, { "epoch": 0.1995554457890837, "grad_norm": 0.6358220991451766, "learning_rate": 4.9999939861822345e-06, "loss": 0.7135, "step": 505 }, { "epoch": 0.19995060508767598, "grad_norm": 0.6772742599590151, "learning_rate": 4.999993637626413e-06, "loss": 0.7142, "step": 506 }, { "epoch": 0.20034576438626822, "grad_norm": 0.6203815751394517, "learning_rate": 4.999993279252136e-06, "loss": 0.7094, "step": 507 }, { "epoch": 0.20074092368486046, "grad_norm": 0.6159319745437677, "learning_rate": 4.999992911059402e-06, "loss": 0.7097, "step": 508 }, { "epoch": 0.2011360829834527, "grad_norm": 0.6394572431227349, "learning_rate": 4.999992533048216e-06, "loss": 0.7389, "step": 509 }, { "epoch": 0.20153124228204494, "grad_norm": 0.5730980921754206, "learning_rate": 4.999992145218576e-06, "loss": 0.7148, "step": 510 }, { "epoch": 0.20192640158063718, "grad_norm": 0.642558910052563, "learning_rate": 4.999991747570485e-06, "loss": 0.6986, "step": 511 }, { "epoch": 0.20232156087922945, "grad_norm": 0.7198501180731144, "learning_rate": 4.999991340103944e-06, "loss": 0.7169, "step": 512 }, { "epoch": 0.2027167201778217, "grad_norm": 0.5448656307534819, "learning_rate": 4.999990922818957e-06, "loss": 0.6977, "step": 513 }, { "epoch": 0.20311187947641393, "grad_norm": 0.6205152271493748, "learning_rate": 4.999990495715522e-06, "loss": 0.7289, "step": 514 }, { "epoch": 0.20350703877500617, "grad_norm": 0.5899827817607862, "learning_rate": 4.9999900587936425e-06, "loss": 0.7269, "step": 515 }, { "epoch": 0.2039021980735984, "grad_norm": 0.6379914033363828, "learning_rate": 4.999989612053321e-06, "loss": 0.7031, "step": 516 }, { "epoch": 0.20429735737219065, "grad_norm": 0.63396345510458, "learning_rate": 4.999989155494558e-06, "loss": 0.7113, "step": 517 }, { "epoch": 0.20469251667078292, "grad_norm": 0.5769043304193019, "learning_rate": 4.999988689117355e-06, "loss": 0.7371, "step": 518 }, { "epoch": 0.20508767596937516, "grad_norm": 0.6508775630628256, "learning_rate": 4.999988212921715e-06, "loss": 0.6937, "step": 519 }, { "epoch": 0.2054828352679674, "grad_norm": 0.6868129416605413, "learning_rate": 4.99998772690764e-06, "loss": 0.7383, "step": 520 }, { "epoch": 0.20587799456655964, "grad_norm": 0.5389127646966064, "learning_rate": 4.9999872310751305e-06, "loss": 0.6993, "step": 521 }, { "epoch": 0.20627315386515188, "grad_norm": 0.6060273410792214, "learning_rate": 4.9999867254241894e-06, "loss": 0.7118, "step": 522 }, { "epoch": 0.20666831316374412, "grad_norm": 0.5800631862306052, "learning_rate": 4.9999862099548186e-06, "loss": 0.7323, "step": 523 }, { "epoch": 0.2070634724623364, "grad_norm": 0.7534794475732837, "learning_rate": 4.9999856846670195e-06, "loss": 0.7228, "step": 524 }, { "epoch": 0.20745863176092863, "grad_norm": 0.5812327339287102, "learning_rate": 4.999985149560796e-06, "loss": 0.7072, "step": 525 }, { "epoch": 0.20785379105952087, "grad_norm": 0.5768618184120087, "learning_rate": 4.999984604636148e-06, "loss": 0.7088, "step": 526 }, { "epoch": 0.2082489503581131, "grad_norm": 0.6399784431138891, "learning_rate": 4.999984049893078e-06, "loss": 0.7149, "step": 527 }, { "epoch": 0.20864410965670535, "grad_norm": 0.5623651744286807, "learning_rate": 4.99998348533159e-06, "loss": 0.7226, "step": 528 }, { "epoch": 0.2090392689552976, "grad_norm": 0.6963865673449156, "learning_rate": 4.999982910951684e-06, "loss": 0.7233, "step": 529 }, { "epoch": 0.20943442825388986, "grad_norm": 0.6478334973673012, "learning_rate": 4.999982326753363e-06, "loss": 0.7262, "step": 530 }, { "epoch": 0.2098295875524821, "grad_norm": 0.5449147628310747, "learning_rate": 4.9999817327366305e-06, "loss": 0.7072, "step": 531 }, { "epoch": 0.21022474685107434, "grad_norm": 0.5817196907571065, "learning_rate": 4.9999811289014876e-06, "loss": 0.7127, "step": 532 }, { "epoch": 0.21061990614966658, "grad_norm": 0.5908688502826027, "learning_rate": 4.999980515247936e-06, "loss": 0.7232, "step": 533 }, { "epoch": 0.21101506544825882, "grad_norm": 0.5636398728531682, "learning_rate": 4.99997989177598e-06, "loss": 0.7351, "step": 534 }, { "epoch": 0.21141022474685106, "grad_norm": 0.626447595665311, "learning_rate": 4.9999792584856204e-06, "loss": 0.7069, "step": 535 }, { "epoch": 0.21180538404544333, "grad_norm": 0.5779788065467234, "learning_rate": 4.999978615376861e-06, "loss": 0.7109, "step": 536 }, { "epoch": 0.21220054334403557, "grad_norm": 0.5720137928958787, "learning_rate": 4.999977962449703e-06, "loss": 0.7117, "step": 537 }, { "epoch": 0.2125957026426278, "grad_norm": 0.5741484180144606, "learning_rate": 4.99997729970415e-06, "loss": 0.7004, "step": 538 }, { "epoch": 0.21299086194122005, "grad_norm": 0.6385491107900715, "learning_rate": 4.999976627140204e-06, "loss": 0.7269, "step": 539 }, { "epoch": 0.2133860212398123, "grad_norm": 0.5816500717084315, "learning_rate": 4.999975944757868e-06, "loss": 0.7036, "step": 540 }, { "epoch": 0.21378118053840453, "grad_norm": 0.5659777534165991, "learning_rate": 4.999975252557145e-06, "loss": 0.7105, "step": 541 }, { "epoch": 0.2141763398369968, "grad_norm": 0.5749663433605801, "learning_rate": 4.999974550538037e-06, "loss": 0.7264, "step": 542 }, { "epoch": 0.21457149913558904, "grad_norm": 0.6112261810413965, "learning_rate": 4.999973838700547e-06, "loss": 0.7153, "step": 543 }, { "epoch": 0.21496665843418128, "grad_norm": 0.5490028470815621, "learning_rate": 4.999973117044678e-06, "loss": 0.7083, "step": 544 }, { "epoch": 0.21536181773277352, "grad_norm": 0.7622632899364211, "learning_rate": 4.9999723855704315e-06, "loss": 0.736, "step": 545 }, { "epoch": 0.21575697703136576, "grad_norm": 0.596735586828792, "learning_rate": 4.999971644277812e-06, "loss": 0.6858, "step": 546 }, { "epoch": 0.21615213632995803, "grad_norm": 0.6527535984449617, "learning_rate": 4.999970893166823e-06, "loss": 0.722, "step": 547 }, { "epoch": 0.21654729562855027, "grad_norm": 0.652324953771554, "learning_rate": 4.999970132237466e-06, "loss": 0.7424, "step": 548 }, { "epoch": 0.2169424549271425, "grad_norm": 0.5908339949824033, "learning_rate": 4.999969361489744e-06, "loss": 0.7227, "step": 549 }, { "epoch": 0.21733761422573475, "grad_norm": 0.6629809077478138, "learning_rate": 4.99996858092366e-06, "loss": 0.6958, "step": 550 }, { "epoch": 0.217732773524327, "grad_norm": 0.5704721132671529, "learning_rate": 4.9999677905392185e-06, "loss": 0.7056, "step": 551 }, { "epoch": 0.21812793282291923, "grad_norm": 0.5883161489370882, "learning_rate": 4.9999669903364215e-06, "loss": 0.7131, "step": 552 }, { "epoch": 0.2185230921215115, "grad_norm": 0.6166470675766057, "learning_rate": 4.999966180315271e-06, "loss": 0.7088, "step": 553 }, { "epoch": 0.21891825142010374, "grad_norm": 0.5742237649639247, "learning_rate": 4.999965360475773e-06, "loss": 0.6977, "step": 554 }, { "epoch": 0.21931341071869598, "grad_norm": 0.6751101103583117, "learning_rate": 4.9999645308179275e-06, "loss": 0.7072, "step": 555 }, { "epoch": 0.21970857001728822, "grad_norm": 0.5984737347193803, "learning_rate": 4.999963691341741e-06, "loss": 0.7147, "step": 556 }, { "epoch": 0.22010372931588046, "grad_norm": 0.6110992520398403, "learning_rate": 4.999962842047214e-06, "loss": 0.7013, "step": 557 }, { "epoch": 0.2204988886144727, "grad_norm": 0.768651010687167, "learning_rate": 4.999961982934352e-06, "loss": 0.708, "step": 558 }, { "epoch": 0.22089404791306497, "grad_norm": 0.5750080834238912, "learning_rate": 4.999961114003156e-06, "loss": 0.7163, "step": 559 }, { "epoch": 0.2212892072116572, "grad_norm": 0.6908491657265623, "learning_rate": 4.9999602352536316e-06, "loss": 0.703, "step": 560 }, { "epoch": 0.22168436651024945, "grad_norm": 0.6747348114487975, "learning_rate": 4.999959346685782e-06, "loss": 0.7156, "step": 561 }, { "epoch": 0.2220795258088417, "grad_norm": 0.6777010126648408, "learning_rate": 4.999958448299609e-06, "loss": 0.7221, "step": 562 }, { "epoch": 0.22247468510743393, "grad_norm": 0.7149124473295017, "learning_rate": 4.9999575400951185e-06, "loss": 0.7433, "step": 563 }, { "epoch": 0.22286984440602617, "grad_norm": 0.5509899586140755, "learning_rate": 4.999956622072312e-06, "loss": 0.7023, "step": 564 }, { "epoch": 0.22326500370461844, "grad_norm": 0.7820264763430183, "learning_rate": 4.999955694231194e-06, "loss": 0.7288, "step": 565 }, { "epoch": 0.22366016300321068, "grad_norm": 0.5919945717677796, "learning_rate": 4.999954756571769e-06, "loss": 0.7152, "step": 566 }, { "epoch": 0.22405532230180292, "grad_norm": 0.6535677585414817, "learning_rate": 4.999953809094038e-06, "loss": 0.7083, "step": 567 }, { "epoch": 0.22445048160039516, "grad_norm": 0.7785050994873816, "learning_rate": 4.999952851798008e-06, "loss": 0.7062, "step": 568 }, { "epoch": 0.2248456408989874, "grad_norm": 0.5482528691036233, "learning_rate": 4.99995188468368e-06, "loss": 0.6968, "step": 569 }, { "epoch": 0.22524080019757964, "grad_norm": 0.7313401830631631, "learning_rate": 4.99995090775106e-06, "loss": 0.7196, "step": 570 }, { "epoch": 0.2256359594961719, "grad_norm": 0.7311084419182904, "learning_rate": 4.999949921000151e-06, "loss": 0.6959, "step": 571 }, { "epoch": 0.22603111879476415, "grad_norm": 0.640055542754839, "learning_rate": 4.999948924430956e-06, "loss": 0.7233, "step": 572 }, { "epoch": 0.2264262780933564, "grad_norm": 0.7564396262457012, "learning_rate": 4.9999479180434805e-06, "loss": 0.7259, "step": 573 }, { "epoch": 0.22682143739194863, "grad_norm": 0.5567836517749442, "learning_rate": 4.999946901837727e-06, "loss": 0.732, "step": 574 }, { "epoch": 0.22721659669054087, "grad_norm": 0.6565826035434957, "learning_rate": 4.999945875813701e-06, "loss": 0.7107, "step": 575 }, { "epoch": 0.2276117559891331, "grad_norm": 0.6555275013117167, "learning_rate": 4.999944839971404e-06, "loss": 0.7065, "step": 576 }, { "epoch": 0.22800691528772538, "grad_norm": 0.5750121597094802, "learning_rate": 4.999943794310844e-06, "loss": 0.7168, "step": 577 }, { "epoch": 0.22840207458631762, "grad_norm": 0.7172808104044253, "learning_rate": 4.999942738832021e-06, "loss": 0.7035, "step": 578 }, { "epoch": 0.22879723388490986, "grad_norm": 0.681813584446898, "learning_rate": 4.999941673534941e-06, "loss": 0.7278, "step": 579 }, { "epoch": 0.2291923931835021, "grad_norm": 0.6129736074024745, "learning_rate": 4.999940598419609e-06, "loss": 0.7207, "step": 580 }, { "epoch": 0.22958755248209434, "grad_norm": 0.6921872225622865, "learning_rate": 4.999939513486028e-06, "loss": 0.7079, "step": 581 }, { "epoch": 0.22998271178068658, "grad_norm": 0.6070410601571897, "learning_rate": 4.9999384187342035e-06, "loss": 0.7149, "step": 582 }, { "epoch": 0.23037787107927885, "grad_norm": 0.6700679528371282, "learning_rate": 4.999937314164138e-06, "loss": 0.7091, "step": 583 }, { "epoch": 0.23077303037787109, "grad_norm": 0.7175503981303464, "learning_rate": 4.999936199775836e-06, "loss": 0.7044, "step": 584 }, { "epoch": 0.23116818967646333, "grad_norm": 0.5372712731465968, "learning_rate": 4.999935075569304e-06, "loss": 0.7185, "step": 585 }, { "epoch": 0.23156334897505557, "grad_norm": 0.6626410452980375, "learning_rate": 4.999933941544544e-06, "loss": 0.7061, "step": 586 }, { "epoch": 0.2319585082736478, "grad_norm": 0.6219349768601438, "learning_rate": 4.999932797701563e-06, "loss": 0.7027, "step": 587 }, { "epoch": 0.23235366757224005, "grad_norm": 0.622944974108998, "learning_rate": 4.999931644040363e-06, "loss": 0.7041, "step": 588 }, { "epoch": 0.23274882687083231, "grad_norm": 0.6403160681420452, "learning_rate": 4.999930480560949e-06, "loss": 0.713, "step": 589 }, { "epoch": 0.23314398616942456, "grad_norm": 0.5810884528951928, "learning_rate": 4.999929307263327e-06, "loss": 0.6959, "step": 590 }, { "epoch": 0.2335391454680168, "grad_norm": 0.5617194856604512, "learning_rate": 4.999928124147501e-06, "loss": 0.7134, "step": 591 }, { "epoch": 0.23393430476660904, "grad_norm": 0.5794486892578056, "learning_rate": 4.999926931213475e-06, "loss": 0.6581, "step": 592 }, { "epoch": 0.23432946406520128, "grad_norm": 0.6010638355759388, "learning_rate": 4.999925728461254e-06, "loss": 0.6905, "step": 593 }, { "epoch": 0.23472462336379352, "grad_norm": 0.5698458903870885, "learning_rate": 4.999924515890843e-06, "loss": 0.6929, "step": 594 }, { "epoch": 0.23511978266238578, "grad_norm": 0.6038481326467747, "learning_rate": 4.9999232935022455e-06, "loss": 0.7134, "step": 595 }, { "epoch": 0.23551494196097802, "grad_norm": 0.5575437681930879, "learning_rate": 4.9999220612954685e-06, "loss": 0.686, "step": 596 }, { "epoch": 0.23591010125957027, "grad_norm": 0.5582827483407896, "learning_rate": 4.999920819270515e-06, "loss": 0.677, "step": 597 }, { "epoch": 0.2363052605581625, "grad_norm": 0.5581039778387297, "learning_rate": 4.999919567427391e-06, "loss": 0.7041, "step": 598 }, { "epoch": 0.23670041985675475, "grad_norm": 0.5926274884146744, "learning_rate": 4.999918305766101e-06, "loss": 0.7131, "step": 599 }, { "epoch": 0.23709557915534699, "grad_norm": 0.5683616438789352, "learning_rate": 4.99991703428665e-06, "loss": 0.7004, "step": 600 }, { "epoch": 0.23749073845393925, "grad_norm": 0.5730277632443862, "learning_rate": 4.999915752989042e-06, "loss": 0.7056, "step": 601 }, { "epoch": 0.2378858977525315, "grad_norm": 0.582494416565038, "learning_rate": 4.999914461873283e-06, "loss": 0.7116, "step": 602 }, { "epoch": 0.23828105705112373, "grad_norm": 0.5966610530038993, "learning_rate": 4.9999131609393795e-06, "loss": 0.7284, "step": 603 }, { "epoch": 0.23867621634971597, "grad_norm": 0.5558683547691745, "learning_rate": 4.999911850187335e-06, "loss": 0.6998, "step": 604 }, { "epoch": 0.23907137564830822, "grad_norm": 0.6258177810251072, "learning_rate": 4.999910529617154e-06, "loss": 0.6855, "step": 605 }, { "epoch": 0.23946653494690046, "grad_norm": 0.5318478473731086, "learning_rate": 4.999909199228841e-06, "loss": 0.6795, "step": 606 }, { "epoch": 0.23986169424549272, "grad_norm": 0.5547878659686439, "learning_rate": 4.999907859022405e-06, "loss": 0.6953, "step": 607 }, { "epoch": 0.24025685354408496, "grad_norm": 0.5931673987268137, "learning_rate": 4.9999065089978485e-06, "loss": 0.7148, "step": 608 }, { "epoch": 0.2406520128426772, "grad_norm": 0.568231220260507, "learning_rate": 4.9999051491551766e-06, "loss": 0.695, "step": 609 }, { "epoch": 0.24104717214126944, "grad_norm": 0.5808912372610399, "learning_rate": 4.999903779494395e-06, "loss": 0.7074, "step": 610 }, { "epoch": 0.24144233143986168, "grad_norm": 0.5824501699853736, "learning_rate": 4.9999024000155104e-06, "loss": 0.7017, "step": 611 }, { "epoch": 0.24183749073845395, "grad_norm": 0.5393723920629134, "learning_rate": 4.9999010107185264e-06, "loss": 0.6646, "step": 612 }, { "epoch": 0.2422326500370462, "grad_norm": 0.5980491294226428, "learning_rate": 4.99989961160345e-06, "loss": 0.6794, "step": 613 }, { "epoch": 0.24262780933563843, "grad_norm": 0.6188242993162584, "learning_rate": 4.999898202670286e-06, "loss": 0.705, "step": 614 }, { "epoch": 0.24302296863423067, "grad_norm": 0.5890575366759165, "learning_rate": 4.9998967839190384e-06, "loss": 0.7118, "step": 615 }, { "epoch": 0.24341812793282291, "grad_norm": 0.6031363408649272, "learning_rate": 4.999895355349716e-06, "loss": 0.7248, "step": 616 }, { "epoch": 0.24381328723141515, "grad_norm": 0.5917614651923631, "learning_rate": 4.9998939169623215e-06, "loss": 0.696, "step": 617 }, { "epoch": 0.24420844653000742, "grad_norm": 0.594908607458186, "learning_rate": 4.9998924687568625e-06, "loss": 0.7036, "step": 618 }, { "epoch": 0.24460360582859966, "grad_norm": 0.6727052500603081, "learning_rate": 4.999891010733343e-06, "loss": 0.6824, "step": 619 }, { "epoch": 0.2449987651271919, "grad_norm": 0.598235520036059, "learning_rate": 4.99988954289177e-06, "loss": 0.7238, "step": 620 }, { "epoch": 0.24539392442578414, "grad_norm": 0.7176084933336286, "learning_rate": 4.999888065232149e-06, "loss": 0.7076, "step": 621 }, { "epoch": 0.24578908372437638, "grad_norm": 0.5872860940803798, "learning_rate": 4.999886577754486e-06, "loss": 0.702, "step": 622 }, { "epoch": 0.24618424302296862, "grad_norm": 0.6143484421432265, "learning_rate": 4.999885080458786e-06, "loss": 0.6923, "step": 623 }, { "epoch": 0.2465794023215609, "grad_norm": 0.6537296010257171, "learning_rate": 4.9998835733450564e-06, "loss": 0.6996, "step": 624 }, { "epoch": 0.24697456162015313, "grad_norm": 0.5652281166809588, "learning_rate": 4.9998820564133016e-06, "loss": 0.6926, "step": 625 }, { "epoch": 0.24736972091874537, "grad_norm": 0.6841779465916256, "learning_rate": 4.999880529663528e-06, "loss": 0.708, "step": 626 }, { "epoch": 0.2477648802173376, "grad_norm": 0.618999997435258, "learning_rate": 4.9998789930957415e-06, "loss": 0.7009, "step": 627 }, { "epoch": 0.24816003951592985, "grad_norm": 0.5814213814123862, "learning_rate": 4.999877446709949e-06, "loss": 0.7166, "step": 628 }, { "epoch": 0.2485551988145221, "grad_norm": 0.6410458092239222, "learning_rate": 4.999875890506155e-06, "loss": 0.701, "step": 629 }, { "epoch": 0.24895035811311436, "grad_norm": 0.5643287481712287, "learning_rate": 4.999874324484367e-06, "loss": 0.6781, "step": 630 }, { "epoch": 0.2493455174117066, "grad_norm": 0.719129519371788, "learning_rate": 4.9998727486445895e-06, "loss": 0.71, "step": 631 }, { "epoch": 0.24974067671029884, "grad_norm": 0.643749171305181, "learning_rate": 4.999871162986831e-06, "loss": 0.6954, "step": 632 }, { "epoch": 0.2501358360088911, "grad_norm": 0.5312855320703463, "learning_rate": 4.999869567511097e-06, "loss": 0.6619, "step": 633 }, { "epoch": 0.25053099530748335, "grad_norm": 0.5908234255298699, "learning_rate": 4.9998679622173925e-06, "loss": 0.7128, "step": 634 }, { "epoch": 0.2509261546060756, "grad_norm": 0.6073203092125553, "learning_rate": 4.999866347105725e-06, "loss": 0.6908, "step": 635 }, { "epoch": 0.25132131390466783, "grad_norm": 0.5785049800687088, "learning_rate": 4.999864722176101e-06, "loss": 0.6863, "step": 636 }, { "epoch": 0.25171647320326007, "grad_norm": 0.6312245040925751, "learning_rate": 4.999863087428526e-06, "loss": 0.6905, "step": 637 }, { "epoch": 0.2521116325018523, "grad_norm": 0.596727469474082, "learning_rate": 4.999861442863005e-06, "loss": 0.6822, "step": 638 }, { "epoch": 0.25250679180044455, "grad_norm": 0.5678026535695422, "learning_rate": 4.999859788479548e-06, "loss": 0.6999, "step": 639 }, { "epoch": 0.2529019510990368, "grad_norm": 0.548613923585835, "learning_rate": 4.99985812427816e-06, "loss": 0.7132, "step": 640 }, { "epoch": 0.25329711039762903, "grad_norm": 0.5873264929153031, "learning_rate": 4.999856450258847e-06, "loss": 0.7207, "step": 641 }, { "epoch": 0.2536922696962213, "grad_norm": 0.5678140101223359, "learning_rate": 4.9998547664216155e-06, "loss": 0.7106, "step": 642 }, { "epoch": 0.2540874289948135, "grad_norm": 0.5578970132548404, "learning_rate": 4.999853072766473e-06, "loss": 0.7125, "step": 643 }, { "epoch": 0.25448258829340575, "grad_norm": 0.6041251604589954, "learning_rate": 4.999851369293425e-06, "loss": 0.6901, "step": 644 }, { "epoch": 0.25487774759199805, "grad_norm": 0.5736028260838277, "learning_rate": 4.99984965600248e-06, "loss": 0.7116, "step": 645 }, { "epoch": 0.2552729068905903, "grad_norm": 0.532105428102255, "learning_rate": 4.999847932893642e-06, "loss": 0.6927, "step": 646 }, { "epoch": 0.25566806618918253, "grad_norm": 0.6413099649877119, "learning_rate": 4.99984619996692e-06, "loss": 0.6916, "step": 647 }, { "epoch": 0.25606322548777477, "grad_norm": 0.5690991015722213, "learning_rate": 4.9998444572223205e-06, "loss": 0.7192, "step": 648 }, { "epoch": 0.256458384786367, "grad_norm": 0.5557660152572236, "learning_rate": 4.999842704659849e-06, "loss": 0.6832, "step": 649 }, { "epoch": 0.25685354408495925, "grad_norm": 0.5617270616167598, "learning_rate": 4.999840942279514e-06, "loss": 0.6878, "step": 650 }, { "epoch": 0.2572487033835515, "grad_norm": 0.5666842673944367, "learning_rate": 4.999839170081322e-06, "loss": 0.6672, "step": 651 }, { "epoch": 0.25764386268214373, "grad_norm": 0.5623407827389078, "learning_rate": 4.999837388065279e-06, "loss": 0.689, "step": 652 }, { "epoch": 0.25803902198073597, "grad_norm": 0.5610766033323613, "learning_rate": 4.999835596231392e-06, "loss": 0.7217, "step": 653 }, { "epoch": 0.2584341812793282, "grad_norm": 0.5941036719520371, "learning_rate": 4.999833794579671e-06, "loss": 0.704, "step": 654 }, { "epoch": 0.25882934057792045, "grad_norm": 0.5527677491351838, "learning_rate": 4.999831983110119e-06, "loss": 0.7063, "step": 655 }, { "epoch": 0.2592244998765127, "grad_norm": 0.5896815383284895, "learning_rate": 4.999830161822746e-06, "loss": 0.6823, "step": 656 }, { "epoch": 0.259619659175105, "grad_norm": 0.5617257867146465, "learning_rate": 4.999828330717558e-06, "loss": 0.6952, "step": 657 }, { "epoch": 0.26001481847369723, "grad_norm": 0.5841626881089839, "learning_rate": 4.999826489794562e-06, "loss": 0.6801, "step": 658 }, { "epoch": 0.26040997777228947, "grad_norm": 0.5646406469988224, "learning_rate": 4.9998246390537655e-06, "loss": 0.6812, "step": 659 }, { "epoch": 0.2608051370708817, "grad_norm": 0.5750406721528946, "learning_rate": 4.999822778495176e-06, "loss": 0.6943, "step": 660 }, { "epoch": 0.26120029636947395, "grad_norm": 0.5559705390104022, "learning_rate": 4.999820908118801e-06, "loss": 0.6763, "step": 661 }, { "epoch": 0.2615954556680662, "grad_norm": 0.5562879316008681, "learning_rate": 4.999819027924646e-06, "loss": 0.6832, "step": 662 }, { "epoch": 0.26199061496665843, "grad_norm": 0.5921377833036137, "learning_rate": 4.999817137912721e-06, "loss": 0.6858, "step": 663 }, { "epoch": 0.26238577426525067, "grad_norm": 0.567426550861614, "learning_rate": 4.9998152380830325e-06, "loss": 0.7036, "step": 664 }, { "epoch": 0.2627809335638429, "grad_norm": 0.6061252730535107, "learning_rate": 4.999813328435587e-06, "loss": 0.6858, "step": 665 }, { "epoch": 0.26317609286243515, "grad_norm": 0.5678734043512008, "learning_rate": 4.999811408970392e-06, "loss": 0.7152, "step": 666 }, { "epoch": 0.2635712521610274, "grad_norm": 0.6033593786362805, "learning_rate": 4.999809479687457e-06, "loss": 0.6913, "step": 667 }, { "epoch": 0.26396641145961963, "grad_norm": 0.5744590883863819, "learning_rate": 4.999807540586788e-06, "loss": 0.7029, "step": 668 }, { "epoch": 0.26436157075821193, "grad_norm": 0.5281778655339753, "learning_rate": 4.999805591668393e-06, "loss": 0.7049, "step": 669 }, { "epoch": 0.26475673005680417, "grad_norm": 0.5585259313584738, "learning_rate": 4.99980363293228e-06, "loss": 0.673, "step": 670 }, { "epoch": 0.2651518893553964, "grad_norm": 0.5594694999229931, "learning_rate": 4.999801664378455e-06, "loss": 0.6827, "step": 671 }, { "epoch": 0.26554704865398865, "grad_norm": 0.5996606992971393, "learning_rate": 4.999799686006928e-06, "loss": 0.7072, "step": 672 }, { "epoch": 0.2659422079525809, "grad_norm": 0.5580082863684926, "learning_rate": 4.999797697817706e-06, "loss": 0.7039, "step": 673 }, { "epoch": 0.26633736725117313, "grad_norm": 0.521406203903939, "learning_rate": 4.999795699810796e-06, "loss": 0.6886, "step": 674 }, { "epoch": 0.26673252654976537, "grad_norm": 0.5672071403136022, "learning_rate": 4.9997936919862065e-06, "loss": 0.6799, "step": 675 }, { "epoch": 0.2671276858483576, "grad_norm": 0.5568748196172835, "learning_rate": 4.999791674343945e-06, "loss": 0.7012, "step": 676 }, { "epoch": 0.26752284514694985, "grad_norm": 0.5593131719229575, "learning_rate": 4.999789646884021e-06, "loss": 0.6841, "step": 677 }, { "epoch": 0.2679180044455421, "grad_norm": 0.5731216777632498, "learning_rate": 4.99978760960644e-06, "loss": 0.7111, "step": 678 }, { "epoch": 0.26831316374413433, "grad_norm": 0.6026088473088514, "learning_rate": 4.999785562511212e-06, "loss": 0.7065, "step": 679 }, { "epoch": 0.2687083230427266, "grad_norm": 0.5516806935780276, "learning_rate": 4.999783505598344e-06, "loss": 0.6909, "step": 680 }, { "epoch": 0.26910348234131887, "grad_norm": 0.5702038400823601, "learning_rate": 4.999781438867844e-06, "loss": 0.6734, "step": 681 }, { "epoch": 0.2694986416399111, "grad_norm": 0.5746782762066555, "learning_rate": 4.999779362319721e-06, "loss": 0.7042, "step": 682 }, { "epoch": 0.26989380093850335, "grad_norm": 0.6007328759815763, "learning_rate": 4.9997772759539825e-06, "loss": 0.6971, "step": 683 }, { "epoch": 0.2702889602370956, "grad_norm": 0.5658032828435371, "learning_rate": 4.999775179770637e-06, "loss": 0.6741, "step": 684 }, { "epoch": 0.27068411953568783, "grad_norm": 0.5445910362376495, "learning_rate": 4.999773073769692e-06, "loss": 0.6914, "step": 685 }, { "epoch": 0.27107927883428007, "grad_norm": 0.5625355418392146, "learning_rate": 4.9997709579511566e-06, "loss": 0.6778, "step": 686 }, { "epoch": 0.2714744381328723, "grad_norm": 0.5558770779852484, "learning_rate": 4.999768832315038e-06, "loss": 0.675, "step": 687 }, { "epoch": 0.27186959743146455, "grad_norm": 0.5384961627107955, "learning_rate": 4.999766696861346e-06, "loss": 0.6881, "step": 688 }, { "epoch": 0.2722647567300568, "grad_norm": 0.5646517085086373, "learning_rate": 4.9997645515900895e-06, "loss": 0.6737, "step": 689 }, { "epoch": 0.27265991602864903, "grad_norm": 0.5769838182468013, "learning_rate": 4.9997623965012745e-06, "loss": 0.6718, "step": 690 }, { "epoch": 0.27305507532724127, "grad_norm": 0.7282357435191615, "learning_rate": 4.999760231594911e-06, "loss": 0.7049, "step": 691 }, { "epoch": 0.27345023462583357, "grad_norm": 0.5279126762131758, "learning_rate": 4.999758056871007e-06, "loss": 0.6621, "step": 692 }, { "epoch": 0.2738453939244258, "grad_norm": 0.5706899106672201, "learning_rate": 4.999755872329571e-06, "loss": 0.6883, "step": 693 }, { "epoch": 0.27424055322301805, "grad_norm": 0.5563908836582794, "learning_rate": 4.999753677970614e-06, "loss": 0.7143, "step": 694 }, { "epoch": 0.2746357125216103, "grad_norm": 0.5362870289511571, "learning_rate": 4.9997514737941396e-06, "loss": 0.684, "step": 695 }, { "epoch": 0.2750308718202025, "grad_norm": 0.6153860851792133, "learning_rate": 4.9997492598001605e-06, "loss": 0.6883, "step": 696 }, { "epoch": 0.27542603111879477, "grad_norm": 0.546338501558778, "learning_rate": 4.999747035988684e-06, "loss": 0.6853, "step": 697 }, { "epoch": 0.275821190417387, "grad_norm": 0.5411393398480797, "learning_rate": 4.999744802359721e-06, "loss": 0.6759, "step": 698 }, { "epoch": 0.27621634971597925, "grad_norm": 0.5820635145009669, "learning_rate": 4.999742558913276e-06, "loss": 0.69, "step": 699 }, { "epoch": 0.2766115090145715, "grad_norm": 0.519802903820158, "learning_rate": 4.999740305649361e-06, "loss": 0.689, "step": 700 }, { "epoch": 0.27700666831316373, "grad_norm": 0.5335002406037318, "learning_rate": 4.999738042567984e-06, "loss": 0.6733, "step": 701 }, { "epoch": 0.27740182761175597, "grad_norm": 0.6251340494736801, "learning_rate": 4.999735769669154e-06, "loss": 0.6947, "step": 702 }, { "epoch": 0.2777969869103482, "grad_norm": 0.5568465012685252, "learning_rate": 4.999733486952879e-06, "loss": 0.6791, "step": 703 }, { "epoch": 0.2781921462089405, "grad_norm": 0.5677313063564176, "learning_rate": 4.9997311944191695e-06, "loss": 0.6653, "step": 704 }, { "epoch": 0.27858730550753275, "grad_norm": 0.5398469806124272, "learning_rate": 4.9997288920680345e-06, "loss": 0.6708, "step": 705 }, { "epoch": 0.278982464806125, "grad_norm": 0.5249202618448952, "learning_rate": 4.999726579899481e-06, "loss": 0.6714, "step": 706 }, { "epoch": 0.2793776241047172, "grad_norm": 0.5479464511682306, "learning_rate": 4.99972425791352e-06, "loss": 0.6884, "step": 707 }, { "epoch": 0.27977278340330947, "grad_norm": 0.612951514131732, "learning_rate": 4.99972192611016e-06, "loss": 0.6934, "step": 708 }, { "epoch": 0.2801679427019017, "grad_norm": 0.5472856818707729, "learning_rate": 4.999719584489409e-06, "loss": 0.6927, "step": 709 }, { "epoch": 0.28056310200049395, "grad_norm": 0.5711839364829995, "learning_rate": 4.99971723305128e-06, "loss": 0.6993, "step": 710 }, { "epoch": 0.2809582612990862, "grad_norm": 0.5344843617908501, "learning_rate": 4.999714871795778e-06, "loss": 0.6713, "step": 711 }, { "epoch": 0.2813534205976784, "grad_norm": 0.5725098490316971, "learning_rate": 4.9997125007229144e-06, "loss": 0.7028, "step": 712 }, { "epoch": 0.28174857989627067, "grad_norm": 0.5706604408107072, "learning_rate": 4.999710119832697e-06, "loss": 0.7134, "step": 713 }, { "epoch": 0.2821437391948629, "grad_norm": 0.5261160242380305, "learning_rate": 4.9997077291251366e-06, "loss": 0.6767, "step": 714 }, { "epoch": 0.28253889849345515, "grad_norm": 0.5469719762517126, "learning_rate": 4.999705328600243e-06, "loss": 0.7025, "step": 715 }, { "epoch": 0.28293405779204744, "grad_norm": 0.5324472833124879, "learning_rate": 4.9997029182580236e-06, "loss": 0.6743, "step": 716 }, { "epoch": 0.2833292170906397, "grad_norm": 0.5401067135045965, "learning_rate": 4.99970049809849e-06, "loss": 0.6931, "step": 717 }, { "epoch": 0.2837243763892319, "grad_norm": 0.5544406474161367, "learning_rate": 4.99969806812165e-06, "loss": 0.6968, "step": 718 }, { "epoch": 0.28411953568782417, "grad_norm": 0.5506360149437247, "learning_rate": 4.999695628327514e-06, "loss": 0.6981, "step": 719 }, { "epoch": 0.2845146949864164, "grad_norm": 0.5478511076951501, "learning_rate": 4.999693178716092e-06, "loss": 0.6782, "step": 720 }, { "epoch": 0.28490985428500865, "grad_norm": 0.5487601492507348, "learning_rate": 4.999690719287392e-06, "loss": 0.682, "step": 721 }, { "epoch": 0.2853050135836009, "grad_norm": 0.5568074889646241, "learning_rate": 4.999688250041426e-06, "loss": 0.7005, "step": 722 }, { "epoch": 0.2857001728821931, "grad_norm": 0.5211276732961236, "learning_rate": 4.999685770978202e-06, "loss": 0.685, "step": 723 }, { "epoch": 0.28609533218078537, "grad_norm": 0.5238564370246029, "learning_rate": 4.9996832820977305e-06, "loss": 0.6733, "step": 724 }, { "epoch": 0.2864904914793776, "grad_norm": 0.5488717039839817, "learning_rate": 4.999680783400021e-06, "loss": 0.6824, "step": 725 }, { "epoch": 0.28688565077796985, "grad_norm": 0.5590783909424277, "learning_rate": 4.999678274885082e-06, "loss": 0.6852, "step": 726 }, { "epoch": 0.2872808100765621, "grad_norm": 0.5718775481303447, "learning_rate": 4.999675756552926e-06, "loss": 0.6684, "step": 727 }, { "epoch": 0.2876759693751544, "grad_norm": 0.5537561512785655, "learning_rate": 4.99967322840356e-06, "loss": 0.7021, "step": 728 }, { "epoch": 0.2880711286737466, "grad_norm": 0.595756815576963, "learning_rate": 4.999670690436997e-06, "loss": 0.7033, "step": 729 }, { "epoch": 0.28846628797233886, "grad_norm": 0.5734638717038859, "learning_rate": 4.999668142653244e-06, "loss": 0.6825, "step": 730 }, { "epoch": 0.2888614472709311, "grad_norm": 0.5649785931782922, "learning_rate": 4.999665585052314e-06, "loss": 0.7142, "step": 731 }, { "epoch": 0.28925660656952334, "grad_norm": 0.6530890960010839, "learning_rate": 4.999663017634214e-06, "loss": 0.6788, "step": 732 }, { "epoch": 0.2896517658681156, "grad_norm": 0.6271597238579767, "learning_rate": 4.999660440398957e-06, "loss": 0.6949, "step": 733 }, { "epoch": 0.2900469251667078, "grad_norm": 0.5695950102858286, "learning_rate": 4.999657853346551e-06, "loss": 0.6833, "step": 734 }, { "epoch": 0.29044208446530007, "grad_norm": 0.6916391987946482, "learning_rate": 4.999655256477006e-06, "loss": 0.6961, "step": 735 }, { "epoch": 0.2908372437638923, "grad_norm": 0.6074484081013837, "learning_rate": 4.9996526497903345e-06, "loss": 0.7025, "step": 736 }, { "epoch": 0.29123240306248455, "grad_norm": 0.5356095341699845, "learning_rate": 4.9996500332865445e-06, "loss": 0.6576, "step": 737 }, { "epoch": 0.2916275623610768, "grad_norm": 0.5270122623233058, "learning_rate": 4.999647406965647e-06, "loss": 0.6493, "step": 738 }, { "epoch": 0.292022721659669, "grad_norm": 0.5844224564006655, "learning_rate": 4.999644770827652e-06, "loss": 0.7082, "step": 739 }, { "epoch": 0.2924178809582613, "grad_norm": 0.5731795031934965, "learning_rate": 4.999642124872571e-06, "loss": 0.6796, "step": 740 }, { "epoch": 0.29281304025685356, "grad_norm": 0.5464280422780536, "learning_rate": 4.999639469100413e-06, "loss": 0.6919, "step": 741 }, { "epoch": 0.2932081995554458, "grad_norm": 0.5488021923081339, "learning_rate": 4.99963680351119e-06, "loss": 0.6991, "step": 742 }, { "epoch": 0.29360335885403804, "grad_norm": 0.5999425296366285, "learning_rate": 4.99963412810491e-06, "loss": 0.6892, "step": 743 }, { "epoch": 0.2939985181526303, "grad_norm": 0.5301599791894791, "learning_rate": 4.999631442881586e-06, "loss": 0.7089, "step": 744 }, { "epoch": 0.2943936774512225, "grad_norm": 0.5563197028629331, "learning_rate": 4.999628747841227e-06, "loss": 0.6775, "step": 745 }, { "epoch": 0.29478883674981476, "grad_norm": 0.5446511718424091, "learning_rate": 4.999626042983845e-06, "loss": 0.6963, "step": 746 }, { "epoch": 0.295183996048407, "grad_norm": 0.5507487342171807, "learning_rate": 4.9996233283094485e-06, "loss": 0.6755, "step": 747 }, { "epoch": 0.29557915534699924, "grad_norm": 0.5266055784439764, "learning_rate": 4.999620603818051e-06, "loss": 0.6929, "step": 748 }, { "epoch": 0.2959743146455915, "grad_norm": 0.5626530509305415, "learning_rate": 4.99961786950966e-06, "loss": 0.6898, "step": 749 }, { "epoch": 0.2963694739441837, "grad_norm": 0.5399861793258945, "learning_rate": 4.999615125384289e-06, "loss": 0.6801, "step": 750 }, { "epoch": 0.296764633242776, "grad_norm": 0.5610645829088754, "learning_rate": 4.999612371441947e-06, "loss": 0.6709, "step": 751 }, { "epoch": 0.29715979254136826, "grad_norm": 0.5394716972805861, "learning_rate": 4.999609607682645e-06, "loss": 0.6514, "step": 752 }, { "epoch": 0.2975549518399605, "grad_norm": 0.5430055925792929, "learning_rate": 4.999606834106395e-06, "loss": 0.6849, "step": 753 }, { "epoch": 0.29795011113855274, "grad_norm": 0.544962481828332, "learning_rate": 4.999604050713207e-06, "loss": 0.7015, "step": 754 }, { "epoch": 0.298345270437145, "grad_norm": 0.5344852748367742, "learning_rate": 4.999601257503093e-06, "loss": 0.6949, "step": 755 }, { "epoch": 0.2987404297357372, "grad_norm": 0.5719742664992333, "learning_rate": 4.999598454476062e-06, "loss": 0.6901, "step": 756 }, { "epoch": 0.29913558903432946, "grad_norm": 0.584662964003699, "learning_rate": 4.999595641632126e-06, "loss": 0.7033, "step": 757 }, { "epoch": 0.2995307483329217, "grad_norm": 0.552326717376869, "learning_rate": 4.999592818971297e-06, "loss": 0.6863, "step": 758 }, { "epoch": 0.29992590763151394, "grad_norm": 0.5475655007217732, "learning_rate": 4.999589986493585e-06, "loss": 0.66, "step": 759 }, { "epoch": 0.3003210669301062, "grad_norm": 0.5882240228280694, "learning_rate": 4.999587144199001e-06, "loss": 0.6962, "step": 760 }, { "epoch": 0.3007162262286984, "grad_norm": 0.5699153356709309, "learning_rate": 4.9995842920875575e-06, "loss": 0.6803, "step": 761 }, { "epoch": 0.30111138552729066, "grad_norm": 0.6280558268501941, "learning_rate": 4.9995814301592635e-06, "loss": 0.6793, "step": 762 }, { "epoch": 0.30150654482588296, "grad_norm": 0.6650094210932691, "learning_rate": 4.999578558414132e-06, "loss": 0.6975, "step": 763 }, { "epoch": 0.3019017041244752, "grad_norm": 0.5542990218134582, "learning_rate": 4.999575676852174e-06, "loss": 0.6897, "step": 764 }, { "epoch": 0.30229686342306744, "grad_norm": 0.585799531977493, "learning_rate": 4.9995727854734e-06, "loss": 0.6721, "step": 765 }, { "epoch": 0.3026920227216597, "grad_norm": 0.7770451245382635, "learning_rate": 4.9995698842778225e-06, "loss": 0.6981, "step": 766 }, { "epoch": 0.3030871820202519, "grad_norm": 0.5471559718041984, "learning_rate": 4.999566973265452e-06, "loss": 0.6524, "step": 767 }, { "epoch": 0.30348234131884416, "grad_norm": 0.5680704664062982, "learning_rate": 4.999564052436301e-06, "loss": 0.6481, "step": 768 }, { "epoch": 0.3038775006174364, "grad_norm": 0.5983428486623749, "learning_rate": 4.999561121790379e-06, "loss": 0.6848, "step": 769 }, { "epoch": 0.30427265991602864, "grad_norm": 0.5641224737953592, "learning_rate": 4.9995581813276995e-06, "loss": 0.6703, "step": 770 }, { "epoch": 0.3046678192146209, "grad_norm": 0.5647226711303925, "learning_rate": 4.999555231048273e-06, "loss": 0.7158, "step": 771 }, { "epoch": 0.3050629785132131, "grad_norm": 0.7023254822593299, "learning_rate": 4.999552270952112e-06, "loss": 0.684, "step": 772 }, { "epoch": 0.30545813781180536, "grad_norm": 0.6146604133584318, "learning_rate": 4.999549301039226e-06, "loss": 0.7085, "step": 773 }, { "epoch": 0.3058532971103976, "grad_norm": 0.562728967949529, "learning_rate": 4.999546321309629e-06, "loss": 0.6681, "step": 774 }, { "epoch": 0.3062484564089899, "grad_norm": 1.4500324454053806, "learning_rate": 4.999543331763332e-06, "loss": 0.6917, "step": 775 }, { "epoch": 0.30664361570758214, "grad_norm": 0.6576023968965049, "learning_rate": 4.999540332400346e-06, "loss": 0.6953, "step": 776 }, { "epoch": 0.3070387750061744, "grad_norm": 0.5706530250137147, "learning_rate": 4.999537323220684e-06, "loss": 0.6794, "step": 777 }, { "epoch": 0.3074339343047666, "grad_norm": 0.5428676827482191, "learning_rate": 4.999534304224357e-06, "loss": 0.6851, "step": 778 }, { "epoch": 0.30782909360335886, "grad_norm": 0.5981739162493036, "learning_rate": 4.999531275411377e-06, "loss": 0.671, "step": 779 }, { "epoch": 0.3082242529019511, "grad_norm": 0.6170758554603312, "learning_rate": 4.999528236781757e-06, "loss": 0.6957, "step": 780 }, { "epoch": 0.30861941220054334, "grad_norm": 0.5823666001801212, "learning_rate": 4.999525188335507e-06, "loss": 0.6719, "step": 781 }, { "epoch": 0.3090145714991356, "grad_norm": 0.5997874539667468, "learning_rate": 4.99952213007264e-06, "loss": 0.6917, "step": 782 }, { "epoch": 0.3094097307977278, "grad_norm": 0.5821609633692683, "learning_rate": 4.999519061993166e-06, "loss": 0.6838, "step": 783 }, { "epoch": 0.30980489009632006, "grad_norm": 0.5479337929157194, "learning_rate": 4.999515984097101e-06, "loss": 0.6837, "step": 784 }, { "epoch": 0.3102000493949123, "grad_norm": 0.5937117790026203, "learning_rate": 4.999512896384455e-06, "loss": 0.6899, "step": 785 }, { "epoch": 0.31059520869350454, "grad_norm": 0.5782838146122935, "learning_rate": 4.99950979885524e-06, "loss": 0.6802, "step": 786 }, { "epoch": 0.31099036799209684, "grad_norm": 0.546006734321019, "learning_rate": 4.999506691509467e-06, "loss": 0.6776, "step": 787 }, { "epoch": 0.3113855272906891, "grad_norm": 0.5621083736104908, "learning_rate": 4.999503574347151e-06, "loss": 0.679, "step": 788 }, { "epoch": 0.3117806865892813, "grad_norm": 0.6018666658407275, "learning_rate": 4.9995004473683016e-06, "loss": 0.7095, "step": 789 }, { "epoch": 0.31217584588787356, "grad_norm": 0.5665313992465836, "learning_rate": 4.999497310572933e-06, "loss": 0.6906, "step": 790 }, { "epoch": 0.3125710051864658, "grad_norm": 0.5795816586087552, "learning_rate": 4.999494163961057e-06, "loss": 0.6792, "step": 791 }, { "epoch": 0.31296616448505804, "grad_norm": 0.5868236665755461, "learning_rate": 4.999491007532685e-06, "loss": 0.6851, "step": 792 }, { "epoch": 0.3133613237836503, "grad_norm": 0.5722398773037819, "learning_rate": 4.999487841287831e-06, "loss": 0.6667, "step": 793 }, { "epoch": 0.3137564830822425, "grad_norm": 0.5946352610623089, "learning_rate": 4.999484665226506e-06, "loss": 0.6909, "step": 794 }, { "epoch": 0.31415164238083476, "grad_norm": 1.2564866117315103, "learning_rate": 4.999481479348722e-06, "loss": 0.6852, "step": 795 }, { "epoch": 0.314546801679427, "grad_norm": 0.5575268193016416, "learning_rate": 4.999478283654494e-06, "loss": 0.6742, "step": 796 }, { "epoch": 0.31494196097801924, "grad_norm": 0.5545559884431172, "learning_rate": 4.999475078143832e-06, "loss": 0.6743, "step": 797 }, { "epoch": 0.3153371202766115, "grad_norm": 0.5460260402768534, "learning_rate": 4.999471862816751e-06, "loss": 0.6682, "step": 798 }, { "epoch": 0.3157322795752038, "grad_norm": 0.545727768192459, "learning_rate": 4.9994686376732605e-06, "loss": 0.675, "step": 799 }, { "epoch": 0.316127438873796, "grad_norm": 0.5493756349564834, "learning_rate": 4.999465402713376e-06, "loss": 0.6785, "step": 800 }, { "epoch": 0.31652259817238826, "grad_norm": 0.5521530157011729, "learning_rate": 4.99946215793711e-06, "loss": 0.6723, "step": 801 }, { "epoch": 0.3169177574709805, "grad_norm": 0.6919194319875192, "learning_rate": 4.999458903344473e-06, "loss": 0.6801, "step": 802 }, { "epoch": 0.31731291676957274, "grad_norm": 0.5635837786929486, "learning_rate": 4.99945563893548e-06, "loss": 0.6983, "step": 803 }, { "epoch": 0.317708076068165, "grad_norm": 0.5577079529859765, "learning_rate": 4.999452364710142e-06, "loss": 0.6869, "step": 804 }, { "epoch": 0.3181032353667572, "grad_norm": 0.522245723084247, "learning_rate": 4.999449080668473e-06, "loss": 0.6711, "step": 805 }, { "epoch": 0.31849839466534946, "grad_norm": 0.5543341617543934, "learning_rate": 4.999445786810486e-06, "loss": 0.6528, "step": 806 }, { "epoch": 0.3188935539639417, "grad_norm": 0.5404102043718793, "learning_rate": 4.999442483136194e-06, "loss": 0.6706, "step": 807 }, { "epoch": 0.31928871326253394, "grad_norm": 0.5463105642491327, "learning_rate": 4.99943916964561e-06, "loss": 0.6675, "step": 808 }, { "epoch": 0.3196838725611262, "grad_norm": 0.6177688829664617, "learning_rate": 4.999435846338746e-06, "loss": 0.6753, "step": 809 }, { "epoch": 0.3200790318597184, "grad_norm": 0.5490308724780782, "learning_rate": 4.999432513215616e-06, "loss": 0.6751, "step": 810 }, { "epoch": 0.3204741911583107, "grad_norm": 0.6090150943921979, "learning_rate": 4.999429170276233e-06, "loss": 0.6852, "step": 811 }, { "epoch": 0.32086935045690296, "grad_norm": 0.5611702933211862, "learning_rate": 4.99942581752061e-06, "loss": 0.6857, "step": 812 }, { "epoch": 0.3212645097554952, "grad_norm": 0.5471158991492043, "learning_rate": 4.999422454948759e-06, "loss": 0.668, "step": 813 }, { "epoch": 0.32165966905408744, "grad_norm": 0.6040235246117969, "learning_rate": 4.9994190825606955e-06, "loss": 0.6738, "step": 814 }, { "epoch": 0.3220548283526797, "grad_norm": 0.5461114975582116, "learning_rate": 4.999415700356431e-06, "loss": 0.6935, "step": 815 }, { "epoch": 0.3224499876512719, "grad_norm": 0.5531585597793667, "learning_rate": 4.99941230833598e-06, "loss": 0.6964, "step": 816 }, { "epoch": 0.32284514694986416, "grad_norm": 0.555832857990718, "learning_rate": 4.999408906499355e-06, "loss": 0.6878, "step": 817 }, { "epoch": 0.3232403062484564, "grad_norm": 0.5159924465583967, "learning_rate": 4.999405494846569e-06, "loss": 0.6666, "step": 818 }, { "epoch": 0.32363546554704864, "grad_norm": 0.5242278141632275, "learning_rate": 4.999402073377637e-06, "loss": 0.6556, "step": 819 }, { "epoch": 0.3240306248456409, "grad_norm": 0.5485685565466117, "learning_rate": 4.99939864209257e-06, "loss": 0.6867, "step": 820 }, { "epoch": 0.3244257841442331, "grad_norm": 0.5599138503166448, "learning_rate": 4.999395200991384e-06, "loss": 0.6741, "step": 821 }, { "epoch": 0.3248209434428254, "grad_norm": 0.5314314737207193, "learning_rate": 4.999391750074091e-06, "loss": 0.6597, "step": 822 }, { "epoch": 0.32521610274141766, "grad_norm": 0.5505001683410817, "learning_rate": 4.999388289340705e-06, "loss": 0.6734, "step": 823 }, { "epoch": 0.3256112620400099, "grad_norm": 0.5483028368326526, "learning_rate": 4.999384818791239e-06, "loss": 0.6871, "step": 824 }, { "epoch": 0.32600642133860214, "grad_norm": 0.5553659611188733, "learning_rate": 4.9993813384257075e-06, "loss": 0.6879, "step": 825 }, { "epoch": 0.3264015806371944, "grad_norm": 0.5508245461247865, "learning_rate": 4.999377848244123e-06, "loss": 0.6608, "step": 826 }, { "epoch": 0.3267967399357866, "grad_norm": 0.5468555275628424, "learning_rate": 4.9993743482465015e-06, "loss": 0.6885, "step": 827 }, { "epoch": 0.32719189923437886, "grad_norm": 0.5518450089645637, "learning_rate": 4.999370838432854e-06, "loss": 0.6784, "step": 828 }, { "epoch": 0.3275870585329711, "grad_norm": 0.5381677755962232, "learning_rate": 4.999367318803197e-06, "loss": 0.6813, "step": 829 }, { "epoch": 0.32798221783156334, "grad_norm": 0.5400069304709476, "learning_rate": 4.999363789357542e-06, "loss": 0.6664, "step": 830 }, { "epoch": 0.3283773771301556, "grad_norm": 0.6129541548516995, "learning_rate": 4.999360250095903e-06, "loss": 0.6813, "step": 831 }, { "epoch": 0.3287725364287478, "grad_norm": 0.551379733965531, "learning_rate": 4.999356701018295e-06, "loss": 0.6698, "step": 832 }, { "epoch": 0.32916769572734006, "grad_norm": 0.530243445703882, "learning_rate": 4.999353142124732e-06, "loss": 0.682, "step": 833 }, { "epoch": 0.32956285502593236, "grad_norm": 0.5348358317715631, "learning_rate": 4.999349573415227e-06, "loss": 0.6489, "step": 834 }, { "epoch": 0.3299580143245246, "grad_norm": 0.5547085181902088, "learning_rate": 4.999345994889796e-06, "loss": 0.6938, "step": 835 }, { "epoch": 0.33035317362311684, "grad_norm": 0.5243281588616451, "learning_rate": 4.99934240654845e-06, "loss": 0.6671, "step": 836 }, { "epoch": 0.3307483329217091, "grad_norm": 0.5493088801401717, "learning_rate": 4.9993388083912054e-06, "loss": 0.6706, "step": 837 }, { "epoch": 0.3311434922203013, "grad_norm": 0.545645002523389, "learning_rate": 4.999335200418076e-06, "loss": 0.6901, "step": 838 }, { "epoch": 0.33153865151889356, "grad_norm": 0.5613907384738281, "learning_rate": 4.999331582629075e-06, "loss": 0.6908, "step": 839 }, { "epoch": 0.3319338108174858, "grad_norm": 0.6000984842384804, "learning_rate": 4.999327955024217e-06, "loss": 0.6828, "step": 840 }, { "epoch": 0.33232897011607804, "grad_norm": 0.51885138747256, "learning_rate": 4.9993243176035175e-06, "loss": 0.6597, "step": 841 }, { "epoch": 0.3327241294146703, "grad_norm": 0.6231011537404787, "learning_rate": 4.999320670366989e-06, "loss": 0.6571, "step": 842 }, { "epoch": 0.3331192887132625, "grad_norm": 0.5214424709505693, "learning_rate": 4.999317013314646e-06, "loss": 0.673, "step": 843 }, { "epoch": 0.33351444801185476, "grad_norm": 0.5410014959338921, "learning_rate": 4.999313346446505e-06, "loss": 0.6747, "step": 844 }, { "epoch": 0.333909607310447, "grad_norm": 0.6300322367055629, "learning_rate": 4.999309669762578e-06, "loss": 0.6944, "step": 845 }, { "epoch": 0.3343047666090393, "grad_norm": 0.6141171186975185, "learning_rate": 4.99930598326288e-06, "loss": 0.6746, "step": 846 }, { "epoch": 0.33469992590763153, "grad_norm": 0.5592903856619154, "learning_rate": 4.9993022869474264e-06, "loss": 0.6846, "step": 847 }, { "epoch": 0.3350950852062238, "grad_norm": 0.5914019521534136, "learning_rate": 4.99929858081623e-06, "loss": 0.6562, "step": 848 }, { "epoch": 0.335490244504816, "grad_norm": 0.5556986261868628, "learning_rate": 4.999294864869307e-06, "loss": 0.695, "step": 849 }, { "epoch": 0.33588540380340826, "grad_norm": 0.5361036550501209, "learning_rate": 4.999291139106672e-06, "loss": 0.6733, "step": 850 }, { "epoch": 0.3362805631020005, "grad_norm": 0.5663681947389955, "learning_rate": 4.9992874035283375e-06, "loss": 0.6727, "step": 851 }, { "epoch": 0.33667572240059274, "grad_norm": 0.5385125766471879, "learning_rate": 4.999283658134322e-06, "loss": 0.6717, "step": 852 }, { "epoch": 0.337070881699185, "grad_norm": 0.58237023128874, "learning_rate": 4.999279902924636e-06, "loss": 0.6751, "step": 853 }, { "epoch": 0.3374660409977772, "grad_norm": 0.6219007403124622, "learning_rate": 4.999276137899297e-06, "loss": 0.6515, "step": 854 }, { "epoch": 0.33786120029636946, "grad_norm": 0.5581232382993533, "learning_rate": 4.999272363058319e-06, "loss": 0.6876, "step": 855 }, { "epoch": 0.3382563595949617, "grad_norm": 0.5570097230753517, "learning_rate": 4.999268578401717e-06, "loss": 0.6696, "step": 856 }, { "epoch": 0.33865151889355394, "grad_norm": 0.5502292291243254, "learning_rate": 4.999264783929505e-06, "loss": 0.6651, "step": 857 }, { "epoch": 0.33904667819214623, "grad_norm": 0.5289187848980834, "learning_rate": 4.999260979641699e-06, "loss": 0.6586, "step": 858 }, { "epoch": 0.3394418374907385, "grad_norm": 0.5677219337773182, "learning_rate": 4.999257165538314e-06, "loss": 0.6764, "step": 859 }, { "epoch": 0.3398369967893307, "grad_norm": 0.6248356948027501, "learning_rate": 4.999253341619363e-06, "loss": 0.6685, "step": 860 }, { "epoch": 0.34023215608792295, "grad_norm": 0.5245870998609313, "learning_rate": 4.999249507884864e-06, "loss": 0.6637, "step": 861 }, { "epoch": 0.3406273153865152, "grad_norm": 0.5633998533768565, "learning_rate": 4.9992456643348296e-06, "loss": 0.6626, "step": 862 }, { "epoch": 0.34102247468510744, "grad_norm": 0.6163731317461442, "learning_rate": 4.999241810969276e-06, "loss": 0.7031, "step": 863 }, { "epoch": 0.3414176339836997, "grad_norm": 0.5319060256897468, "learning_rate": 4.999237947788218e-06, "loss": 0.6661, "step": 864 }, { "epoch": 0.3418127932822919, "grad_norm": 0.5747234912624848, "learning_rate": 4.999234074791673e-06, "loss": 0.6637, "step": 865 }, { "epoch": 0.34220795258088416, "grad_norm": 0.6217626123594077, "learning_rate": 4.9992301919796515e-06, "loss": 0.6956, "step": 866 }, { "epoch": 0.3426031118794764, "grad_norm": 0.5291571135659683, "learning_rate": 4.999226299352172e-06, "loss": 0.6592, "step": 867 }, { "epoch": 0.34299827117806864, "grad_norm": 0.5929800922433651, "learning_rate": 4.99922239690925e-06, "loss": 0.6758, "step": 868 }, { "epoch": 0.3433934304766609, "grad_norm": 0.8263016280842782, "learning_rate": 4.999218484650899e-06, "loss": 0.6924, "step": 869 }, { "epoch": 0.3437885897752532, "grad_norm": 0.5373424767279488, "learning_rate": 4.999214562577137e-06, "loss": 0.6778, "step": 870 }, { "epoch": 0.3441837490738454, "grad_norm": 0.6161942738080999, "learning_rate": 4.999210630687976e-06, "loss": 0.6868, "step": 871 }, { "epoch": 0.34457890837243765, "grad_norm": 0.5544247743850119, "learning_rate": 4.999206688983435e-06, "loss": 0.6472, "step": 872 }, { "epoch": 0.3449740676710299, "grad_norm": 0.5436735454536356, "learning_rate": 4.9992027374635265e-06, "loss": 0.6681, "step": 873 }, { "epoch": 0.34536922696962213, "grad_norm": 0.5870467900357642, "learning_rate": 4.999198776128268e-06, "loss": 0.6488, "step": 874 }, { "epoch": 0.3457643862682144, "grad_norm": 0.5226483330016569, "learning_rate": 4.999194804977674e-06, "loss": 0.6474, "step": 875 }, { "epoch": 0.3461595455668066, "grad_norm": 0.5480980063898397, "learning_rate": 4.99919082401176e-06, "loss": 0.6809, "step": 876 }, { "epoch": 0.34655470486539885, "grad_norm": 0.5845327636648194, "learning_rate": 4.999186833230542e-06, "loss": 0.6461, "step": 877 }, { "epoch": 0.3469498641639911, "grad_norm": 0.700267095825688, "learning_rate": 4.999182832634036e-06, "loss": 0.6864, "step": 878 }, { "epoch": 0.34734502346258334, "grad_norm": 0.5704163766747798, "learning_rate": 4.999178822222258e-06, "loss": 0.6624, "step": 879 }, { "epoch": 0.3477401827611756, "grad_norm": 0.5688194389353117, "learning_rate": 4.999174801995222e-06, "loss": 0.6597, "step": 880 }, { "epoch": 0.34813534205976787, "grad_norm": 0.5210130312006719, "learning_rate": 4.999170771952946e-06, "loss": 0.6618, "step": 881 }, { "epoch": 0.3485305013583601, "grad_norm": 0.5429969490828442, "learning_rate": 4.999166732095445e-06, "loss": 0.6677, "step": 882 }, { "epoch": 0.34892566065695235, "grad_norm": 0.5679202548589564, "learning_rate": 4.999162682422733e-06, "loss": 0.6668, "step": 883 }, { "epoch": 0.3493208199555446, "grad_norm": 0.5391777536262067, "learning_rate": 4.999158622934829e-06, "loss": 0.6704, "step": 884 }, { "epoch": 0.34971597925413683, "grad_norm": 0.5622016243906001, "learning_rate": 4.999154553631748e-06, "loss": 0.6435, "step": 885 }, { "epoch": 0.3501111385527291, "grad_norm": 0.5628428119759383, "learning_rate": 4.999150474513504e-06, "loss": 0.6618, "step": 886 }, { "epoch": 0.3505062978513213, "grad_norm": 0.5303772562973009, "learning_rate": 4.999146385580114e-06, "loss": 0.6628, "step": 887 }, { "epoch": 0.35090145714991355, "grad_norm": 0.5813782031762684, "learning_rate": 4.999142286831596e-06, "loss": 0.6876, "step": 888 }, { "epoch": 0.3512966164485058, "grad_norm": 0.563823831771851, "learning_rate": 4.999138178267965e-06, "loss": 0.6604, "step": 889 }, { "epoch": 0.35169177574709803, "grad_norm": 0.5253757499381441, "learning_rate": 4.999134059889236e-06, "loss": 0.6639, "step": 890 }, { "epoch": 0.3520869350456903, "grad_norm": 0.5593726055000605, "learning_rate": 4.9991299316954255e-06, "loss": 0.6476, "step": 891 }, { "epoch": 0.3524820943442825, "grad_norm": 0.5367006366591701, "learning_rate": 4.9991257936865515e-06, "loss": 0.6937, "step": 892 }, { "epoch": 0.3528772536428748, "grad_norm": 0.5567610106868094, "learning_rate": 4.999121645862628e-06, "loss": 0.6731, "step": 893 }, { "epoch": 0.35327241294146705, "grad_norm": 0.5324611058908338, "learning_rate": 4.999117488223672e-06, "loss": 0.6735, "step": 894 }, { "epoch": 0.3536675722400593, "grad_norm": 0.845928580999717, "learning_rate": 4.999113320769701e-06, "loss": 0.6675, "step": 895 }, { "epoch": 0.35406273153865153, "grad_norm": 0.5597594694408724, "learning_rate": 4.999109143500729e-06, "loss": 0.6982, "step": 896 }, { "epoch": 0.35445789083724377, "grad_norm": 0.5742575981877034, "learning_rate": 4.999104956416775e-06, "loss": 0.6431, "step": 897 }, { "epoch": 0.354853050135836, "grad_norm": 0.5446258761551195, "learning_rate": 4.999100759517854e-06, "loss": 0.6749, "step": 898 }, { "epoch": 0.35524820943442825, "grad_norm": 0.5292659599253334, "learning_rate": 4.999096552803983e-06, "loss": 0.6775, "step": 899 }, { "epoch": 0.3556433687330205, "grad_norm": 0.5292261917552322, "learning_rate": 4.9990923362751776e-06, "loss": 0.6716, "step": 900 }, { "epoch": 0.35603852803161273, "grad_norm": 0.5470660498231109, "learning_rate": 4.999088109931456e-06, "loss": 0.6599, "step": 901 }, { "epoch": 0.356433687330205, "grad_norm": 0.5338026521581444, "learning_rate": 4.999083873772833e-06, "loss": 0.6691, "step": 902 }, { "epoch": 0.3568288466287972, "grad_norm": 0.5613881734474877, "learning_rate": 4.9990796277993255e-06, "loss": 0.6798, "step": 903 }, { "epoch": 0.35722400592738945, "grad_norm": 0.5478029825120281, "learning_rate": 4.999075372010952e-06, "loss": 0.6623, "step": 904 }, { "epoch": 0.35761916522598175, "grad_norm": 0.5165063149329737, "learning_rate": 4.999071106407728e-06, "loss": 0.661, "step": 905 }, { "epoch": 0.358014324524574, "grad_norm": 0.5992216629583358, "learning_rate": 4.999066830989669e-06, "loss": 0.653, "step": 906 }, { "epoch": 0.35840948382316623, "grad_norm": 0.5526267591056174, "learning_rate": 4.999062545756794e-06, "loss": 0.6657, "step": 907 }, { "epoch": 0.35880464312175847, "grad_norm": 0.5231560128614834, "learning_rate": 4.999058250709119e-06, "loss": 0.6427, "step": 908 }, { "epoch": 0.3591998024203507, "grad_norm": 0.5496671975565968, "learning_rate": 4.99905394584666e-06, "loss": 0.6728, "step": 909 }, { "epoch": 0.35959496171894295, "grad_norm": 0.5814232320207219, "learning_rate": 4.999049631169435e-06, "loss": 0.6716, "step": 910 }, { "epoch": 0.3599901210175352, "grad_norm": 0.5284531514261371, "learning_rate": 4.99904530667746e-06, "loss": 0.6784, "step": 911 }, { "epoch": 0.36038528031612743, "grad_norm": 0.5830847990400584, "learning_rate": 4.999040972370753e-06, "loss": 0.6728, "step": 912 }, { "epoch": 0.3607804396147197, "grad_norm": 0.5265753919458841, "learning_rate": 4.999036628249331e-06, "loss": 0.6958, "step": 913 }, { "epoch": 0.3611755989133119, "grad_norm": 0.528588620481184, "learning_rate": 4.99903227431321e-06, "loss": 0.6548, "step": 914 }, { "epoch": 0.36157075821190415, "grad_norm": 0.5182035242903082, "learning_rate": 4.9990279105624076e-06, "loss": 0.6598, "step": 915 }, { "epoch": 0.3619659175104964, "grad_norm": 0.5286689369003267, "learning_rate": 4.9990235369969406e-06, "loss": 0.6872, "step": 916 }, { "epoch": 0.3623610768090887, "grad_norm": 0.5386957143651905, "learning_rate": 4.9990191536168274e-06, "loss": 0.6491, "step": 917 }, { "epoch": 0.36275623610768093, "grad_norm": 0.5514686370106001, "learning_rate": 4.999014760422085e-06, "loss": 0.6651, "step": 918 }, { "epoch": 0.36315139540627317, "grad_norm": 0.5648373707016645, "learning_rate": 4.999010357412729e-06, "loss": 0.6731, "step": 919 }, { "epoch": 0.3635465547048654, "grad_norm": 0.5152295186463809, "learning_rate": 4.999005944588779e-06, "loss": 0.6428, "step": 920 }, { "epoch": 0.36394171400345765, "grad_norm": 0.5066022878135411, "learning_rate": 4.9990015219502505e-06, "loss": 0.658, "step": 921 }, { "epoch": 0.3643368733020499, "grad_norm": 0.5428977376504636, "learning_rate": 4.998997089497161e-06, "loss": 0.6613, "step": 922 }, { "epoch": 0.36473203260064213, "grad_norm": 0.5678413901240047, "learning_rate": 4.998992647229529e-06, "loss": 0.6737, "step": 923 }, { "epoch": 0.36512719189923437, "grad_norm": 0.536401567098352, "learning_rate": 4.9989881951473706e-06, "loss": 0.652, "step": 924 }, { "epoch": 0.3655223511978266, "grad_norm": 0.5768953101789457, "learning_rate": 4.998983733250705e-06, "loss": 0.6314, "step": 925 }, { "epoch": 0.36591751049641885, "grad_norm": 0.5352996178968072, "learning_rate": 4.998979261539548e-06, "loss": 0.6683, "step": 926 }, { "epoch": 0.3663126697950111, "grad_norm": 0.5174696386468567, "learning_rate": 4.998974780013919e-06, "loss": 0.6607, "step": 927 }, { "epoch": 0.36670782909360333, "grad_norm": 0.6030535589781871, "learning_rate": 4.998970288673833e-06, "loss": 0.6578, "step": 928 }, { "epoch": 0.36710298839219563, "grad_norm": 0.5500821850702614, "learning_rate": 4.99896578751931e-06, "loss": 0.6622, "step": 929 }, { "epoch": 0.36749814769078787, "grad_norm": 0.5283486083712549, "learning_rate": 4.998961276550367e-06, "loss": 0.6518, "step": 930 }, { "epoch": 0.3678933069893801, "grad_norm": 0.5663497776849602, "learning_rate": 4.998956755767021e-06, "loss": 0.6697, "step": 931 }, { "epoch": 0.36828846628797235, "grad_norm": 0.5211239249646628, "learning_rate": 4.998952225169291e-06, "loss": 0.6658, "step": 932 }, { "epoch": 0.3686836255865646, "grad_norm": 0.5375278433902325, "learning_rate": 4.9989476847571935e-06, "loss": 0.6672, "step": 933 }, { "epoch": 0.36907878488515683, "grad_norm": 0.5125111796062831, "learning_rate": 4.998943134530748e-06, "loss": 0.6664, "step": 934 }, { "epoch": 0.36947394418374907, "grad_norm": 0.7149723112166187, "learning_rate": 4.9989385744899705e-06, "loss": 0.6792, "step": 935 }, { "epoch": 0.3698691034823413, "grad_norm": 0.5195495635576596, "learning_rate": 4.99893400463488e-06, "loss": 0.6424, "step": 936 }, { "epoch": 0.37026426278093355, "grad_norm": 0.5261355467385814, "learning_rate": 4.998929424965494e-06, "loss": 0.6794, "step": 937 }, { "epoch": 0.3706594220795258, "grad_norm": 0.5371603404887052, "learning_rate": 4.99892483548183e-06, "loss": 0.6556, "step": 938 }, { "epoch": 0.37105458137811803, "grad_norm": 0.5288627513817326, "learning_rate": 4.998920236183908e-06, "loss": 0.6408, "step": 939 }, { "epoch": 0.37144974067671027, "grad_norm": 0.5366598263301546, "learning_rate": 4.998915627071743e-06, "loss": 0.6745, "step": 940 }, { "epoch": 0.37184489997530257, "grad_norm": 0.5209858322992073, "learning_rate": 4.998911008145357e-06, "loss": 0.6716, "step": 941 }, { "epoch": 0.3722400592738948, "grad_norm": 0.5092473497788766, "learning_rate": 4.998906379404764e-06, "loss": 0.6588, "step": 942 }, { "epoch": 0.37263521857248705, "grad_norm": 0.5301240404384194, "learning_rate": 4.998901740849985e-06, "loss": 0.6475, "step": 943 }, { "epoch": 0.3730303778710793, "grad_norm": 0.5369766245496886, "learning_rate": 4.998897092481037e-06, "loss": 0.6704, "step": 944 }, { "epoch": 0.37342553716967153, "grad_norm": 0.5677722223194425, "learning_rate": 4.998892434297939e-06, "loss": 0.6489, "step": 945 }, { "epoch": 0.37382069646826377, "grad_norm": 0.5231100890683938, "learning_rate": 4.998887766300708e-06, "loss": 0.6746, "step": 946 }, { "epoch": 0.374215855766856, "grad_norm": 0.5295898234845133, "learning_rate": 4.998883088489365e-06, "loss": 0.6814, "step": 947 }, { "epoch": 0.37461101506544825, "grad_norm": 0.5785367347699937, "learning_rate": 4.9988784008639254e-06, "loss": 0.6677, "step": 948 }, { "epoch": 0.3750061743640405, "grad_norm": 0.5052116039346995, "learning_rate": 4.99887370342441e-06, "loss": 0.6693, "step": 949 }, { "epoch": 0.37540133366263273, "grad_norm": 0.5090885753544528, "learning_rate": 4.998868996170835e-06, "loss": 0.6308, "step": 950 }, { "epoch": 0.37579649296122497, "grad_norm": 0.6095866408280952, "learning_rate": 4.9988642791032205e-06, "loss": 0.6728, "step": 951 }, { "epoch": 0.37619165225981727, "grad_norm": 0.5542782397017351, "learning_rate": 4.998859552221584e-06, "loss": 0.6645, "step": 952 }, { "epoch": 0.3765868115584095, "grad_norm": 0.6102273000867942, "learning_rate": 4.9988548155259446e-06, "loss": 0.6714, "step": 953 }, { "epoch": 0.37698197085700175, "grad_norm": 0.5788528638282513, "learning_rate": 4.998850069016321e-06, "loss": 0.6676, "step": 954 }, { "epoch": 0.377377130155594, "grad_norm": 0.5259169323825259, "learning_rate": 4.998845312692732e-06, "loss": 0.6705, "step": 955 }, { "epoch": 0.3777722894541862, "grad_norm": 0.5396876703479768, "learning_rate": 4.998840546555196e-06, "loss": 0.6619, "step": 956 }, { "epoch": 0.37816744875277847, "grad_norm": 0.5417300333578844, "learning_rate": 4.9988357706037315e-06, "loss": 0.675, "step": 957 }, { "epoch": 0.3785626080513707, "grad_norm": 0.5212701698383503, "learning_rate": 4.998830984838358e-06, "loss": 0.6446, "step": 958 }, { "epoch": 0.37895776734996295, "grad_norm": 0.5585604547421766, "learning_rate": 4.9988261892590925e-06, "loss": 0.6649, "step": 959 }, { "epoch": 0.3793529266485552, "grad_norm": 0.5356740144146942, "learning_rate": 4.998821383865956e-06, "loss": 0.6612, "step": 960 }, { "epoch": 0.37974808594714743, "grad_norm": 0.5389707866617842, "learning_rate": 4.9988165686589665e-06, "loss": 0.6612, "step": 961 }, { "epoch": 0.38014324524573967, "grad_norm": 0.5470563785584632, "learning_rate": 4.998811743638142e-06, "loss": 0.6502, "step": 962 }, { "epoch": 0.3805384045443319, "grad_norm": 0.5497560302772653, "learning_rate": 4.998806908803504e-06, "loss": 0.6656, "step": 963 }, { "epoch": 0.3809335638429242, "grad_norm": 0.5504911983499313, "learning_rate": 4.998802064155068e-06, "loss": 0.6502, "step": 964 }, { "epoch": 0.38132872314151645, "grad_norm": 0.5526354169398469, "learning_rate": 4.998797209692856e-06, "loss": 0.6807, "step": 965 }, { "epoch": 0.3817238824401087, "grad_norm": 0.5608969389308869, "learning_rate": 4.998792345416886e-06, "loss": 0.6562, "step": 966 }, { "epoch": 0.3821190417387009, "grad_norm": 0.5361562466884632, "learning_rate": 4.998787471327177e-06, "loss": 0.6781, "step": 967 }, { "epoch": 0.38251420103729317, "grad_norm": 0.6165770215935255, "learning_rate": 4.998782587423747e-06, "loss": 0.668, "step": 968 }, { "epoch": 0.3829093603358854, "grad_norm": 0.6202442629402494, "learning_rate": 4.9987776937066175e-06, "loss": 0.6746, "step": 969 }, { "epoch": 0.38330451963447765, "grad_norm": 0.5192514425229897, "learning_rate": 4.998772790175806e-06, "loss": 0.6578, "step": 970 }, { "epoch": 0.3836996789330699, "grad_norm": 0.621919978812942, "learning_rate": 4.998767876831333e-06, "loss": 0.6718, "step": 971 }, { "epoch": 0.38409483823166213, "grad_norm": 0.5374983489076336, "learning_rate": 4.998762953673216e-06, "loss": 0.6818, "step": 972 }, { "epoch": 0.38448999753025437, "grad_norm": 0.5336474258984356, "learning_rate": 4.998758020701476e-06, "loss": 0.6751, "step": 973 }, { "epoch": 0.3848851568288466, "grad_norm": 0.5621510282866113, "learning_rate": 4.998753077916132e-06, "loss": 0.6844, "step": 974 }, { "epoch": 0.38528031612743885, "grad_norm": 0.5441555376095901, "learning_rate": 4.998748125317203e-06, "loss": 0.6607, "step": 975 }, { "epoch": 0.38567547542603114, "grad_norm": 0.5419600341782435, "learning_rate": 4.998743162904709e-06, "loss": 0.6722, "step": 976 }, { "epoch": 0.3860706347246234, "grad_norm": 0.5739218284794436, "learning_rate": 4.998738190678669e-06, "loss": 0.6466, "step": 977 }, { "epoch": 0.3864657940232156, "grad_norm": 0.5369050064214791, "learning_rate": 4.998733208639103e-06, "loss": 0.6476, "step": 978 }, { "epoch": 0.38686095332180787, "grad_norm": 0.5377019972492644, "learning_rate": 4.99872821678603e-06, "loss": 0.6768, "step": 979 }, { "epoch": 0.3872561126204001, "grad_norm": 0.6081827614560933, "learning_rate": 4.99872321511947e-06, "loss": 0.6694, "step": 980 }, { "epoch": 0.38765127191899235, "grad_norm": 0.5640672539104963, "learning_rate": 4.998718203639442e-06, "loss": 0.6632, "step": 981 }, { "epoch": 0.3880464312175846, "grad_norm": 0.5004089536671517, "learning_rate": 4.998713182345967e-06, "loss": 0.6379, "step": 982 }, { "epoch": 0.3884415905161768, "grad_norm": 0.5836270463868607, "learning_rate": 4.998708151239063e-06, "loss": 0.658, "step": 983 }, { "epoch": 0.38883674981476907, "grad_norm": 0.5251488931681385, "learning_rate": 4.998703110318751e-06, "loss": 0.696, "step": 984 }, { "epoch": 0.3892319091133613, "grad_norm": 0.5242683309546348, "learning_rate": 4.998698059585051e-06, "loss": 0.6634, "step": 985 }, { "epoch": 0.38962706841195355, "grad_norm": 0.5380044203359831, "learning_rate": 4.998692999037982e-06, "loss": 0.639, "step": 986 }, { "epoch": 0.3900222277105458, "grad_norm": 0.5462572647473274, "learning_rate": 4.998687928677563e-06, "loss": 0.6645, "step": 987 }, { "epoch": 0.3904173870091381, "grad_norm": 0.5464260530979489, "learning_rate": 4.998682848503817e-06, "loss": 0.6602, "step": 988 }, { "epoch": 0.3908125463077303, "grad_norm": 0.535112702170672, "learning_rate": 4.998677758516761e-06, "loss": 0.6743, "step": 989 }, { "epoch": 0.39120770560632256, "grad_norm": 0.5295319813464742, "learning_rate": 4.998672658716416e-06, "loss": 0.6655, "step": 990 }, { "epoch": 0.3916028649049148, "grad_norm": 0.541811550848016, "learning_rate": 4.998667549102803e-06, "loss": 0.6758, "step": 991 }, { "epoch": 0.39199802420350705, "grad_norm": 0.5451452325701546, "learning_rate": 4.99866242967594e-06, "loss": 0.6884, "step": 992 }, { "epoch": 0.3923931835020993, "grad_norm": 0.5341028641904682, "learning_rate": 4.998657300435849e-06, "loss": 0.6585, "step": 993 }, { "epoch": 0.3927883428006915, "grad_norm": 0.5871334739980254, "learning_rate": 4.9986521613825486e-06, "loss": 0.6512, "step": 994 }, { "epoch": 0.39318350209928377, "grad_norm": 0.5236524401205415, "learning_rate": 4.9986470125160605e-06, "loss": 0.668, "step": 995 }, { "epoch": 0.393578661397876, "grad_norm": 0.5213997123202212, "learning_rate": 4.998641853836404e-06, "loss": 0.6478, "step": 996 }, { "epoch": 0.39397382069646825, "grad_norm": 0.5407197664544282, "learning_rate": 4.9986366853436e-06, "loss": 0.6936, "step": 997 }, { "epoch": 0.3943689799950605, "grad_norm": 0.5280672011633306, "learning_rate": 4.9986315070376675e-06, "loss": 0.6498, "step": 998 }, { "epoch": 0.3947641392936527, "grad_norm": 0.5204342792742052, "learning_rate": 4.998626318918628e-06, "loss": 0.6775, "step": 999 }, { "epoch": 0.395159298592245, "grad_norm": 0.5029429193286309, "learning_rate": 4.998621120986502e-06, "loss": 0.6573, "step": 1000 }, { "epoch": 0.39555445789083726, "grad_norm": 0.5297718137035743, "learning_rate": 4.998615913241309e-06, "loss": 0.6645, "step": 1001 }, { "epoch": 0.3959496171894295, "grad_norm": 0.5082079772490311, "learning_rate": 4.9986106956830705e-06, "loss": 0.6609, "step": 1002 }, { "epoch": 0.39634477648802174, "grad_norm": 0.5349674831031332, "learning_rate": 4.998605468311805e-06, "loss": 0.6563, "step": 1003 }, { "epoch": 0.396739935786614, "grad_norm": 0.5733947545706467, "learning_rate": 4.9986002311275365e-06, "loss": 0.6363, "step": 1004 }, { "epoch": 0.3971350950852062, "grad_norm": 0.5182307185611646, "learning_rate": 4.9985949841302825e-06, "loss": 0.6516, "step": 1005 }, { "epoch": 0.39753025438379846, "grad_norm": 0.5062700575538346, "learning_rate": 4.9985897273200645e-06, "loss": 0.6529, "step": 1006 }, { "epoch": 0.3979254136823907, "grad_norm": 0.5384297745518919, "learning_rate": 4.998584460696904e-06, "loss": 0.6737, "step": 1007 }, { "epoch": 0.39832057298098295, "grad_norm": 0.5816595934297174, "learning_rate": 4.99857918426082e-06, "loss": 0.6739, "step": 1008 }, { "epoch": 0.3987157322795752, "grad_norm": 0.5179842596066364, "learning_rate": 4.998573898011835e-06, "loss": 0.6551, "step": 1009 }, { "epoch": 0.3991108915781674, "grad_norm": 0.5782261510920916, "learning_rate": 4.998568601949968e-06, "loss": 0.6653, "step": 1010 }, { "epoch": 0.39950605087675967, "grad_norm": 0.5070377777043106, "learning_rate": 4.998563296075241e-06, "loss": 0.6637, "step": 1011 }, { "epoch": 0.39990121017535196, "grad_norm": 0.5348184367082748, "learning_rate": 4.998557980387675e-06, "loss": 0.6573, "step": 1012 }, { "epoch": 0.4002963694739442, "grad_norm": 0.5533134020078849, "learning_rate": 4.99855265488729e-06, "loss": 0.6513, "step": 1013 }, { "epoch": 0.40069152877253644, "grad_norm": 0.5214559414090275, "learning_rate": 4.998547319574108e-06, "loss": 0.6607, "step": 1014 }, { "epoch": 0.4010866880711287, "grad_norm": 0.5454794781961575, "learning_rate": 4.998541974448149e-06, "loss": 0.657, "step": 1015 }, { "epoch": 0.4014818473697209, "grad_norm": 0.5323970053630921, "learning_rate": 4.998536619509434e-06, "loss": 0.6727, "step": 1016 }, { "epoch": 0.40187700666831316, "grad_norm": 0.5677904629043107, "learning_rate": 4.998531254757984e-06, "loss": 0.6439, "step": 1017 }, { "epoch": 0.4022721659669054, "grad_norm": 0.5203197455098383, "learning_rate": 4.998525880193822e-06, "loss": 0.6694, "step": 1018 }, { "epoch": 0.40266732526549764, "grad_norm": 0.5251699628155582, "learning_rate": 4.998520495816967e-06, "loss": 0.6546, "step": 1019 }, { "epoch": 0.4030624845640899, "grad_norm": 0.5539885023636764, "learning_rate": 4.99851510162744e-06, "loss": 0.6802, "step": 1020 }, { "epoch": 0.4034576438626821, "grad_norm": 0.5328047319862138, "learning_rate": 4.998509697625262e-06, "loss": 0.65, "step": 1021 }, { "epoch": 0.40385280316127437, "grad_norm": 0.5505034070242261, "learning_rate": 4.998504283810457e-06, "loss": 0.6507, "step": 1022 }, { "epoch": 0.40424796245986666, "grad_norm": 0.571221870438257, "learning_rate": 4.998498860183043e-06, "loss": 0.6449, "step": 1023 }, { "epoch": 0.4046431217584589, "grad_norm": 0.534352298409385, "learning_rate": 4.998493426743044e-06, "loss": 0.6806, "step": 1024 }, { "epoch": 0.40503828105705114, "grad_norm": 0.5487946183182154, "learning_rate": 4.9984879834904785e-06, "loss": 0.6471, "step": 1025 }, { "epoch": 0.4054334403556434, "grad_norm": 0.5244300620347428, "learning_rate": 4.99848253042537e-06, "loss": 0.6555, "step": 1026 }, { "epoch": 0.4058285996542356, "grad_norm": 0.5408629309328332, "learning_rate": 4.99847706754774e-06, "loss": 0.6642, "step": 1027 }, { "epoch": 0.40622375895282786, "grad_norm": 0.5333823797340622, "learning_rate": 4.998471594857608e-06, "loss": 0.6543, "step": 1028 }, { "epoch": 0.4066189182514201, "grad_norm": 0.512514585835694, "learning_rate": 4.998466112354998e-06, "loss": 0.663, "step": 1029 }, { "epoch": 0.40701407755001234, "grad_norm": 0.5066646847695025, "learning_rate": 4.998460620039929e-06, "loss": 0.6483, "step": 1030 }, { "epoch": 0.4074092368486046, "grad_norm": 0.5408686381957716, "learning_rate": 4.998455117912425e-06, "loss": 0.6556, "step": 1031 }, { "epoch": 0.4078043961471968, "grad_norm": 0.5118425141052046, "learning_rate": 4.998449605972505e-06, "loss": 0.6727, "step": 1032 }, { "epoch": 0.40819955544578906, "grad_norm": 0.5596018207482388, "learning_rate": 4.9984440842201935e-06, "loss": 0.662, "step": 1033 }, { "epoch": 0.4085947147443813, "grad_norm": 0.5081268236908408, "learning_rate": 4.99843855265551e-06, "loss": 0.6472, "step": 1034 }, { "epoch": 0.4089898740429736, "grad_norm": 0.5208429105965913, "learning_rate": 4.998433011278477e-06, "loss": 0.6542, "step": 1035 }, { "epoch": 0.40938503334156584, "grad_norm": 0.5476716325972726, "learning_rate": 4.998427460089117e-06, "loss": 0.6636, "step": 1036 }, { "epoch": 0.4097801926401581, "grad_norm": 0.5581628717427036, "learning_rate": 4.9984218990874504e-06, "loss": 0.6688, "step": 1037 }, { "epoch": 0.4101753519387503, "grad_norm": 0.5313422288525836, "learning_rate": 4.9984163282734995e-06, "loss": 0.6614, "step": 1038 }, { "epoch": 0.41057051123734256, "grad_norm": 0.5152033372832628, "learning_rate": 4.998410747647287e-06, "loss": 0.6793, "step": 1039 }, { "epoch": 0.4109656705359348, "grad_norm": 0.5352102398766864, "learning_rate": 4.998405157208833e-06, "loss": 0.6502, "step": 1040 }, { "epoch": 0.41136082983452704, "grad_norm": 0.6112119061779702, "learning_rate": 4.998399556958162e-06, "loss": 0.6765, "step": 1041 }, { "epoch": 0.4117559891331193, "grad_norm": 0.5012000160841078, "learning_rate": 4.9983939468952945e-06, "loss": 0.6602, "step": 1042 }, { "epoch": 0.4121511484317115, "grad_norm": 0.5623209902661767, "learning_rate": 4.9983883270202525e-06, "loss": 0.6458, "step": 1043 }, { "epoch": 0.41254630773030376, "grad_norm": 0.52453782255717, "learning_rate": 4.998382697333058e-06, "loss": 0.6542, "step": 1044 }, { "epoch": 0.412941467028896, "grad_norm": 0.5822299190642851, "learning_rate": 4.998377057833733e-06, "loss": 0.679, "step": 1045 }, { "epoch": 0.41333662632748824, "grad_norm": 0.5421354227434607, "learning_rate": 4.998371408522302e-06, "loss": 0.6717, "step": 1046 }, { "epoch": 0.41373178562608054, "grad_norm": 0.5286757169293261, "learning_rate": 4.998365749398783e-06, "loss": 0.6711, "step": 1047 }, { "epoch": 0.4141269449246728, "grad_norm": 0.5177607288874907, "learning_rate": 4.9983600804632e-06, "loss": 0.6686, "step": 1048 }, { "epoch": 0.414522104223265, "grad_norm": 0.5543980251697767, "learning_rate": 4.998354401715577e-06, "loss": 0.6663, "step": 1049 }, { "epoch": 0.41491726352185726, "grad_norm": 0.5459993812213396, "learning_rate": 4.9983487131559354e-06, "loss": 0.6407, "step": 1050 }, { "epoch": 0.4153124228204495, "grad_norm": 0.5359069513585107, "learning_rate": 4.998343014784296e-06, "loss": 0.6575, "step": 1051 }, { "epoch": 0.41570758211904174, "grad_norm": 0.5300909673600657, "learning_rate": 4.998337306600683e-06, "loss": 0.6667, "step": 1052 }, { "epoch": 0.416102741417634, "grad_norm": 0.5392983958526001, "learning_rate": 4.9983315886051185e-06, "loss": 0.6721, "step": 1053 }, { "epoch": 0.4164979007162262, "grad_norm": 0.5070297189799001, "learning_rate": 4.998325860797624e-06, "loss": 0.6561, "step": 1054 }, { "epoch": 0.41689306001481846, "grad_norm": 0.5224307231811263, "learning_rate": 4.998320123178223e-06, "loss": 0.6618, "step": 1055 }, { "epoch": 0.4172882193134107, "grad_norm": 0.604920198130287, "learning_rate": 4.998314375746937e-06, "loss": 0.6532, "step": 1056 }, { "epoch": 0.41768337861200294, "grad_norm": 0.5168038782011498, "learning_rate": 4.9983086185037896e-06, "loss": 0.6651, "step": 1057 }, { "epoch": 0.4180785379105952, "grad_norm": 0.5297744100346249, "learning_rate": 4.998302851448803e-06, "loss": 0.6608, "step": 1058 }, { "epoch": 0.4184736972091875, "grad_norm": 0.5574740674977203, "learning_rate": 4.998297074581999e-06, "loss": 0.6881, "step": 1059 }, { "epoch": 0.4188688565077797, "grad_norm": 0.521702687451242, "learning_rate": 4.9982912879034025e-06, "loss": 0.6839, "step": 1060 }, { "epoch": 0.41926401580637196, "grad_norm": 0.5188090920413589, "learning_rate": 4.9982854914130345e-06, "loss": 0.6602, "step": 1061 }, { "epoch": 0.4196591751049642, "grad_norm": 0.5270636307682602, "learning_rate": 4.998279685110917e-06, "loss": 0.6606, "step": 1062 }, { "epoch": 0.42005433440355644, "grad_norm": 0.5366763635966227, "learning_rate": 4.998273868997075e-06, "loss": 0.6704, "step": 1063 }, { "epoch": 0.4204494937021487, "grad_norm": 0.5031111535411266, "learning_rate": 4.9982680430715305e-06, "loss": 0.6747, "step": 1064 }, { "epoch": 0.4208446530007409, "grad_norm": 0.5151445274947559, "learning_rate": 4.998262207334306e-06, "loss": 0.6645, "step": 1065 }, { "epoch": 0.42123981229933316, "grad_norm": 0.5210208888874347, "learning_rate": 4.998256361785424e-06, "loss": 0.65, "step": 1066 }, { "epoch": 0.4216349715979254, "grad_norm": 0.5169090685583989, "learning_rate": 4.998250506424908e-06, "loss": 0.6517, "step": 1067 }, { "epoch": 0.42203013089651764, "grad_norm": 0.5413286895905355, "learning_rate": 4.998244641252781e-06, "loss": 0.6795, "step": 1068 }, { "epoch": 0.4224252901951099, "grad_norm": 0.6433702861587849, "learning_rate": 4.998238766269067e-06, "loss": 0.6951, "step": 1069 }, { "epoch": 0.4228204494937021, "grad_norm": 0.5023044231984531, "learning_rate": 4.998232881473787e-06, "loss": 0.6694, "step": 1070 }, { "epoch": 0.4232156087922944, "grad_norm": 0.5994864572308269, "learning_rate": 4.998226986866966e-06, "loss": 0.6532, "step": 1071 }, { "epoch": 0.42361076809088666, "grad_norm": 0.5126827044965871, "learning_rate": 4.998221082448627e-06, "loss": 0.6496, "step": 1072 }, { "epoch": 0.4240059273894789, "grad_norm": 0.6035162881851404, "learning_rate": 4.998215168218791e-06, "loss": 0.6633, "step": 1073 }, { "epoch": 0.42440108668807114, "grad_norm": 0.5296590133461134, "learning_rate": 4.998209244177484e-06, "loss": 0.6691, "step": 1074 }, { "epoch": 0.4247962459866634, "grad_norm": 0.5435187950208628, "learning_rate": 4.998203310324727e-06, "loss": 0.6636, "step": 1075 }, { "epoch": 0.4251914052852556, "grad_norm": 0.5931608594717878, "learning_rate": 4.998197366660546e-06, "loss": 0.6772, "step": 1076 }, { "epoch": 0.42558656458384786, "grad_norm": 0.524768630466991, "learning_rate": 4.9981914131849614e-06, "loss": 0.657, "step": 1077 }, { "epoch": 0.4259817238824401, "grad_norm": 0.5518814321211608, "learning_rate": 4.998185449897999e-06, "loss": 0.6483, "step": 1078 }, { "epoch": 0.42637688318103234, "grad_norm": 0.536584834748668, "learning_rate": 4.998179476799679e-06, "loss": 0.6678, "step": 1079 }, { "epoch": 0.4267720424796246, "grad_norm": 0.5621190263657815, "learning_rate": 4.998173493890029e-06, "loss": 0.6416, "step": 1080 }, { "epoch": 0.4271672017782168, "grad_norm": 0.5755170038945299, "learning_rate": 4.99816750116907e-06, "loss": 0.6638, "step": 1081 }, { "epoch": 0.42756236107680906, "grad_norm": 0.6441072902907758, "learning_rate": 4.998161498636826e-06, "loss": 0.6661, "step": 1082 }, { "epoch": 0.42795752037540136, "grad_norm": 0.565628041779372, "learning_rate": 4.998155486293321e-06, "loss": 0.6417, "step": 1083 }, { "epoch": 0.4283526796739936, "grad_norm": 1.8488838880474752, "learning_rate": 4.9981494641385775e-06, "loss": 0.6564, "step": 1084 }, { "epoch": 0.42874783897258584, "grad_norm": 0.5160251022562632, "learning_rate": 4.99814343217262e-06, "loss": 0.6748, "step": 1085 }, { "epoch": 0.4291429982711781, "grad_norm": 0.5216784182870835, "learning_rate": 4.998137390395472e-06, "loss": 0.6496, "step": 1086 }, { "epoch": 0.4295381575697703, "grad_norm": 0.569161680789975, "learning_rate": 4.998131338807158e-06, "loss": 0.6629, "step": 1087 }, { "epoch": 0.42993331686836256, "grad_norm": 0.5479677358649104, "learning_rate": 4.9981252774077e-06, "loss": 0.658, "step": 1088 }, { "epoch": 0.4303284761669548, "grad_norm": 0.5267588064158593, "learning_rate": 4.998119206197124e-06, "loss": 0.646, "step": 1089 }, { "epoch": 0.43072363546554704, "grad_norm": 0.6215218453962831, "learning_rate": 4.9981131251754516e-06, "loss": 0.6473, "step": 1090 }, { "epoch": 0.4311187947641393, "grad_norm": 0.5829418685005663, "learning_rate": 4.998107034342708e-06, "loss": 0.6489, "step": 1091 }, { "epoch": 0.4315139540627315, "grad_norm": 0.5445716063606678, "learning_rate": 4.998100933698917e-06, "loss": 0.6563, "step": 1092 }, { "epoch": 0.43190911336132376, "grad_norm": 0.5717920439154024, "learning_rate": 4.998094823244103e-06, "loss": 0.6455, "step": 1093 }, { "epoch": 0.43230427265991606, "grad_norm": 0.5194203985812613, "learning_rate": 4.9980887029782895e-06, "loss": 0.6441, "step": 1094 }, { "epoch": 0.4326994319585083, "grad_norm": 0.5380215301586405, "learning_rate": 4.9980825729015e-06, "loss": 0.6592, "step": 1095 }, { "epoch": 0.43309459125710054, "grad_norm": 0.5656627049153228, "learning_rate": 4.998076433013758e-06, "loss": 0.6538, "step": 1096 }, { "epoch": 0.4334897505556928, "grad_norm": 0.5266370469773144, "learning_rate": 4.998070283315091e-06, "loss": 0.6631, "step": 1097 }, { "epoch": 0.433884909854285, "grad_norm": 0.6484381198976229, "learning_rate": 4.998064123805519e-06, "loss": 0.6539, "step": 1098 }, { "epoch": 0.43428006915287726, "grad_norm": 0.5474765769152128, "learning_rate": 4.99805795448507e-06, "loss": 0.6589, "step": 1099 }, { "epoch": 0.4346752284514695, "grad_norm": 0.532857291701254, "learning_rate": 4.998051775353764e-06, "loss": 0.6592, "step": 1100 }, { "epoch": 0.43507038775006174, "grad_norm": 0.58383601467703, "learning_rate": 4.998045586411629e-06, "loss": 0.6857, "step": 1101 }, { "epoch": 0.435465547048654, "grad_norm": 0.5473181451591314, "learning_rate": 4.998039387658686e-06, "loss": 0.6637, "step": 1102 }, { "epoch": 0.4358607063472462, "grad_norm": 0.5385435795744777, "learning_rate": 4.998033179094963e-06, "loss": 0.6259, "step": 1103 }, { "epoch": 0.43625586564583846, "grad_norm": 0.5531552648985082, "learning_rate": 4.998026960720483e-06, "loss": 0.6586, "step": 1104 }, { "epoch": 0.4366510249444307, "grad_norm": 0.528398105412856, "learning_rate": 4.998020732535268e-06, "loss": 0.6423, "step": 1105 }, { "epoch": 0.437046184243023, "grad_norm": 0.5211440156999154, "learning_rate": 4.998014494539345e-06, "loss": 0.6316, "step": 1106 }, { "epoch": 0.43744134354161524, "grad_norm": 0.547888131353762, "learning_rate": 4.998008246732739e-06, "loss": 0.6342, "step": 1107 }, { "epoch": 0.4378365028402075, "grad_norm": 0.5557989460207667, "learning_rate": 4.998001989115473e-06, "loss": 0.6574, "step": 1108 }, { "epoch": 0.4382316621387997, "grad_norm": 0.5500706961475933, "learning_rate": 4.997995721687572e-06, "loss": 0.6471, "step": 1109 }, { "epoch": 0.43862682143739196, "grad_norm": 0.5504155370955836, "learning_rate": 4.997989444449061e-06, "loss": 0.6576, "step": 1110 }, { "epoch": 0.4390219807359842, "grad_norm": 0.5547627447818888, "learning_rate": 4.997983157399963e-06, "loss": 0.6764, "step": 1111 }, { "epoch": 0.43941714003457644, "grad_norm": 0.5804481448072446, "learning_rate": 4.997976860540305e-06, "loss": 0.6682, "step": 1112 }, { "epoch": 0.4398122993331687, "grad_norm": 0.5397528058437261, "learning_rate": 4.997970553870111e-06, "loss": 0.6572, "step": 1113 }, { "epoch": 0.4402074586317609, "grad_norm": 0.6196448584341162, "learning_rate": 4.997964237389405e-06, "loss": 0.658, "step": 1114 }, { "epoch": 0.44060261793035316, "grad_norm": 0.6066819778157864, "learning_rate": 4.997957911098212e-06, "loss": 0.6799, "step": 1115 }, { "epoch": 0.4409977772289454, "grad_norm": 0.5478230864583113, "learning_rate": 4.997951574996558e-06, "loss": 0.6656, "step": 1116 }, { "epoch": 0.44139293652753764, "grad_norm": 0.6388534140119478, "learning_rate": 4.997945229084467e-06, "loss": 0.6733, "step": 1117 }, { "epoch": 0.44178809582612993, "grad_norm": 0.5602979323078272, "learning_rate": 4.997938873361964e-06, "loss": 0.6499, "step": 1118 }, { "epoch": 0.4421832551247222, "grad_norm": 0.6110344898706578, "learning_rate": 4.997932507829073e-06, "loss": 0.6486, "step": 1119 }, { "epoch": 0.4425784144233144, "grad_norm": 0.5439975635095607, "learning_rate": 4.997926132485821e-06, "loss": 0.6615, "step": 1120 }, { "epoch": 0.44297357372190665, "grad_norm": 0.5670773229357888, "learning_rate": 4.9979197473322315e-06, "loss": 0.6644, "step": 1121 }, { "epoch": 0.4433687330204989, "grad_norm": 0.6135668184552048, "learning_rate": 4.99791335236833e-06, "loss": 0.6586, "step": 1122 }, { "epoch": 0.44376389231909114, "grad_norm": 0.6934295929969644, "learning_rate": 4.997906947594142e-06, "loss": 0.6446, "step": 1123 }, { "epoch": 0.4441590516176834, "grad_norm": 0.5521875793962009, "learning_rate": 4.997900533009692e-06, "loss": 0.6845, "step": 1124 }, { "epoch": 0.4445542109162756, "grad_norm": 0.5802844349362782, "learning_rate": 4.9978941086150055e-06, "loss": 0.6313, "step": 1125 }, { "epoch": 0.44494937021486786, "grad_norm": 0.5443010267154513, "learning_rate": 4.997887674410108e-06, "loss": 0.6602, "step": 1126 }, { "epoch": 0.4453445295134601, "grad_norm": 0.5391556371847563, "learning_rate": 4.997881230395024e-06, "loss": 0.6605, "step": 1127 }, { "epoch": 0.44573968881205234, "grad_norm": 0.6625220158972982, "learning_rate": 4.99787477656978e-06, "loss": 0.6627, "step": 1128 }, { "epoch": 0.4461348481106446, "grad_norm": 0.5147363265798429, "learning_rate": 4.9978683129344e-06, "loss": 0.6382, "step": 1129 }, { "epoch": 0.4465300074092369, "grad_norm": 0.5482662281477236, "learning_rate": 4.99786183948891e-06, "loss": 0.6558, "step": 1130 }, { "epoch": 0.4469251667078291, "grad_norm": 0.5604183997920332, "learning_rate": 4.997855356233337e-06, "loss": 0.6517, "step": 1131 }, { "epoch": 0.44732032600642135, "grad_norm": 0.5353748475279002, "learning_rate": 4.997848863167703e-06, "loss": 0.6341, "step": 1132 }, { "epoch": 0.4477154853050136, "grad_norm": 0.5488398069149468, "learning_rate": 4.997842360292036e-06, "loss": 0.6676, "step": 1133 }, { "epoch": 0.44811064460360583, "grad_norm": 0.5663070948772903, "learning_rate": 4.997835847606361e-06, "loss": 0.64, "step": 1134 }, { "epoch": 0.4485058039021981, "grad_norm": 0.5403481201660154, "learning_rate": 4.997829325110705e-06, "loss": 0.6599, "step": 1135 }, { "epoch": 0.4489009632007903, "grad_norm": 0.5222149042010814, "learning_rate": 4.997822792805091e-06, "loss": 0.6262, "step": 1136 }, { "epoch": 0.44929612249938256, "grad_norm": 0.6013913868061602, "learning_rate": 4.997816250689545e-06, "loss": 0.6661, "step": 1137 }, { "epoch": 0.4496912817979748, "grad_norm": 0.6787491243452577, "learning_rate": 4.997809698764094e-06, "loss": 0.653, "step": 1138 }, { "epoch": 0.45008644109656704, "grad_norm": 0.6230119957241225, "learning_rate": 4.997803137028764e-06, "loss": 0.6444, "step": 1139 }, { "epoch": 0.4504816003951593, "grad_norm": 0.5712264595241187, "learning_rate": 4.9977965654835795e-06, "loss": 0.6115, "step": 1140 }, { "epoch": 0.4508767596937515, "grad_norm": 0.5074125803443317, "learning_rate": 4.997789984128567e-06, "loss": 0.6439, "step": 1141 }, { "epoch": 0.4512719189923438, "grad_norm": 0.5525141812309801, "learning_rate": 4.997783392963752e-06, "loss": 0.6223, "step": 1142 }, { "epoch": 0.45166707829093605, "grad_norm": 0.5882221184228417, "learning_rate": 4.997776791989161e-06, "loss": 0.6506, "step": 1143 }, { "epoch": 0.4520622375895283, "grad_norm": 0.5063672919950463, "learning_rate": 4.9977701812048185e-06, "loss": 0.6345, "step": 1144 }, { "epoch": 0.45245739688812053, "grad_norm": 0.5980686687629019, "learning_rate": 4.997763560610752e-06, "loss": 0.6694, "step": 1145 }, { "epoch": 0.4528525561867128, "grad_norm": 0.5737769969500374, "learning_rate": 4.997756930206987e-06, "loss": 0.6576, "step": 1146 }, { "epoch": 0.453247715485305, "grad_norm": 0.5254065661728536, "learning_rate": 4.99775028999355e-06, "loss": 0.6406, "step": 1147 }, { "epoch": 0.45364287478389725, "grad_norm": 0.5556443229887936, "learning_rate": 4.997743639970466e-06, "loss": 0.6353, "step": 1148 }, { "epoch": 0.4540380340824895, "grad_norm": 0.6029582429283775, "learning_rate": 4.997736980137762e-06, "loss": 0.6525, "step": 1149 }, { "epoch": 0.45443319338108173, "grad_norm": 0.5498673723447964, "learning_rate": 4.997730310495464e-06, "loss": 0.6499, "step": 1150 }, { "epoch": 0.454828352679674, "grad_norm": 0.5478846740663137, "learning_rate": 4.997723631043597e-06, "loss": 0.6598, "step": 1151 }, { "epoch": 0.4552235119782662, "grad_norm": 0.5257136286019838, "learning_rate": 4.997716941782189e-06, "loss": 0.642, "step": 1152 }, { "epoch": 0.4556186712768585, "grad_norm": 0.5545888280208816, "learning_rate": 4.997710242711266e-06, "loss": 0.6351, "step": 1153 }, { "epoch": 0.45601383057545075, "grad_norm": 0.537711633021132, "learning_rate": 4.997703533830853e-06, "loss": 0.6504, "step": 1154 }, { "epoch": 0.456408989874043, "grad_norm": 0.8090471632091599, "learning_rate": 4.997696815140978e-06, "loss": 0.6535, "step": 1155 }, { "epoch": 0.45680414917263523, "grad_norm": 0.5180994899415312, "learning_rate": 4.997690086641666e-06, "loss": 0.6526, "step": 1156 }, { "epoch": 0.4571993084712275, "grad_norm": 0.5504445838265242, "learning_rate": 4.997683348332945e-06, "loss": 0.6491, "step": 1157 }, { "epoch": 0.4575944677698197, "grad_norm": 0.5189150345527932, "learning_rate": 4.997676600214839e-06, "loss": 0.6358, "step": 1158 }, { "epoch": 0.45798962706841195, "grad_norm": 0.5120657160011092, "learning_rate": 4.997669842287377e-06, "loss": 0.6447, "step": 1159 }, { "epoch": 0.4583847863670042, "grad_norm": 0.5051586925759178, "learning_rate": 4.997663074550584e-06, "loss": 0.6385, "step": 1160 }, { "epoch": 0.45877994566559643, "grad_norm": 0.5168398913045842, "learning_rate": 4.997656297004487e-06, "loss": 0.6743, "step": 1161 }, { "epoch": 0.4591751049641887, "grad_norm": 0.5043203559885473, "learning_rate": 4.997649509649114e-06, "loss": 0.6449, "step": 1162 }, { "epoch": 0.4595702642627809, "grad_norm": 0.5459507585407142, "learning_rate": 4.997642712484489e-06, "loss": 0.6476, "step": 1163 }, { "epoch": 0.45996542356137315, "grad_norm": 0.5245847133766435, "learning_rate": 4.99763590551064e-06, "loss": 0.6596, "step": 1164 }, { "epoch": 0.46036058285996545, "grad_norm": 0.4986446564658458, "learning_rate": 4.997629088727594e-06, "loss": 0.6525, "step": 1165 }, { "epoch": 0.4607557421585577, "grad_norm": 0.507187864739981, "learning_rate": 4.997622262135379e-06, "loss": 0.6522, "step": 1166 }, { "epoch": 0.46115090145714993, "grad_norm": 0.5093539527497493, "learning_rate": 4.997615425734019e-06, "loss": 0.6483, "step": 1167 }, { "epoch": 0.46154606075574217, "grad_norm": 0.5152474618083938, "learning_rate": 4.997608579523543e-06, "loss": 0.6435, "step": 1168 }, { "epoch": 0.4619412200543344, "grad_norm": 0.5340544828015156, "learning_rate": 4.997601723503977e-06, "loss": 0.661, "step": 1169 }, { "epoch": 0.46233637935292665, "grad_norm": 0.5035751988607956, "learning_rate": 4.997594857675347e-06, "loss": 0.6365, "step": 1170 }, { "epoch": 0.4627315386515189, "grad_norm": 0.4881941716047279, "learning_rate": 4.997587982037682e-06, "loss": 0.6539, "step": 1171 }, { "epoch": 0.46312669795011113, "grad_norm": 0.5200636790139908, "learning_rate": 4.997581096591007e-06, "loss": 0.66, "step": 1172 }, { "epoch": 0.4635218572487034, "grad_norm": 0.575490383484796, "learning_rate": 4.9975742013353515e-06, "loss": 0.6833, "step": 1173 }, { "epoch": 0.4639170165472956, "grad_norm": 0.5118745281561939, "learning_rate": 4.99756729627074e-06, "loss": 0.6538, "step": 1174 }, { "epoch": 0.46431217584588785, "grad_norm": 0.5207712676596821, "learning_rate": 4.997560381397201e-06, "loss": 0.6575, "step": 1175 }, { "epoch": 0.4647073351444801, "grad_norm": 0.5196060135878352, "learning_rate": 4.997553456714762e-06, "loss": 0.6606, "step": 1176 }, { "epoch": 0.4651024944430724, "grad_norm": 0.4993473638807208, "learning_rate": 4.997546522223449e-06, "loss": 0.6473, "step": 1177 }, { "epoch": 0.46549765374166463, "grad_norm": 0.6632587575022358, "learning_rate": 4.99753957792329e-06, "loss": 0.6479, "step": 1178 }, { "epoch": 0.46589281304025687, "grad_norm": 0.5118246850008393, "learning_rate": 4.997532623814312e-06, "loss": 0.6508, "step": 1179 }, { "epoch": 0.4662879723388491, "grad_norm": 0.5216848433144801, "learning_rate": 4.997525659896543e-06, "loss": 0.6507, "step": 1180 }, { "epoch": 0.46668313163744135, "grad_norm": 0.5116873956882975, "learning_rate": 4.99751868617001e-06, "loss": 0.6321, "step": 1181 }, { "epoch": 0.4670782909360336, "grad_norm": 0.5465288573676182, "learning_rate": 4.997511702634739e-06, "loss": 0.6723, "step": 1182 }, { "epoch": 0.46747345023462583, "grad_norm": 0.5229836616121926, "learning_rate": 4.997504709290759e-06, "loss": 0.6572, "step": 1183 }, { "epoch": 0.46786860953321807, "grad_norm": 0.5055148283070077, "learning_rate": 4.997497706138098e-06, "loss": 0.651, "step": 1184 }, { "epoch": 0.4682637688318103, "grad_norm": 0.5073671397211292, "learning_rate": 4.997490693176782e-06, "loss": 0.6663, "step": 1185 }, { "epoch": 0.46865892813040255, "grad_norm": 0.5187411381755869, "learning_rate": 4.997483670406839e-06, "loss": 0.6536, "step": 1186 }, { "epoch": 0.4690540874289948, "grad_norm": 0.4896382189093635, "learning_rate": 4.9974766378282964e-06, "loss": 0.6376, "step": 1187 }, { "epoch": 0.46944924672758703, "grad_norm": 0.5403722513951374, "learning_rate": 4.997469595441182e-06, "loss": 0.6384, "step": 1188 }, { "epoch": 0.46984440602617933, "grad_norm": 0.5283649386398254, "learning_rate": 4.9974625432455245e-06, "loss": 0.6619, "step": 1189 }, { "epoch": 0.47023956532477157, "grad_norm": 0.5145771307045552, "learning_rate": 4.99745548124135e-06, "loss": 0.6608, "step": 1190 }, { "epoch": 0.4706347246233638, "grad_norm": 0.5088786133567975, "learning_rate": 4.997448409428687e-06, "loss": 0.652, "step": 1191 }, { "epoch": 0.47102988392195605, "grad_norm": 0.5298568145169689, "learning_rate": 4.997441327807563e-06, "loss": 0.626, "step": 1192 }, { "epoch": 0.4714250432205483, "grad_norm": 0.525542401872513, "learning_rate": 4.997434236378006e-06, "loss": 0.6535, "step": 1193 }, { "epoch": 0.47182020251914053, "grad_norm": 0.5112712732618009, "learning_rate": 4.997427135140045e-06, "loss": 0.6463, "step": 1194 }, { "epoch": 0.47221536181773277, "grad_norm": 0.5333131112367618, "learning_rate": 4.997420024093705e-06, "loss": 0.6479, "step": 1195 }, { "epoch": 0.472610521116325, "grad_norm": 0.5314791312504851, "learning_rate": 4.997412903239017e-06, "loss": 0.6412, "step": 1196 }, { "epoch": 0.47300568041491725, "grad_norm": 0.49879520819742884, "learning_rate": 4.997405772576007e-06, "loss": 0.6429, "step": 1197 }, { "epoch": 0.4734008397135095, "grad_norm": 0.5301052895426869, "learning_rate": 4.997398632104703e-06, "loss": 0.662, "step": 1198 }, { "epoch": 0.47379599901210173, "grad_norm": 0.5114526203823233, "learning_rate": 4.997391481825135e-06, "loss": 0.6488, "step": 1199 }, { "epoch": 0.47419115831069397, "grad_norm": 0.7049934317889363, "learning_rate": 4.9973843217373295e-06, "loss": 0.6367, "step": 1200 }, { "epoch": 0.47458631760928627, "grad_norm": 0.5070332705862394, "learning_rate": 4.997377151841314e-06, "loss": 0.6449, "step": 1201 }, { "epoch": 0.4749814769078785, "grad_norm": 0.5056739665660747, "learning_rate": 4.997369972137118e-06, "loss": 0.6098, "step": 1202 }, { "epoch": 0.47537663620647075, "grad_norm": 0.5196505706221262, "learning_rate": 4.9973627826247695e-06, "loss": 0.6412, "step": 1203 }, { "epoch": 0.475771795505063, "grad_norm": 0.542890120550589, "learning_rate": 4.997355583304297e-06, "loss": 0.6798, "step": 1204 }, { "epoch": 0.47616695480365523, "grad_norm": 0.5360546233685582, "learning_rate": 4.997348374175727e-06, "loss": 0.6472, "step": 1205 }, { "epoch": 0.47656211410224747, "grad_norm": 0.5130309786882297, "learning_rate": 4.997341155239089e-06, "loss": 0.6628, "step": 1206 }, { "epoch": 0.4769572734008397, "grad_norm": 0.5155911732166051, "learning_rate": 4.997333926494412e-06, "loss": 0.6589, "step": 1207 }, { "epoch": 0.47735243269943195, "grad_norm": 0.5151851351284802, "learning_rate": 4.997326687941724e-06, "loss": 0.6426, "step": 1208 }, { "epoch": 0.4777475919980242, "grad_norm": 0.5421565514451022, "learning_rate": 4.997319439581053e-06, "loss": 0.6286, "step": 1209 }, { "epoch": 0.47814275129661643, "grad_norm": 0.5426134587047164, "learning_rate": 4.997312181412428e-06, "loss": 0.657, "step": 1210 }, { "epoch": 0.47853791059520867, "grad_norm": 0.5155176144597221, "learning_rate": 4.997304913435876e-06, "loss": 0.6545, "step": 1211 }, { "epoch": 0.4789330698938009, "grad_norm": 0.510072359951836, "learning_rate": 4.997297635651428e-06, "loss": 0.669, "step": 1212 }, { "epoch": 0.4793282291923932, "grad_norm": 0.5446367757282072, "learning_rate": 4.997290348059111e-06, "loss": 0.6428, "step": 1213 }, { "epoch": 0.47972338849098545, "grad_norm": 0.4942516591572794, "learning_rate": 4.997283050658954e-06, "loss": 0.6276, "step": 1214 }, { "epoch": 0.4801185477895777, "grad_norm": 0.5495732208117189, "learning_rate": 4.997275743450986e-06, "loss": 0.6542, "step": 1215 }, { "epoch": 0.48051370708816993, "grad_norm": 0.5299388692393306, "learning_rate": 4.997268426435234e-06, "loss": 0.6659, "step": 1216 }, { "epoch": 0.48090886638676217, "grad_norm": 0.5053176342784106, "learning_rate": 4.99726109961173e-06, "loss": 0.6309, "step": 1217 }, { "epoch": 0.4813040256853544, "grad_norm": 0.6098039985268289, "learning_rate": 4.997253762980499e-06, "loss": 0.667, "step": 1218 }, { "epoch": 0.48169918498394665, "grad_norm": 0.5376800287875747, "learning_rate": 4.9972464165415726e-06, "loss": 0.6506, "step": 1219 }, { "epoch": 0.4820943442825389, "grad_norm": 0.5696775883802918, "learning_rate": 4.997239060294978e-06, "loss": 0.6567, "step": 1220 }, { "epoch": 0.48248950358113113, "grad_norm": 0.5370925937539444, "learning_rate": 4.997231694240745e-06, "loss": 0.6395, "step": 1221 }, { "epoch": 0.48288466287972337, "grad_norm": 0.5518524860262873, "learning_rate": 4.997224318378903e-06, "loss": 0.6545, "step": 1222 }, { "epoch": 0.4832798221783156, "grad_norm": 0.6934350958942014, "learning_rate": 4.99721693270948e-06, "loss": 0.6425, "step": 1223 }, { "epoch": 0.4836749814769079, "grad_norm": 0.5319811886445309, "learning_rate": 4.997209537232505e-06, "loss": 0.6848, "step": 1224 }, { "epoch": 0.48407014077550015, "grad_norm": 0.5133175351958729, "learning_rate": 4.9972021319480065e-06, "loss": 0.6422, "step": 1225 }, { "epoch": 0.4844653000740924, "grad_norm": 0.515963610700411, "learning_rate": 4.997194716856016e-06, "loss": 0.6544, "step": 1226 }, { "epoch": 0.4848604593726846, "grad_norm": 0.5152566753981194, "learning_rate": 4.99718729195656e-06, "loss": 0.6651, "step": 1227 }, { "epoch": 0.48525561867127687, "grad_norm": 0.51916191994593, "learning_rate": 4.997179857249669e-06, "loss": 0.6539, "step": 1228 }, { "epoch": 0.4856507779698691, "grad_norm": 0.550013608585077, "learning_rate": 4.9971724127353725e-06, "loss": 0.6719, "step": 1229 }, { "epoch": 0.48604593726846135, "grad_norm": 0.5152452965696306, "learning_rate": 4.997164958413698e-06, "loss": 0.6558, "step": 1230 }, { "epoch": 0.4864410965670536, "grad_norm": 0.5149055111216204, "learning_rate": 4.997157494284677e-06, "loss": 0.6652, "step": 1231 }, { "epoch": 0.48683625586564583, "grad_norm": 0.5332858943526043, "learning_rate": 4.997150020348337e-06, "loss": 0.6394, "step": 1232 }, { "epoch": 0.48723141516423807, "grad_norm": 0.56714602215336, "learning_rate": 4.997142536604708e-06, "loss": 0.6531, "step": 1233 }, { "epoch": 0.4876265744628303, "grad_norm": 0.5545157033195995, "learning_rate": 4.99713504305382e-06, "loss": 0.6412, "step": 1234 }, { "epoch": 0.48802173376142255, "grad_norm": 0.5587339548936653, "learning_rate": 4.997127539695701e-06, "loss": 0.6749, "step": 1235 }, { "epoch": 0.48841689306001485, "grad_norm": 0.5115432181953685, "learning_rate": 4.997120026530382e-06, "loss": 0.6409, "step": 1236 }, { "epoch": 0.4888120523586071, "grad_norm": 0.5464551003108359, "learning_rate": 4.997112503557892e-06, "loss": 0.6289, "step": 1237 }, { "epoch": 0.4892072116571993, "grad_norm": 0.5486875395190994, "learning_rate": 4.99710497077826e-06, "loss": 0.6764, "step": 1238 }, { "epoch": 0.48960237095579157, "grad_norm": 0.5032189382025016, "learning_rate": 4.997097428191516e-06, "loss": 0.658, "step": 1239 }, { "epoch": 0.4899975302543838, "grad_norm": 0.6362327602346085, "learning_rate": 4.99708987579769e-06, "loss": 0.6591, "step": 1240 }, { "epoch": 0.49039268955297605, "grad_norm": 0.5560505192991964, "learning_rate": 4.9970823135968115e-06, "loss": 0.6808, "step": 1241 }, { "epoch": 0.4907878488515683, "grad_norm": 0.5180049644630822, "learning_rate": 4.997074741588909e-06, "loss": 0.6491, "step": 1242 }, { "epoch": 0.4911830081501605, "grad_norm": 0.5090961137400876, "learning_rate": 4.997067159774014e-06, "loss": 0.6519, "step": 1243 }, { "epoch": 0.49157816744875277, "grad_norm": 0.5358018751041574, "learning_rate": 4.997059568152155e-06, "loss": 0.6689, "step": 1244 }, { "epoch": 0.491973326747345, "grad_norm": 0.5417084482265679, "learning_rate": 4.997051966723363e-06, "loss": 0.6412, "step": 1245 }, { "epoch": 0.49236848604593725, "grad_norm": 0.524183120106596, "learning_rate": 4.997044355487667e-06, "loss": 0.6574, "step": 1246 }, { "epoch": 0.4927636453445295, "grad_norm": 0.5104588248452702, "learning_rate": 4.9970367344450966e-06, "loss": 0.6336, "step": 1247 }, { "epoch": 0.4931588046431218, "grad_norm": 0.5390658040510667, "learning_rate": 4.997029103595682e-06, "loss": 0.6575, "step": 1248 }, { "epoch": 0.493553963941714, "grad_norm": 0.5085283720065422, "learning_rate": 4.997021462939454e-06, "loss": 0.644, "step": 1249 }, { "epoch": 0.49394912324030626, "grad_norm": 0.5039511304830326, "learning_rate": 4.997013812476442e-06, "loss": 0.6587, "step": 1250 }, { "epoch": 0.4943442825388985, "grad_norm": 0.530739785584054, "learning_rate": 4.997006152206675e-06, "loss": 0.6646, "step": 1251 }, { "epoch": 0.49473944183749075, "grad_norm": 0.5419758659886152, "learning_rate": 4.9969984821301855e-06, "loss": 0.6505, "step": 1252 }, { "epoch": 0.495134601136083, "grad_norm": 0.49355655865907666, "learning_rate": 4.996990802247002e-06, "loss": 0.6272, "step": 1253 }, { "epoch": 0.4955297604346752, "grad_norm": 0.5263427398496581, "learning_rate": 4.996983112557154e-06, "loss": 0.6216, "step": 1254 }, { "epoch": 0.49592491973326747, "grad_norm": 0.5361314380353814, "learning_rate": 4.996975413060673e-06, "loss": 0.6352, "step": 1255 }, { "epoch": 0.4963200790318597, "grad_norm": 0.5697308230357576, "learning_rate": 4.996967703757589e-06, "loss": 0.6623, "step": 1256 }, { "epoch": 0.49671523833045195, "grad_norm": 0.5281054703403194, "learning_rate": 4.996959984647931e-06, "loss": 0.6538, "step": 1257 }, { "epoch": 0.4971103976290442, "grad_norm": 0.5251657598014784, "learning_rate": 4.996952255731732e-06, "loss": 0.6471, "step": 1258 }, { "epoch": 0.4975055569276364, "grad_norm": 0.5279909788842021, "learning_rate": 4.99694451700902e-06, "loss": 0.6367, "step": 1259 }, { "epoch": 0.4979007162262287, "grad_norm": 0.5181820037607305, "learning_rate": 4.996936768479826e-06, "loss": 0.661, "step": 1260 }, { "epoch": 0.49829587552482096, "grad_norm": 0.526657102607614, "learning_rate": 4.9969290101441815e-06, "loss": 0.6481, "step": 1261 }, { "epoch": 0.4986910348234132, "grad_norm": 0.5547575204328595, "learning_rate": 4.996921242002115e-06, "loss": 0.6392, "step": 1262 }, { "epoch": 0.49908619412200544, "grad_norm": 0.5292550322998774, "learning_rate": 4.996913464053659e-06, "loss": 0.6462, "step": 1263 }, { "epoch": 0.4994813534205977, "grad_norm": 0.5051213982225501, "learning_rate": 4.996905676298843e-06, "loss": 0.6243, "step": 1264 }, { "epoch": 0.4998765127191899, "grad_norm": 0.5404121143898882, "learning_rate": 4.996897878737697e-06, "loss": 0.6508, "step": 1265 }, { "epoch": 0.5002716720177822, "grad_norm": 0.5853244704579353, "learning_rate": 4.996890071370253e-06, "loss": 0.6692, "step": 1266 }, { "epoch": 0.5006668313163745, "grad_norm": 0.49722504428159636, "learning_rate": 4.99688225419654e-06, "loss": 0.6241, "step": 1267 }, { "epoch": 0.5010619906149667, "grad_norm": 0.5391014672758346, "learning_rate": 4.996874427216591e-06, "loss": 0.6517, "step": 1268 }, { "epoch": 0.5014571499135589, "grad_norm": 0.5284804206913505, "learning_rate": 4.996866590430435e-06, "loss": 0.6511, "step": 1269 }, { "epoch": 0.5018523092121512, "grad_norm": 0.5270981555373312, "learning_rate": 4.996858743838103e-06, "loss": 0.6426, "step": 1270 }, { "epoch": 0.5022474685107434, "grad_norm": 0.5246251269006239, "learning_rate": 4.996850887439626e-06, "loss": 0.6558, "step": 1271 }, { "epoch": 0.5026426278093357, "grad_norm": 0.5432048241827167, "learning_rate": 4.996843021235035e-06, "loss": 0.6398, "step": 1272 }, { "epoch": 0.5030377871079279, "grad_norm": 0.5293220925783506, "learning_rate": 4.9968351452243605e-06, "loss": 0.6422, "step": 1273 }, { "epoch": 0.5034329464065201, "grad_norm": 0.5133980649059295, "learning_rate": 4.996827259407634e-06, "loss": 0.6459, "step": 1274 }, { "epoch": 0.5038281057051124, "grad_norm": 0.5382972682689461, "learning_rate": 4.996819363784886e-06, "loss": 0.6625, "step": 1275 }, { "epoch": 0.5042232650037046, "grad_norm": 0.9692398606608912, "learning_rate": 4.996811458356148e-06, "loss": 0.6399, "step": 1276 }, { "epoch": 0.5046184243022969, "grad_norm": 0.5168785866731923, "learning_rate": 4.99680354312145e-06, "loss": 0.6608, "step": 1277 }, { "epoch": 0.5050135836008891, "grad_norm": 0.5770550825296323, "learning_rate": 4.996795618080824e-06, "loss": 0.626, "step": 1278 }, { "epoch": 0.5054087428994813, "grad_norm": 0.5677109698383079, "learning_rate": 4.996787683234302e-06, "loss": 0.6436, "step": 1279 }, { "epoch": 0.5058039021980736, "grad_norm": 0.5046529589482918, "learning_rate": 4.9967797385819135e-06, "loss": 0.632, "step": 1280 }, { "epoch": 0.5061990614966658, "grad_norm": 0.5511822639718909, "learning_rate": 4.99677178412369e-06, "loss": 0.6504, "step": 1281 }, { "epoch": 0.5065942207952581, "grad_norm": 0.5162230611570326, "learning_rate": 4.996763819859663e-06, "loss": 0.6545, "step": 1282 }, { "epoch": 0.5069893800938503, "grad_norm": 0.5229128926155301, "learning_rate": 4.996755845789865e-06, "loss": 0.6812, "step": 1283 }, { "epoch": 0.5073845393924425, "grad_norm": 0.5205567049268548, "learning_rate": 4.9967478619143244e-06, "loss": 0.6311, "step": 1284 }, { "epoch": 0.5077796986910348, "grad_norm": 0.5104439487974587, "learning_rate": 4.996739868233076e-06, "loss": 0.6355, "step": 1285 }, { "epoch": 0.508174857989627, "grad_norm": 0.5216314503599059, "learning_rate": 4.996731864746148e-06, "loss": 0.6466, "step": 1286 }, { "epoch": 0.5085700172882193, "grad_norm": 0.6055026988137685, "learning_rate": 4.9967238514535745e-06, "loss": 0.6442, "step": 1287 }, { "epoch": 0.5089651765868115, "grad_norm": 0.5106442297323203, "learning_rate": 4.9967158283553856e-06, "loss": 0.6419, "step": 1288 }, { "epoch": 0.5093603358854039, "grad_norm": 0.5231952656265989, "learning_rate": 4.996707795451612e-06, "loss": 0.6562, "step": 1289 }, { "epoch": 0.5097554951839961, "grad_norm": 0.5138942047359026, "learning_rate": 4.996699752742287e-06, "loss": 0.6576, "step": 1290 }, { "epoch": 0.5101506544825883, "grad_norm": 0.5320229519854904, "learning_rate": 4.996691700227441e-06, "loss": 0.6552, "step": 1291 }, { "epoch": 0.5105458137811806, "grad_norm": 0.523798873093951, "learning_rate": 4.996683637907107e-06, "loss": 0.657, "step": 1292 }, { "epoch": 0.5109409730797728, "grad_norm": 0.4798254266039342, "learning_rate": 4.996675565781315e-06, "loss": 0.6393, "step": 1293 }, { "epoch": 0.5113361323783651, "grad_norm": 0.493096648178815, "learning_rate": 4.996667483850097e-06, "loss": 0.6446, "step": 1294 }, { "epoch": 0.5117312916769573, "grad_norm": 0.49595320560319645, "learning_rate": 4.996659392113486e-06, "loss": 0.6405, "step": 1295 }, { "epoch": 0.5121264509755495, "grad_norm": 0.5364956991750699, "learning_rate": 4.9966512905715135e-06, "loss": 0.6527, "step": 1296 }, { "epoch": 0.5125216102741418, "grad_norm": 0.5294308228967807, "learning_rate": 4.99664317922421e-06, "loss": 0.6481, "step": 1297 }, { "epoch": 0.512916769572734, "grad_norm": 0.5414401474478188, "learning_rate": 4.996635058071609e-06, "loss": 0.6669, "step": 1298 }, { "epoch": 0.5133119288713263, "grad_norm": 0.5036105503103926, "learning_rate": 4.99662692711374e-06, "loss": 0.6214, "step": 1299 }, { "epoch": 0.5137070881699185, "grad_norm": 0.4980125974467828, "learning_rate": 4.996618786350637e-06, "loss": 0.6462, "step": 1300 }, { "epoch": 0.5141022474685107, "grad_norm": 0.5214248630142702, "learning_rate": 4.996610635782332e-06, "loss": 0.6398, "step": 1301 }, { "epoch": 0.514497406767103, "grad_norm": 0.500280326327944, "learning_rate": 4.996602475408856e-06, "loss": 0.6231, "step": 1302 }, { "epoch": 0.5148925660656952, "grad_norm": 0.5289763112497039, "learning_rate": 4.996594305230241e-06, "loss": 0.6456, "step": 1303 }, { "epoch": 0.5152877253642875, "grad_norm": 0.4963706186676594, "learning_rate": 4.996586125246521e-06, "loss": 0.6507, "step": 1304 }, { "epoch": 0.5156828846628797, "grad_norm": 0.5107270935037411, "learning_rate": 4.9965779354577254e-06, "loss": 0.6505, "step": 1305 }, { "epoch": 0.5160780439614719, "grad_norm": 0.5263648487154836, "learning_rate": 4.996569735863888e-06, "loss": 0.6441, "step": 1306 }, { "epoch": 0.5164732032600642, "grad_norm": 0.5235885042012381, "learning_rate": 4.9965615264650416e-06, "loss": 0.6108, "step": 1307 }, { "epoch": 0.5168683625586564, "grad_norm": 0.5141835232080435, "learning_rate": 4.996553307261216e-06, "loss": 0.6605, "step": 1308 }, { "epoch": 0.5172635218572487, "grad_norm": 0.5388043396561775, "learning_rate": 4.996545078252446e-06, "loss": 0.6464, "step": 1309 }, { "epoch": 0.5176586811558409, "grad_norm": 0.5082281116024677, "learning_rate": 4.996536839438763e-06, "loss": 0.6323, "step": 1310 }, { "epoch": 0.5180538404544331, "grad_norm": 0.5635524885120492, "learning_rate": 4.996528590820199e-06, "loss": 0.6691, "step": 1311 }, { "epoch": 0.5184489997530254, "grad_norm": 0.5229240130623062, "learning_rate": 4.996520332396786e-06, "loss": 0.6452, "step": 1312 }, { "epoch": 0.5188441590516177, "grad_norm": 0.5812217140701571, "learning_rate": 4.996512064168558e-06, "loss": 0.6566, "step": 1313 }, { "epoch": 0.51923931835021, "grad_norm": 0.5219118851284731, "learning_rate": 4.996503786135546e-06, "loss": 0.6663, "step": 1314 }, { "epoch": 0.5196344776488022, "grad_norm": 0.507821654017062, "learning_rate": 4.996495498297783e-06, "loss": 0.6512, "step": 1315 }, { "epoch": 0.5200296369473945, "grad_norm": 0.5307845396993616, "learning_rate": 4.9964872006553025e-06, "loss": 0.6269, "step": 1316 }, { "epoch": 0.5204247962459867, "grad_norm": 0.5240077658361868, "learning_rate": 4.996478893208135e-06, "loss": 0.6816, "step": 1317 }, { "epoch": 0.5208199555445789, "grad_norm": 0.5167433367959745, "learning_rate": 4.996470575956316e-06, "loss": 0.6555, "step": 1318 }, { "epoch": 0.5212151148431712, "grad_norm": 0.5185457506960603, "learning_rate": 4.996462248899876e-06, "loss": 0.6492, "step": 1319 }, { "epoch": 0.5216102741417634, "grad_norm": 0.5259749401931311, "learning_rate": 4.9964539120388475e-06, "loss": 0.6385, "step": 1320 }, { "epoch": 0.5220054334403557, "grad_norm": 0.5363760379563364, "learning_rate": 4.996445565373264e-06, "loss": 0.6563, "step": 1321 }, { "epoch": 0.5224005927389479, "grad_norm": 0.5118945344997703, "learning_rate": 4.996437208903159e-06, "loss": 0.6388, "step": 1322 }, { "epoch": 0.5227957520375401, "grad_norm": 0.5105505561913978, "learning_rate": 4.996428842628563e-06, "loss": 0.6374, "step": 1323 }, { "epoch": 0.5231909113361324, "grad_norm": 0.545025639173082, "learning_rate": 4.996420466549512e-06, "loss": 0.6304, "step": 1324 }, { "epoch": 0.5235860706347246, "grad_norm": 0.5181486179482868, "learning_rate": 4.996412080666036e-06, "loss": 0.6607, "step": 1325 }, { "epoch": 0.5239812299333169, "grad_norm": 0.5918376928214296, "learning_rate": 4.99640368497817e-06, "loss": 0.65, "step": 1326 }, { "epoch": 0.5243763892319091, "grad_norm": 0.6440341218450398, "learning_rate": 4.9963952794859475e-06, "loss": 0.6314, "step": 1327 }, { "epoch": 0.5247715485305013, "grad_norm": 0.506859989727924, "learning_rate": 4.996386864189399e-06, "loss": 0.6405, "step": 1328 }, { "epoch": 0.5251667078290936, "grad_norm": 0.604693581937193, "learning_rate": 4.9963784390885585e-06, "loss": 0.6301, "step": 1329 }, { "epoch": 0.5255618671276858, "grad_norm": 0.5388769958954528, "learning_rate": 4.99637000418346e-06, "loss": 0.6563, "step": 1330 }, { "epoch": 0.5259570264262781, "grad_norm": 0.5525987392165671, "learning_rate": 4.996361559474135e-06, "loss": 0.6408, "step": 1331 }, { "epoch": 0.5263521857248703, "grad_norm": 0.5438403662996905, "learning_rate": 4.996353104960619e-06, "loss": 0.6463, "step": 1332 }, { "epoch": 0.5267473450234625, "grad_norm": 0.5205416563435727, "learning_rate": 4.996344640642943e-06, "loss": 0.6281, "step": 1333 }, { "epoch": 0.5271425043220548, "grad_norm": 0.49717641720078637, "learning_rate": 4.9963361665211404e-06, "loss": 0.6438, "step": 1334 }, { "epoch": 0.527537663620647, "grad_norm": 0.5720531214872092, "learning_rate": 4.996327682595247e-06, "loss": 0.6549, "step": 1335 }, { "epoch": 0.5279328229192393, "grad_norm": 0.5083062127313356, "learning_rate": 4.996319188865293e-06, "loss": 0.6343, "step": 1336 }, { "epoch": 0.5283279822178316, "grad_norm": 0.5326331393036251, "learning_rate": 4.996310685331314e-06, "loss": 0.6654, "step": 1337 }, { "epoch": 0.5287231415164239, "grad_norm": 0.5263704388039211, "learning_rate": 4.996302171993341e-06, "loss": 0.6319, "step": 1338 }, { "epoch": 0.5291183008150161, "grad_norm": 0.5004961835420784, "learning_rate": 4.99629364885141e-06, "loss": 0.6383, "step": 1339 }, { "epoch": 0.5295134601136083, "grad_norm": 0.5474008702250441, "learning_rate": 4.996285115905554e-06, "loss": 0.644, "step": 1340 }, { "epoch": 0.5299086194122006, "grad_norm": 0.5214317652571568, "learning_rate": 4.996276573155805e-06, "loss": 0.6278, "step": 1341 }, { "epoch": 0.5303037787107928, "grad_norm": 0.5178793183599104, "learning_rate": 4.996268020602198e-06, "loss": 0.6527, "step": 1342 }, { "epoch": 0.5306989380093851, "grad_norm": 0.5106695795609324, "learning_rate": 4.9962594582447654e-06, "loss": 0.6194, "step": 1343 }, { "epoch": 0.5310940973079773, "grad_norm": 0.5553450350073675, "learning_rate": 4.996250886083541e-06, "loss": 0.6651, "step": 1344 }, { "epoch": 0.5314892566065695, "grad_norm": 0.5020511547000661, "learning_rate": 4.996242304118561e-06, "loss": 0.6579, "step": 1345 }, { "epoch": 0.5318844159051618, "grad_norm": 0.5056151563934147, "learning_rate": 4.996233712349855e-06, "loss": 0.6522, "step": 1346 }, { "epoch": 0.532279575203754, "grad_norm": 0.5141576495869976, "learning_rate": 4.996225110777459e-06, "loss": 0.6233, "step": 1347 }, { "epoch": 0.5326747345023463, "grad_norm": 0.5060024011484795, "learning_rate": 4.996216499401408e-06, "loss": 0.6496, "step": 1348 }, { "epoch": 0.5330698938009385, "grad_norm": 0.513393743699311, "learning_rate": 4.996207878221732e-06, "loss": 0.6363, "step": 1349 }, { "epoch": 0.5334650530995307, "grad_norm": 0.516451601423634, "learning_rate": 4.9961992472384695e-06, "loss": 0.6144, "step": 1350 }, { "epoch": 0.533860212398123, "grad_norm": 0.543940796081213, "learning_rate": 4.99619060645165e-06, "loss": 0.6636, "step": 1351 }, { "epoch": 0.5342553716967152, "grad_norm": 0.523839395723129, "learning_rate": 4.996181955861311e-06, "loss": 0.6522, "step": 1352 }, { "epoch": 0.5346505309953075, "grad_norm": 0.5346665570634227, "learning_rate": 4.996173295467485e-06, "loss": 0.6247, "step": 1353 }, { "epoch": 0.5350456902938997, "grad_norm": 0.5053515545286174, "learning_rate": 4.996164625270206e-06, "loss": 0.628, "step": 1354 }, { "epoch": 0.5354408495924919, "grad_norm": 0.5385469662689469, "learning_rate": 4.9961559452695075e-06, "loss": 0.6403, "step": 1355 }, { "epoch": 0.5358360088910842, "grad_norm": 0.530821893645594, "learning_rate": 4.996147255465425e-06, "loss": 0.6493, "step": 1356 }, { "epoch": 0.5362311681896764, "grad_norm": 0.49520095478056697, "learning_rate": 4.9961385558579915e-06, "loss": 0.6391, "step": 1357 }, { "epoch": 0.5366263274882687, "grad_norm": 0.5012636197893197, "learning_rate": 4.996129846447241e-06, "loss": 0.6265, "step": 1358 }, { "epoch": 0.5370214867868609, "grad_norm": 0.5412008937924457, "learning_rate": 4.996121127233209e-06, "loss": 0.6583, "step": 1359 }, { "epoch": 0.5374166460854533, "grad_norm": 0.51146837063799, "learning_rate": 4.996112398215929e-06, "loss": 0.6552, "step": 1360 }, { "epoch": 0.5378118053840455, "grad_norm": 0.5164037939572933, "learning_rate": 4.996103659395434e-06, "loss": 0.6441, "step": 1361 }, { "epoch": 0.5382069646826377, "grad_norm": 0.507221251387215, "learning_rate": 4.99609491077176e-06, "loss": 0.6183, "step": 1362 }, { "epoch": 0.53860212398123, "grad_norm": 0.5110021327827016, "learning_rate": 4.996086152344942e-06, "loss": 0.6445, "step": 1363 }, { "epoch": 0.5389972832798222, "grad_norm": 0.4950203261367757, "learning_rate": 4.996077384115012e-06, "loss": 0.6317, "step": 1364 }, { "epoch": 0.5393924425784145, "grad_norm": 0.513880385000124, "learning_rate": 4.9960686060820065e-06, "loss": 0.6537, "step": 1365 }, { "epoch": 0.5397876018770067, "grad_norm": 0.5047955828045061, "learning_rate": 4.99605981824596e-06, "loss": 0.6385, "step": 1366 }, { "epoch": 0.5401827611755989, "grad_norm": 0.5444433922369591, "learning_rate": 4.996051020606904e-06, "loss": 0.6333, "step": 1367 }, { "epoch": 0.5405779204741912, "grad_norm": 0.5391605933331952, "learning_rate": 4.9960422131648765e-06, "loss": 0.6553, "step": 1368 }, { "epoch": 0.5409730797727834, "grad_norm": 0.5490109139703936, "learning_rate": 4.996033395919911e-06, "loss": 0.6352, "step": 1369 }, { "epoch": 0.5413682390713757, "grad_norm": 0.5044155900096984, "learning_rate": 4.996024568872042e-06, "loss": 0.6328, "step": 1370 }, { "epoch": 0.5417633983699679, "grad_norm": 0.5426896659207923, "learning_rate": 4.9960157320213046e-06, "loss": 0.6419, "step": 1371 }, { "epoch": 0.5421585576685601, "grad_norm": 0.5475008282374658, "learning_rate": 4.996006885367732e-06, "loss": 0.661, "step": 1372 }, { "epoch": 0.5425537169671524, "grad_norm": 0.50456290195716, "learning_rate": 4.995998028911361e-06, "loss": 0.6411, "step": 1373 }, { "epoch": 0.5429488762657446, "grad_norm": 0.517404197086199, "learning_rate": 4.995989162652224e-06, "loss": 0.6244, "step": 1374 }, { "epoch": 0.5433440355643369, "grad_norm": 0.52119175194665, "learning_rate": 4.995980286590358e-06, "loss": 0.6285, "step": 1375 }, { "epoch": 0.5437391948629291, "grad_norm": 0.5535606068992925, "learning_rate": 4.995971400725798e-06, "loss": 0.6512, "step": 1376 }, { "epoch": 0.5441343541615213, "grad_norm": 0.5096581348064602, "learning_rate": 4.995962505058577e-06, "loss": 0.6209, "step": 1377 }, { "epoch": 0.5445295134601136, "grad_norm": 0.507954709115174, "learning_rate": 4.995953599588731e-06, "loss": 0.6347, "step": 1378 }, { "epoch": 0.5449246727587058, "grad_norm": 0.5455172501120015, "learning_rate": 4.995944684316295e-06, "loss": 0.6544, "step": 1379 }, { "epoch": 0.5453198320572981, "grad_norm": 0.5195045840736692, "learning_rate": 4.995935759241304e-06, "loss": 0.6308, "step": 1380 }, { "epoch": 0.5457149913558903, "grad_norm": 0.5128980270231307, "learning_rate": 4.995926824363793e-06, "loss": 0.6451, "step": 1381 }, { "epoch": 0.5461101506544825, "grad_norm": 0.5694454666753539, "learning_rate": 4.995917879683796e-06, "loss": 0.6544, "step": 1382 }, { "epoch": 0.5465053099530748, "grad_norm": 0.5222383396969622, "learning_rate": 4.995908925201351e-06, "loss": 0.6215, "step": 1383 }, { "epoch": 0.5469004692516671, "grad_norm": 0.5033825276170869, "learning_rate": 4.995899960916489e-06, "loss": 0.6485, "step": 1384 }, { "epoch": 0.5472956285502594, "grad_norm": 0.5217250381657061, "learning_rate": 4.9958909868292495e-06, "loss": 0.6392, "step": 1385 }, { "epoch": 0.5476907878488516, "grad_norm": 0.5275739257953245, "learning_rate": 4.995882002939665e-06, "loss": 0.6309, "step": 1386 }, { "epoch": 0.5480859471474439, "grad_norm": 0.4888893706726107, "learning_rate": 4.995873009247771e-06, "loss": 0.6266, "step": 1387 }, { "epoch": 0.5484811064460361, "grad_norm": 0.6091283696485257, "learning_rate": 4.995864005753605e-06, "loss": 0.6451, "step": 1388 }, { "epoch": 0.5488762657446283, "grad_norm": 0.5292867693675212, "learning_rate": 4.9958549924572e-06, "loss": 0.6369, "step": 1389 }, { "epoch": 0.5492714250432206, "grad_norm": 0.516530358022225, "learning_rate": 4.995845969358591e-06, "loss": 0.6427, "step": 1390 }, { "epoch": 0.5496665843418128, "grad_norm": 0.5302839204130153, "learning_rate": 4.995836936457816e-06, "loss": 0.6581, "step": 1391 }, { "epoch": 0.550061743640405, "grad_norm": 0.5392670526192246, "learning_rate": 4.995827893754909e-06, "loss": 0.6354, "step": 1392 }, { "epoch": 0.5504569029389973, "grad_norm": 0.533934176126299, "learning_rate": 4.995818841249905e-06, "loss": 0.6514, "step": 1393 }, { "epoch": 0.5508520622375895, "grad_norm": 0.5423235074943602, "learning_rate": 4.99580977894284e-06, "loss": 0.652, "step": 1394 }, { "epoch": 0.5512472215361818, "grad_norm": 0.5548664924819643, "learning_rate": 4.995800706833751e-06, "loss": 0.6514, "step": 1395 }, { "epoch": 0.551642380834774, "grad_norm": 0.5230139409187969, "learning_rate": 4.995791624922671e-06, "loss": 0.6449, "step": 1396 }, { "epoch": 0.5520375401333663, "grad_norm": 0.4882535391793784, "learning_rate": 4.995782533209638e-06, "loss": 0.6171, "step": 1397 }, { "epoch": 0.5524326994319585, "grad_norm": 0.5602125565323922, "learning_rate": 4.995773431694686e-06, "loss": 0.6535, "step": 1398 }, { "epoch": 0.5528278587305507, "grad_norm": 0.5396171131081425, "learning_rate": 4.995764320377852e-06, "loss": 0.6597, "step": 1399 }, { "epoch": 0.553223018029143, "grad_norm": 0.5233921034680511, "learning_rate": 4.99575519925917e-06, "loss": 0.6498, "step": 1400 }, { "epoch": 0.5536181773277352, "grad_norm": 0.49235649050585056, "learning_rate": 4.995746068338679e-06, "loss": 0.6381, "step": 1401 }, { "epoch": 0.5540133366263275, "grad_norm": 0.5307247825728258, "learning_rate": 4.995736927616412e-06, "loss": 0.6624, "step": 1402 }, { "epoch": 0.5544084959249197, "grad_norm": 0.5423744236754792, "learning_rate": 4.995727777092406e-06, "loss": 0.6447, "step": 1403 }, { "epoch": 0.5548036552235119, "grad_norm": 0.5784643067877503, "learning_rate": 4.995718616766696e-06, "loss": 0.661, "step": 1404 }, { "epoch": 0.5551988145221042, "grad_norm": 0.5122057994466735, "learning_rate": 4.995709446639319e-06, "loss": 0.656, "step": 1405 }, { "epoch": 0.5555939738206964, "grad_norm": 0.5174768644606775, "learning_rate": 4.995700266710312e-06, "loss": 0.6455, "step": 1406 }, { "epoch": 0.5559891331192887, "grad_norm": 0.5496336917317313, "learning_rate": 4.9956910769797085e-06, "loss": 0.6422, "step": 1407 }, { "epoch": 0.556384292417881, "grad_norm": 0.502618362332198, "learning_rate": 4.9956818774475465e-06, "loss": 0.6368, "step": 1408 }, { "epoch": 0.5567794517164733, "grad_norm": 0.5691680887325605, "learning_rate": 4.995672668113861e-06, "loss": 0.6551, "step": 1409 }, { "epoch": 0.5571746110150655, "grad_norm": 0.4970626341833687, "learning_rate": 4.99566344897869e-06, "loss": 0.6235, "step": 1410 }, { "epoch": 0.5575697703136577, "grad_norm": 0.5189759440475544, "learning_rate": 4.995654220042067e-06, "loss": 0.6591, "step": 1411 }, { "epoch": 0.55796492961225, "grad_norm": 0.5200276974925322, "learning_rate": 4.99564498130403e-06, "loss": 0.627, "step": 1412 }, { "epoch": 0.5583600889108422, "grad_norm": 0.4968048227090689, "learning_rate": 4.9956357327646155e-06, "loss": 0.6456, "step": 1413 }, { "epoch": 0.5587552482094345, "grad_norm": 0.5006447246560516, "learning_rate": 4.995626474423859e-06, "loss": 0.624, "step": 1414 }, { "epoch": 0.5591504075080267, "grad_norm": 0.5408193413913738, "learning_rate": 4.995617206281797e-06, "loss": 0.6521, "step": 1415 }, { "epoch": 0.5595455668066189, "grad_norm": 0.5102142633356043, "learning_rate": 4.995607928338466e-06, "loss": 0.6271, "step": 1416 }, { "epoch": 0.5599407261052112, "grad_norm": 0.7566967338630805, "learning_rate": 4.9955986405939025e-06, "loss": 0.667, "step": 1417 }, { "epoch": 0.5603358854038034, "grad_norm": 0.5002937738203951, "learning_rate": 4.995589343048144e-06, "loss": 0.5957, "step": 1418 }, { "epoch": 0.5607310447023957, "grad_norm": 0.5307076787963949, "learning_rate": 4.9955800357012245e-06, "loss": 0.6303, "step": 1419 }, { "epoch": 0.5611262040009879, "grad_norm": 0.5707224007581209, "learning_rate": 4.995570718553182e-06, "loss": 0.6358, "step": 1420 }, { "epoch": 0.5615213632995801, "grad_norm": 0.5184302142265548, "learning_rate": 4.995561391604054e-06, "loss": 0.6429, "step": 1421 }, { "epoch": 0.5619165225981724, "grad_norm": 0.5242472513502078, "learning_rate": 4.995552054853876e-06, "loss": 0.6352, "step": 1422 }, { "epoch": 0.5623116818967646, "grad_norm": 0.5984667209493643, "learning_rate": 4.995542708302684e-06, "loss": 0.6449, "step": 1423 }, { "epoch": 0.5627068411953569, "grad_norm": 0.5211234764982716, "learning_rate": 4.995533351950517e-06, "loss": 0.6336, "step": 1424 }, { "epoch": 0.5631020004939491, "grad_norm": 0.5005552076190557, "learning_rate": 4.9955239857974095e-06, "loss": 0.6315, "step": 1425 }, { "epoch": 0.5634971597925413, "grad_norm": 0.5156815083287939, "learning_rate": 4.995514609843399e-06, "loss": 0.6128, "step": 1426 }, { "epoch": 0.5638923190911336, "grad_norm": 0.8025016430790596, "learning_rate": 4.995505224088524e-06, "loss": 0.6324, "step": 1427 }, { "epoch": 0.5642874783897258, "grad_norm": 0.5143928596776913, "learning_rate": 4.995495828532818e-06, "loss": 0.6298, "step": 1428 }, { "epoch": 0.5646826376883181, "grad_norm": 0.5420804076927369, "learning_rate": 4.99548642317632e-06, "loss": 0.6263, "step": 1429 }, { "epoch": 0.5650777969869103, "grad_norm": 0.5455732492128239, "learning_rate": 4.995477008019067e-06, "loss": 0.6599, "step": 1430 }, { "epoch": 0.5654729562855026, "grad_norm": 0.4976835769324547, "learning_rate": 4.995467583061096e-06, "loss": 0.6287, "step": 1431 }, { "epoch": 0.5658681155840949, "grad_norm": 0.4891898546027407, "learning_rate": 4.995458148302444e-06, "loss": 0.6389, "step": 1432 }, { "epoch": 0.5662632748826871, "grad_norm": 0.5330421535897775, "learning_rate": 4.995448703743147e-06, "loss": 0.6551, "step": 1433 }, { "epoch": 0.5666584341812794, "grad_norm": 0.4928122884897478, "learning_rate": 4.995439249383243e-06, "loss": 0.6453, "step": 1434 }, { "epoch": 0.5670535934798716, "grad_norm": 0.4982212210514846, "learning_rate": 4.995429785222768e-06, "loss": 0.6364, "step": 1435 }, { "epoch": 0.5674487527784638, "grad_norm": 0.5090391879426845, "learning_rate": 4.995420311261761e-06, "loss": 0.6527, "step": 1436 }, { "epoch": 0.5678439120770561, "grad_norm": 0.49179123014556764, "learning_rate": 4.9954108275002586e-06, "loss": 0.6198, "step": 1437 }, { "epoch": 0.5682390713756483, "grad_norm": 0.49426838823272856, "learning_rate": 4.9954013339382975e-06, "loss": 0.6373, "step": 1438 }, { "epoch": 0.5686342306742406, "grad_norm": 0.51127919787096, "learning_rate": 4.995391830575915e-06, "loss": 0.6447, "step": 1439 }, { "epoch": 0.5690293899728328, "grad_norm": 0.5080627757706936, "learning_rate": 4.995382317413149e-06, "loss": 0.6239, "step": 1440 }, { "epoch": 0.569424549271425, "grad_norm": 0.49636389856600227, "learning_rate": 4.995372794450037e-06, "loss": 0.6433, "step": 1441 }, { "epoch": 0.5698197085700173, "grad_norm": 0.5337319531991955, "learning_rate": 4.995363261686615e-06, "loss": 0.6335, "step": 1442 }, { "epoch": 0.5702148678686095, "grad_norm": 0.5283909447365106, "learning_rate": 4.995353719122921e-06, "loss": 0.6246, "step": 1443 }, { "epoch": 0.5706100271672018, "grad_norm": 0.5050664542458486, "learning_rate": 4.995344166758994e-06, "loss": 0.6547, "step": 1444 }, { "epoch": 0.571005186465794, "grad_norm": 0.47674622000290146, "learning_rate": 4.99533460459487e-06, "loss": 0.637, "step": 1445 }, { "epoch": 0.5714003457643863, "grad_norm": 0.522395740013937, "learning_rate": 4.995325032630588e-06, "loss": 0.6294, "step": 1446 }, { "epoch": 0.5717955050629785, "grad_norm": 0.5076861343641647, "learning_rate": 4.995315450866183e-06, "loss": 0.6478, "step": 1447 }, { "epoch": 0.5721906643615707, "grad_norm": 0.5123340353219106, "learning_rate": 4.995305859301695e-06, "loss": 0.6324, "step": 1448 }, { "epoch": 0.572585823660163, "grad_norm": 0.5116830386496904, "learning_rate": 4.9952962579371595e-06, "loss": 0.6541, "step": 1449 }, { "epoch": 0.5729809829587552, "grad_norm": 0.5029055259684767, "learning_rate": 4.995286646772616e-06, "loss": 0.6411, "step": 1450 }, { "epoch": 0.5733761422573475, "grad_norm": 0.54340913147798, "learning_rate": 4.995277025808103e-06, "loss": 0.6318, "step": 1451 }, { "epoch": 0.5737713015559397, "grad_norm": 0.6008588857149761, "learning_rate": 4.995267395043656e-06, "loss": 0.6313, "step": 1452 }, { "epoch": 0.5741664608545319, "grad_norm": 0.5046550392300445, "learning_rate": 4.995257754479313e-06, "loss": 0.6093, "step": 1453 }, { "epoch": 0.5745616201531242, "grad_norm": 0.5075601586098186, "learning_rate": 4.995248104115114e-06, "loss": 0.6715, "step": 1454 }, { "epoch": 0.5749567794517165, "grad_norm": 0.5175079376115269, "learning_rate": 4.995238443951096e-06, "loss": 0.6332, "step": 1455 }, { "epoch": 0.5753519387503088, "grad_norm": 0.5251599308274112, "learning_rate": 4.995228773987296e-06, "loss": 0.6471, "step": 1456 }, { "epoch": 0.575747098048901, "grad_norm": 0.5112664273121302, "learning_rate": 4.995219094223753e-06, "loss": 0.6487, "step": 1457 }, { "epoch": 0.5761422573474932, "grad_norm": 0.5161124213557929, "learning_rate": 4.9952094046605034e-06, "loss": 0.6371, "step": 1458 }, { "epoch": 0.5765374166460855, "grad_norm": 0.5122993630545928, "learning_rate": 4.995199705297587e-06, "loss": 0.6402, "step": 1459 }, { "epoch": 0.5769325759446777, "grad_norm": 0.48934561452372555, "learning_rate": 4.995189996135042e-06, "loss": 0.6453, "step": 1460 }, { "epoch": 0.57732773524327, "grad_norm": 0.5015789106369151, "learning_rate": 4.995180277172905e-06, "loss": 0.6405, "step": 1461 }, { "epoch": 0.5777228945418622, "grad_norm": 0.4896116736893514, "learning_rate": 4.995170548411215e-06, "loss": 0.604, "step": 1462 }, { "epoch": 0.5781180538404544, "grad_norm": 0.5056206160242199, "learning_rate": 4.995160809850012e-06, "loss": 0.6501, "step": 1463 }, { "epoch": 0.5785132131390467, "grad_norm": 0.5271567630707523, "learning_rate": 4.99515106148933e-06, "loss": 0.6637, "step": 1464 }, { "epoch": 0.5789083724376389, "grad_norm": 0.5161640907927971, "learning_rate": 4.9951413033292115e-06, "loss": 0.6575, "step": 1465 }, { "epoch": 0.5793035317362312, "grad_norm": 0.5366526893281445, "learning_rate": 4.995131535369693e-06, "loss": 0.6442, "step": 1466 }, { "epoch": 0.5796986910348234, "grad_norm": 0.478707795838876, "learning_rate": 4.995121757610812e-06, "loss": 0.6264, "step": 1467 }, { "epoch": 0.5800938503334157, "grad_norm": 0.5109398474629543, "learning_rate": 4.995111970052608e-06, "loss": 0.6432, "step": 1468 }, { "epoch": 0.5804890096320079, "grad_norm": 0.5029075819574539, "learning_rate": 4.99510217269512e-06, "loss": 0.641, "step": 1469 }, { "epoch": 0.5808841689306001, "grad_norm": 0.5123704050877957, "learning_rate": 4.995092365538385e-06, "loss": 0.6311, "step": 1470 }, { "epoch": 0.5812793282291924, "grad_norm": 0.5232609805684696, "learning_rate": 4.995082548582443e-06, "loss": 0.6722, "step": 1471 }, { "epoch": 0.5816744875277846, "grad_norm": 0.5140040836372506, "learning_rate": 4.995072721827331e-06, "loss": 0.6263, "step": 1472 }, { "epoch": 0.5820696468263769, "grad_norm": 0.5133765736650968, "learning_rate": 4.995062885273089e-06, "loss": 0.6297, "step": 1473 }, { "epoch": 0.5824648061249691, "grad_norm": 0.4941943476821983, "learning_rate": 4.995053038919755e-06, "loss": 0.6439, "step": 1474 }, { "epoch": 0.5828599654235613, "grad_norm": 0.49939101156839866, "learning_rate": 4.9950431827673676e-06, "loss": 0.6221, "step": 1475 }, { "epoch": 0.5832551247221536, "grad_norm": 0.5284234536221161, "learning_rate": 4.995033316815966e-06, "loss": 0.6466, "step": 1476 }, { "epoch": 0.5836502840207458, "grad_norm": 0.5232984307977716, "learning_rate": 4.9950234410655886e-06, "loss": 0.6657, "step": 1477 }, { "epoch": 0.584045443319338, "grad_norm": 0.49111166592428446, "learning_rate": 4.995013555516274e-06, "loss": 0.6372, "step": 1478 }, { "epoch": 0.5844406026179304, "grad_norm": 0.49817946819904, "learning_rate": 4.99500366016806e-06, "loss": 0.6564, "step": 1479 }, { "epoch": 0.5848357619165226, "grad_norm": 0.5045229504599369, "learning_rate": 4.994993755020989e-06, "loss": 0.6236, "step": 1480 }, { "epoch": 0.5852309212151149, "grad_norm": 0.5137046231454804, "learning_rate": 4.994983840075096e-06, "loss": 0.638, "step": 1481 }, { "epoch": 0.5856260805137071, "grad_norm": 0.5090356242899768, "learning_rate": 4.9949739153304224e-06, "loss": 0.607, "step": 1482 }, { "epoch": 0.5860212398122994, "grad_norm": 0.5086542750896299, "learning_rate": 4.994963980787005e-06, "loss": 0.6137, "step": 1483 }, { "epoch": 0.5864163991108916, "grad_norm": 0.5359161859801612, "learning_rate": 4.994954036444886e-06, "loss": 0.6126, "step": 1484 }, { "epoch": 0.5868115584094838, "grad_norm": 0.507773612302792, "learning_rate": 4.994944082304102e-06, "loss": 0.6161, "step": 1485 }, { "epoch": 0.5872067177080761, "grad_norm": 0.4931028875320953, "learning_rate": 4.9949341183646914e-06, "loss": 0.6326, "step": 1486 }, { "epoch": 0.5876018770066683, "grad_norm": 0.5370606115690266, "learning_rate": 4.994924144626695e-06, "loss": 0.6421, "step": 1487 }, { "epoch": 0.5879970363052606, "grad_norm": 0.5028578672986176, "learning_rate": 4.994914161090152e-06, "loss": 0.6482, "step": 1488 }, { "epoch": 0.5883921956038528, "grad_norm": 0.49503149805703456, "learning_rate": 4.994904167755102e-06, "loss": 0.6424, "step": 1489 }, { "epoch": 0.588787354902445, "grad_norm": 0.52378156821829, "learning_rate": 4.994894164621581e-06, "loss": 0.6372, "step": 1490 }, { "epoch": 0.5891825142010373, "grad_norm": 0.5049018438428512, "learning_rate": 4.994884151689633e-06, "loss": 0.6026, "step": 1491 }, { "epoch": 0.5895776734996295, "grad_norm": 0.5056297740574257, "learning_rate": 4.994874128959294e-06, "loss": 0.6439, "step": 1492 }, { "epoch": 0.5899728327982218, "grad_norm": 0.5059061924424931, "learning_rate": 4.994864096430604e-06, "loss": 0.6496, "step": 1493 }, { "epoch": 0.590367992096814, "grad_norm": 0.5252330272280762, "learning_rate": 4.994854054103604e-06, "loss": 0.6431, "step": 1494 }, { "epoch": 0.5907631513954062, "grad_norm": 0.5082734525840502, "learning_rate": 4.994844001978331e-06, "loss": 0.665, "step": 1495 }, { "epoch": 0.5911583106939985, "grad_norm": 0.5783213762110898, "learning_rate": 4.994833940054827e-06, "loss": 0.6641, "step": 1496 }, { "epoch": 0.5915534699925907, "grad_norm": 0.5579102147905942, "learning_rate": 4.994823868333129e-06, "loss": 0.6442, "step": 1497 }, { "epoch": 0.591948629291183, "grad_norm": 0.49660206731752937, "learning_rate": 4.9948137868132785e-06, "loss": 0.6256, "step": 1498 }, { "epoch": 0.5923437885897752, "grad_norm": 0.5338722501575794, "learning_rate": 4.994803695495315e-06, "loss": 0.6324, "step": 1499 }, { "epoch": 0.5927389478883675, "grad_norm": 0.5435806297907863, "learning_rate": 4.994793594379275e-06, "loss": 0.642, "step": 1500 }, { "epoch": 0.5931341071869597, "grad_norm": 0.5244058039956626, "learning_rate": 4.9947834834652035e-06, "loss": 0.6351, "step": 1501 }, { "epoch": 0.593529266485552, "grad_norm": 0.5397763750210784, "learning_rate": 4.9947733627531365e-06, "loss": 0.5993, "step": 1502 }, { "epoch": 0.5939244257841443, "grad_norm": 0.5288352373402283, "learning_rate": 4.994763232243114e-06, "loss": 0.6477, "step": 1503 }, { "epoch": 0.5943195850827365, "grad_norm": 0.5090715743441777, "learning_rate": 4.994753091935177e-06, "loss": 0.645, "step": 1504 }, { "epoch": 0.5947147443813288, "grad_norm": 0.6719556320052742, "learning_rate": 4.994742941829364e-06, "loss": 0.6573, "step": 1505 }, { "epoch": 0.595109903679921, "grad_norm": 0.5147973826184885, "learning_rate": 4.994732781925717e-06, "loss": 0.6464, "step": 1506 }, { "epoch": 0.5955050629785132, "grad_norm": 0.522878872773451, "learning_rate": 4.994722612224274e-06, "loss": 0.6592, "step": 1507 }, { "epoch": 0.5959002222771055, "grad_norm": 0.5037339588735743, "learning_rate": 4.9947124327250755e-06, "loss": 0.6281, "step": 1508 }, { "epoch": 0.5962953815756977, "grad_norm": 0.5142576687522799, "learning_rate": 4.99470224342816e-06, "loss": 0.6397, "step": 1509 }, { "epoch": 0.59669054087429, "grad_norm": 0.5442114812958629, "learning_rate": 4.99469204433357e-06, "loss": 0.6681, "step": 1510 }, { "epoch": 0.5970857001728822, "grad_norm": 0.49806465072349676, "learning_rate": 4.994681835441345e-06, "loss": 0.6411, "step": 1511 }, { "epoch": 0.5974808594714744, "grad_norm": 0.5113177774148318, "learning_rate": 4.994671616751524e-06, "loss": 0.6365, "step": 1512 }, { "epoch": 0.5978760187700667, "grad_norm": 0.5044746925510942, "learning_rate": 4.994661388264148e-06, "loss": 0.6245, "step": 1513 }, { "epoch": 0.5982711780686589, "grad_norm": 0.5077014321420371, "learning_rate": 4.994651149979257e-06, "loss": 0.6296, "step": 1514 }, { "epoch": 0.5986663373672512, "grad_norm": 0.5577617502488906, "learning_rate": 4.9946409018968915e-06, "loss": 0.6166, "step": 1515 }, { "epoch": 0.5990614966658434, "grad_norm": 0.5454684436241605, "learning_rate": 4.99463064401709e-06, "loss": 0.6386, "step": 1516 }, { "epoch": 0.5994566559644356, "grad_norm": 0.5330025000783452, "learning_rate": 4.994620376339895e-06, "loss": 0.6405, "step": 1517 }, { "epoch": 0.5998518152630279, "grad_norm": 0.501164395733037, "learning_rate": 4.994610098865346e-06, "loss": 0.6225, "step": 1518 }, { "epoch": 0.6002469745616201, "grad_norm": 0.49869645020020736, "learning_rate": 4.994599811593484e-06, "loss": 0.6316, "step": 1519 }, { "epoch": 0.6006421338602124, "grad_norm": 0.5203023850194525, "learning_rate": 4.9945895145243476e-06, "loss": 0.6428, "step": 1520 }, { "epoch": 0.6010372931588046, "grad_norm": 0.5288551722444358, "learning_rate": 4.994579207657979e-06, "loss": 0.6164, "step": 1521 }, { "epoch": 0.6014324524573968, "grad_norm": 0.4919113743752538, "learning_rate": 4.9945688909944175e-06, "loss": 0.6213, "step": 1522 }, { "epoch": 0.6018276117559891, "grad_norm": 0.5595834215690251, "learning_rate": 4.994558564533705e-06, "loss": 0.6436, "step": 1523 }, { "epoch": 0.6022227710545813, "grad_norm": 0.5247902498260631, "learning_rate": 4.9945482282758806e-06, "loss": 0.638, "step": 1524 }, { "epoch": 0.6026179303531736, "grad_norm": 0.5291480908300407, "learning_rate": 4.994537882220985e-06, "loss": 0.6253, "step": 1525 }, { "epoch": 0.6030130896517659, "grad_norm": 0.501147812137641, "learning_rate": 4.994527526369061e-06, "loss": 0.664, "step": 1526 }, { "epoch": 0.6034082489503582, "grad_norm": 0.505734785729107, "learning_rate": 4.994517160720146e-06, "loss": 0.6385, "step": 1527 }, { "epoch": 0.6038034082489504, "grad_norm": 0.5275768661720794, "learning_rate": 4.994506785274283e-06, "loss": 0.6387, "step": 1528 }, { "epoch": 0.6041985675475426, "grad_norm": 0.5056505262627228, "learning_rate": 4.994496400031512e-06, "loss": 0.64, "step": 1529 }, { "epoch": 0.6045937268461349, "grad_norm": 0.4908463182821926, "learning_rate": 4.9944860049918746e-06, "loss": 0.621, "step": 1530 }, { "epoch": 0.6049888861447271, "grad_norm": 0.5234617249072642, "learning_rate": 4.99447560015541e-06, "loss": 0.6418, "step": 1531 }, { "epoch": 0.6053840454433194, "grad_norm": 0.501149633946628, "learning_rate": 4.994465185522161e-06, "loss": 0.6636, "step": 1532 }, { "epoch": 0.6057792047419116, "grad_norm": 0.4720640412577634, "learning_rate": 4.994454761092166e-06, "loss": 0.6411, "step": 1533 }, { "epoch": 0.6061743640405038, "grad_norm": 0.524342790903981, "learning_rate": 4.994444326865469e-06, "loss": 0.6452, "step": 1534 }, { "epoch": 0.6065695233390961, "grad_norm": 0.5013477729936378, "learning_rate": 4.994433882842108e-06, "loss": 0.6304, "step": 1535 }, { "epoch": 0.6069646826376883, "grad_norm": 0.5240831322483116, "learning_rate": 4.994423429022126e-06, "loss": 0.6339, "step": 1536 }, { "epoch": 0.6073598419362806, "grad_norm": 0.4969641999990712, "learning_rate": 4.994412965405563e-06, "loss": 0.6443, "step": 1537 }, { "epoch": 0.6077550012348728, "grad_norm": 0.5272037913210349, "learning_rate": 4.9944024919924615e-06, "loss": 0.6468, "step": 1538 }, { "epoch": 0.608150160533465, "grad_norm": 0.529008774394857, "learning_rate": 4.9943920087828615e-06, "loss": 0.6362, "step": 1539 }, { "epoch": 0.6085453198320573, "grad_norm": 0.4915768675876793, "learning_rate": 4.994381515776804e-06, "loss": 0.6435, "step": 1540 }, { "epoch": 0.6089404791306495, "grad_norm": 0.5822531861044621, "learning_rate": 4.9943710129743304e-06, "loss": 0.6608, "step": 1541 }, { "epoch": 0.6093356384292418, "grad_norm": 0.5146971943259008, "learning_rate": 4.994360500375482e-06, "loss": 0.6351, "step": 1542 }, { "epoch": 0.609730797727834, "grad_norm": 0.5047870000075502, "learning_rate": 4.994349977980301e-06, "loss": 0.6387, "step": 1543 }, { "epoch": 0.6101259570264262, "grad_norm": 0.4924385862974, "learning_rate": 4.994339445788827e-06, "loss": 0.6378, "step": 1544 }, { "epoch": 0.6105211163250185, "grad_norm": 0.5195506832924177, "learning_rate": 4.9943289038011035e-06, "loss": 0.6422, "step": 1545 }, { "epoch": 0.6109162756236107, "grad_norm": 0.504416370915793, "learning_rate": 4.99431835201717e-06, "loss": 0.6521, "step": 1546 }, { "epoch": 0.611311434922203, "grad_norm": 0.5134028045383864, "learning_rate": 4.9943077904370684e-06, "loss": 0.6486, "step": 1547 }, { "epoch": 0.6117065942207952, "grad_norm": 0.5438989106962773, "learning_rate": 4.994297219060841e-06, "loss": 0.6402, "step": 1548 }, { "epoch": 0.6121017535193874, "grad_norm": 0.502459375072004, "learning_rate": 4.994286637888528e-06, "loss": 0.6421, "step": 1549 }, { "epoch": 0.6124969128179798, "grad_norm": 0.5162824244852758, "learning_rate": 4.994276046920172e-06, "loss": 0.6403, "step": 1550 }, { "epoch": 0.612892072116572, "grad_norm": 0.5178670603935037, "learning_rate": 4.994265446155814e-06, "loss": 0.6309, "step": 1551 }, { "epoch": 0.6132872314151643, "grad_norm": 0.5117410334804755, "learning_rate": 4.994254835595497e-06, "loss": 0.6681, "step": 1552 }, { "epoch": 0.6136823907137565, "grad_norm": 0.4965002156326863, "learning_rate": 4.994244215239261e-06, "loss": 0.6358, "step": 1553 }, { "epoch": 0.6140775500123488, "grad_norm": 0.5151456093040961, "learning_rate": 4.994233585087148e-06, "loss": 0.6345, "step": 1554 }, { "epoch": 0.614472709310941, "grad_norm": 0.5107017962950764, "learning_rate": 4.9942229451392e-06, "loss": 0.6224, "step": 1555 }, { "epoch": 0.6148678686095332, "grad_norm": 0.5362359326835328, "learning_rate": 4.99421229539546e-06, "loss": 0.6402, "step": 1556 }, { "epoch": 0.6152630279081255, "grad_norm": 0.5128666635843286, "learning_rate": 4.994201635855967e-06, "loss": 0.6359, "step": 1557 }, { "epoch": 0.6156581872067177, "grad_norm": 0.5273421131964295, "learning_rate": 4.994190966520765e-06, "loss": 0.6503, "step": 1558 }, { "epoch": 0.61605334650531, "grad_norm": 0.49230499474339184, "learning_rate": 4.994180287389896e-06, "loss": 0.6261, "step": 1559 }, { "epoch": 0.6164485058039022, "grad_norm": 0.5217099067284172, "learning_rate": 4.994169598463401e-06, "loss": 0.6586, "step": 1560 }, { "epoch": 0.6168436651024944, "grad_norm": 0.5102545311764888, "learning_rate": 4.994158899741323e-06, "loss": 0.6147, "step": 1561 }, { "epoch": 0.6172388244010867, "grad_norm": 0.538265791682114, "learning_rate": 4.9941481912237024e-06, "loss": 0.6526, "step": 1562 }, { "epoch": 0.6176339836996789, "grad_norm": 0.5087707653077916, "learning_rate": 4.994137472910583e-06, "loss": 0.6103, "step": 1563 }, { "epoch": 0.6180291429982712, "grad_norm": 0.4985542462986526, "learning_rate": 4.994126744802006e-06, "loss": 0.6245, "step": 1564 }, { "epoch": 0.6184243022968634, "grad_norm": 0.48942059314144304, "learning_rate": 4.994116006898013e-06, "loss": 0.6214, "step": 1565 }, { "epoch": 0.6188194615954556, "grad_norm": 0.5116181294437978, "learning_rate": 4.994105259198649e-06, "loss": 0.622, "step": 1566 }, { "epoch": 0.6192146208940479, "grad_norm": 0.5138679125630047, "learning_rate": 4.994094501703951e-06, "loss": 0.6434, "step": 1567 }, { "epoch": 0.6196097801926401, "grad_norm": 0.5680217102402336, "learning_rate": 4.994083734413966e-06, "loss": 0.6421, "step": 1568 }, { "epoch": 0.6200049394912324, "grad_norm": 0.5120382178875652, "learning_rate": 4.9940729573287346e-06, "loss": 0.6318, "step": 1569 }, { "epoch": 0.6204000987898246, "grad_norm": 0.5470369708755527, "learning_rate": 4.994062170448298e-06, "loss": 0.6225, "step": 1570 }, { "epoch": 0.6207952580884168, "grad_norm": 0.5586073977661113, "learning_rate": 4.994051373772701e-06, "loss": 0.6429, "step": 1571 }, { "epoch": 0.6211904173870091, "grad_norm": 0.5780818912171855, "learning_rate": 4.9940405673019844e-06, "loss": 0.6378, "step": 1572 }, { "epoch": 0.6215855766856014, "grad_norm": 0.5199314186964572, "learning_rate": 4.99402975103619e-06, "loss": 0.642, "step": 1573 }, { "epoch": 0.6219807359841937, "grad_norm": 0.5292899229264992, "learning_rate": 4.994018924975362e-06, "loss": 0.638, "step": 1574 }, { "epoch": 0.6223758952827859, "grad_norm": 0.5190979503571678, "learning_rate": 4.994008089119542e-06, "loss": 0.6242, "step": 1575 }, { "epoch": 0.6227710545813782, "grad_norm": 0.575963095804118, "learning_rate": 4.993997243468772e-06, "loss": 0.6266, "step": 1576 }, { "epoch": 0.6231662138799704, "grad_norm": 0.5340579778193899, "learning_rate": 4.993986388023096e-06, "loss": 0.6663, "step": 1577 }, { "epoch": 0.6235613731785626, "grad_norm": 0.4959581900916024, "learning_rate": 4.993975522782556e-06, "loss": 0.6311, "step": 1578 }, { "epoch": 0.6239565324771549, "grad_norm": 0.4981648627664517, "learning_rate": 4.993964647747195e-06, "loss": 0.6364, "step": 1579 }, { "epoch": 0.6243516917757471, "grad_norm": 0.5042116842190024, "learning_rate": 4.993953762917054e-06, "loss": 0.6367, "step": 1580 }, { "epoch": 0.6247468510743394, "grad_norm": 0.5027206712870845, "learning_rate": 4.993942868292178e-06, "loss": 0.6408, "step": 1581 }, { "epoch": 0.6251420103729316, "grad_norm": 0.494384710791813, "learning_rate": 4.993931963872608e-06, "loss": 0.6388, "step": 1582 }, { "epoch": 0.6255371696715238, "grad_norm": 0.48929240453544737, "learning_rate": 4.993921049658389e-06, "loss": 0.6359, "step": 1583 }, { "epoch": 0.6259323289701161, "grad_norm": 0.5275911914444981, "learning_rate": 4.993910125649561e-06, "loss": 0.6435, "step": 1584 }, { "epoch": 0.6263274882687083, "grad_norm": 0.5354103744385167, "learning_rate": 4.993899191846169e-06, "loss": 0.6214, "step": 1585 }, { "epoch": 0.6267226475673006, "grad_norm": 0.5116129949120737, "learning_rate": 4.9938882482482555e-06, "loss": 0.636, "step": 1586 }, { "epoch": 0.6271178068658928, "grad_norm": 0.5185673630960299, "learning_rate": 4.993877294855863e-06, "loss": 0.6366, "step": 1587 }, { "epoch": 0.627512966164485, "grad_norm": 0.5025695303228517, "learning_rate": 4.993866331669035e-06, "loss": 0.6266, "step": 1588 }, { "epoch": 0.6279081254630773, "grad_norm": 0.4939243554026485, "learning_rate": 4.993855358687814e-06, "loss": 0.6247, "step": 1589 }, { "epoch": 0.6283032847616695, "grad_norm": 0.5013958381045799, "learning_rate": 4.993844375912244e-06, "loss": 0.6374, "step": 1590 }, { "epoch": 0.6286984440602618, "grad_norm": 0.5165249055138371, "learning_rate": 4.993833383342368e-06, "loss": 0.6276, "step": 1591 }, { "epoch": 0.629093603358854, "grad_norm": 0.5010885019204411, "learning_rate": 4.993822380978228e-06, "loss": 0.6273, "step": 1592 }, { "epoch": 0.6294887626574462, "grad_norm": 0.47308571192974885, "learning_rate": 4.993811368819869e-06, "loss": 0.6406, "step": 1593 }, { "epoch": 0.6298839219560385, "grad_norm": 0.5181491819308544, "learning_rate": 4.993800346867333e-06, "loss": 0.6577, "step": 1594 }, { "epoch": 0.6302790812546307, "grad_norm": 0.7592473307425937, "learning_rate": 4.993789315120663e-06, "loss": 0.6136, "step": 1595 }, { "epoch": 0.630674240553223, "grad_norm": 0.48322179308535596, "learning_rate": 4.993778273579903e-06, "loss": 0.6233, "step": 1596 }, { "epoch": 0.6310693998518153, "grad_norm": 0.4989991809584226, "learning_rate": 4.993767222245096e-06, "loss": 0.615, "step": 1597 }, { "epoch": 0.6314645591504076, "grad_norm": 0.5102524351860097, "learning_rate": 4.993756161116287e-06, "loss": 0.646, "step": 1598 }, { "epoch": 0.6318597184489998, "grad_norm": 0.48881189426787136, "learning_rate": 4.9937450901935166e-06, "loss": 0.6309, "step": 1599 }, { "epoch": 0.632254877747592, "grad_norm": 0.5162222090652335, "learning_rate": 4.993734009476831e-06, "loss": 0.6234, "step": 1600 }, { "epoch": 0.6326500370461843, "grad_norm": 0.5219467840999582, "learning_rate": 4.99372291896627e-06, "loss": 0.6623, "step": 1601 }, { "epoch": 0.6330451963447765, "grad_norm": 0.4812462236064405, "learning_rate": 4.993711818661882e-06, "loss": 0.6357, "step": 1602 }, { "epoch": 0.6334403556433688, "grad_norm": 0.5121012568296751, "learning_rate": 4.993700708563708e-06, "loss": 0.6475, "step": 1603 }, { "epoch": 0.633835514941961, "grad_norm": 0.5243902599376221, "learning_rate": 4.993689588671792e-06, "loss": 0.6204, "step": 1604 }, { "epoch": 0.6342306742405532, "grad_norm": 0.5038240130102247, "learning_rate": 4.9936784589861765e-06, "loss": 0.6381, "step": 1605 }, { "epoch": 0.6346258335391455, "grad_norm": 0.5056284357196895, "learning_rate": 4.993667319506907e-06, "loss": 0.6124, "step": 1606 }, { "epoch": 0.6350209928377377, "grad_norm": 0.5210968104913557, "learning_rate": 4.993656170234026e-06, "loss": 0.6254, "step": 1607 }, { "epoch": 0.63541615213633, "grad_norm": 0.5529274377061739, "learning_rate": 4.9936450111675785e-06, "loss": 0.6312, "step": 1608 }, { "epoch": 0.6358113114349222, "grad_norm": 0.5464579530723207, "learning_rate": 4.993633842307607e-06, "loss": 0.616, "step": 1609 }, { "epoch": 0.6362064707335144, "grad_norm": 0.5262401377076071, "learning_rate": 4.9936226636541564e-06, "loss": 0.6426, "step": 1610 }, { "epoch": 0.6366016300321067, "grad_norm": 0.543994117033289, "learning_rate": 4.993611475207269e-06, "loss": 0.6192, "step": 1611 }, { "epoch": 0.6369967893306989, "grad_norm": 0.5341939948990526, "learning_rate": 4.993600276966992e-06, "loss": 0.6271, "step": 1612 }, { "epoch": 0.6373919486292912, "grad_norm": 0.5093829321261388, "learning_rate": 4.993589068933366e-06, "loss": 0.6177, "step": 1613 }, { "epoch": 0.6377871079278834, "grad_norm": 0.6103509660198887, "learning_rate": 4.993577851106437e-06, "loss": 0.6292, "step": 1614 }, { "epoch": 0.6381822672264756, "grad_norm": 0.5815804502591855, "learning_rate": 4.993566623486247e-06, "loss": 0.6644, "step": 1615 }, { "epoch": 0.6385774265250679, "grad_norm": 0.5192475918715985, "learning_rate": 4.993555386072843e-06, "loss": 0.6439, "step": 1616 }, { "epoch": 0.6389725858236601, "grad_norm": 0.5474735931150393, "learning_rate": 4.993544138866266e-06, "loss": 0.6393, "step": 1617 }, { "epoch": 0.6393677451222524, "grad_norm": 0.5395600119026902, "learning_rate": 4.993532881866564e-06, "loss": 0.6386, "step": 1618 }, { "epoch": 0.6397629044208446, "grad_norm": 0.5461307453791069, "learning_rate": 4.993521615073777e-06, "loss": 0.6423, "step": 1619 }, { "epoch": 0.6401580637194368, "grad_norm": 0.5129146758889324, "learning_rate": 4.9935103384879525e-06, "loss": 0.6263, "step": 1620 }, { "epoch": 0.6405532230180292, "grad_norm": 0.5338093770005661, "learning_rate": 4.9934990521091335e-06, "loss": 0.6464, "step": 1621 }, { "epoch": 0.6409483823166214, "grad_norm": 0.5019435816787823, "learning_rate": 4.993487755937363e-06, "loss": 0.5966, "step": 1622 }, { "epoch": 0.6413435416152137, "grad_norm": 0.520040205195016, "learning_rate": 4.993476449972689e-06, "loss": 0.6175, "step": 1623 }, { "epoch": 0.6417387009138059, "grad_norm": 0.6219021349863824, "learning_rate": 4.993465134215151e-06, "loss": 0.635, "step": 1624 }, { "epoch": 0.6421338602123982, "grad_norm": 0.4961034586172033, "learning_rate": 4.993453808664798e-06, "loss": 0.6482, "step": 1625 }, { "epoch": 0.6425290195109904, "grad_norm": 0.5096787165995785, "learning_rate": 4.9934424733216715e-06, "loss": 0.6633, "step": 1626 }, { "epoch": 0.6429241788095826, "grad_norm": 0.5237229903942505, "learning_rate": 4.993431128185818e-06, "loss": 0.6385, "step": 1627 }, { "epoch": 0.6433193381081749, "grad_norm": 0.4967553534767619, "learning_rate": 4.9934197732572794e-06, "loss": 0.6326, "step": 1628 }, { "epoch": 0.6437144974067671, "grad_norm": 0.49402878207474876, "learning_rate": 4.993408408536104e-06, "loss": 0.6437, "step": 1629 }, { "epoch": 0.6441096567053594, "grad_norm": 0.5955393513897179, "learning_rate": 4.993397034022333e-06, "loss": 0.6238, "step": 1630 }, { "epoch": 0.6445048160039516, "grad_norm": 0.5038622383402742, "learning_rate": 4.993385649716014e-06, "loss": 0.6286, "step": 1631 }, { "epoch": 0.6448999753025438, "grad_norm": 0.47945185187430733, "learning_rate": 4.9933742556171895e-06, "loss": 0.6099, "step": 1632 }, { "epoch": 0.6452951346011361, "grad_norm": 0.489392501913111, "learning_rate": 4.993362851725905e-06, "loss": 0.6319, "step": 1633 }, { "epoch": 0.6456902938997283, "grad_norm": 0.5010165910463097, "learning_rate": 4.993351438042204e-06, "loss": 0.6394, "step": 1634 }, { "epoch": 0.6460854531983206, "grad_norm": 0.4934200799303604, "learning_rate": 4.993340014566135e-06, "loss": 0.6357, "step": 1635 }, { "epoch": 0.6464806124969128, "grad_norm": 0.4912427926805577, "learning_rate": 4.993328581297738e-06, "loss": 0.623, "step": 1636 }, { "epoch": 0.646875771795505, "grad_norm": 0.4873510707511348, "learning_rate": 4.993317138237062e-06, "loss": 0.6391, "step": 1637 }, { "epoch": 0.6472709310940973, "grad_norm": 0.4948293050023501, "learning_rate": 4.99330568538415e-06, "loss": 0.6412, "step": 1638 }, { "epoch": 0.6476660903926895, "grad_norm": 0.4936219175231247, "learning_rate": 4.993294222739047e-06, "loss": 0.6268, "step": 1639 }, { "epoch": 0.6480612496912818, "grad_norm": 0.49996197505527556, "learning_rate": 4.993282750301799e-06, "loss": 0.6429, "step": 1640 }, { "epoch": 0.648456408989874, "grad_norm": 0.4941502207114622, "learning_rate": 4.993271268072449e-06, "loss": 0.6079, "step": 1641 }, { "epoch": 0.6488515682884662, "grad_norm": 0.49029556645390376, "learning_rate": 4.993259776051045e-06, "loss": 0.6307, "step": 1642 }, { "epoch": 0.6492467275870585, "grad_norm": 0.5033458347363889, "learning_rate": 4.9932482742376295e-06, "loss": 0.6331, "step": 1643 }, { "epoch": 0.6496418868856508, "grad_norm": 0.4996027449996174, "learning_rate": 4.993236762632248e-06, "loss": 0.6174, "step": 1644 }, { "epoch": 0.6500370461842431, "grad_norm": 0.4954371092717256, "learning_rate": 4.993225241234949e-06, "loss": 0.6355, "step": 1645 }, { "epoch": 0.6504322054828353, "grad_norm": 0.525740581307838, "learning_rate": 4.9932137100457735e-06, "loss": 0.6318, "step": 1646 }, { "epoch": 0.6508273647814276, "grad_norm": 0.5096572884835409, "learning_rate": 4.993202169064769e-06, "loss": 0.6369, "step": 1647 }, { "epoch": 0.6512225240800198, "grad_norm": 0.8344702188550295, "learning_rate": 4.993190618291979e-06, "loss": 0.6473, "step": 1648 }, { "epoch": 0.651617683378612, "grad_norm": 0.4943527469749559, "learning_rate": 4.993179057727452e-06, "loss": 0.6305, "step": 1649 }, { "epoch": 0.6520128426772043, "grad_norm": 0.5123537897586123, "learning_rate": 4.993167487371231e-06, "loss": 0.6363, "step": 1650 }, { "epoch": 0.6524080019757965, "grad_norm": 0.48337632188932905, "learning_rate": 4.993155907223362e-06, "loss": 0.6462, "step": 1651 }, { "epoch": 0.6528031612743888, "grad_norm": 0.4941193733493147, "learning_rate": 4.993144317283891e-06, "loss": 0.6395, "step": 1652 }, { "epoch": 0.653198320572981, "grad_norm": 0.5171725409360232, "learning_rate": 4.993132717552862e-06, "loss": 0.615, "step": 1653 }, { "epoch": 0.6535934798715732, "grad_norm": 0.5028964520752601, "learning_rate": 4.9931211080303225e-06, "loss": 0.6264, "step": 1654 }, { "epoch": 0.6539886391701655, "grad_norm": 0.5170261798292486, "learning_rate": 4.9931094887163165e-06, "loss": 0.6232, "step": 1655 }, { "epoch": 0.6543837984687577, "grad_norm": 0.5393500555133617, "learning_rate": 4.993097859610891e-06, "loss": 0.6295, "step": 1656 }, { "epoch": 0.65477895776735, "grad_norm": 0.5170632791663832, "learning_rate": 4.99308622071409e-06, "loss": 0.6457, "step": 1657 }, { "epoch": 0.6551741170659422, "grad_norm": 0.5088037328336552, "learning_rate": 4.993074572025961e-06, "loss": 0.6329, "step": 1658 }, { "epoch": 0.6555692763645344, "grad_norm": 0.5517685712418717, "learning_rate": 4.993062913546549e-06, "loss": 0.6151, "step": 1659 }, { "epoch": 0.6559644356631267, "grad_norm": 0.6574193345875109, "learning_rate": 4.9930512452758996e-06, "loss": 0.6541, "step": 1660 }, { "epoch": 0.6563595949617189, "grad_norm": 0.50641713145241, "learning_rate": 4.993039567214058e-06, "loss": 0.6439, "step": 1661 }, { "epoch": 0.6567547542603112, "grad_norm": 0.5183226080631096, "learning_rate": 4.993027879361072e-06, "loss": 0.6441, "step": 1662 }, { "epoch": 0.6571499135589034, "grad_norm": 0.5026167336939322, "learning_rate": 4.993016181716987e-06, "loss": 0.6335, "step": 1663 }, { "epoch": 0.6575450728574956, "grad_norm": 0.5060260690365562, "learning_rate": 4.993004474281846e-06, "loss": 0.6447, "step": 1664 }, { "epoch": 0.6579402321560879, "grad_norm": 0.5057941400086423, "learning_rate": 4.992992757055699e-06, "loss": 0.6291, "step": 1665 }, { "epoch": 0.6583353914546801, "grad_norm": 0.506979357825653, "learning_rate": 4.9929810300385894e-06, "loss": 0.635, "step": 1666 }, { "epoch": 0.6587305507532724, "grad_norm": 0.4818917872233048, "learning_rate": 4.992969293230565e-06, "loss": 0.6382, "step": 1667 }, { "epoch": 0.6591257100518647, "grad_norm": 0.4925360002260035, "learning_rate": 4.992957546631671e-06, "loss": 0.6142, "step": 1668 }, { "epoch": 0.659520869350457, "grad_norm": 0.5046203653064728, "learning_rate": 4.992945790241952e-06, "loss": 0.6304, "step": 1669 }, { "epoch": 0.6599160286490492, "grad_norm": 0.49179177381723277, "learning_rate": 4.992934024061456e-06, "loss": 0.6293, "step": 1670 }, { "epoch": 0.6603111879476414, "grad_norm": 0.4818457491170986, "learning_rate": 4.9929222480902305e-06, "loss": 0.612, "step": 1671 }, { "epoch": 0.6607063472462337, "grad_norm": 0.4853439003519475, "learning_rate": 4.992910462328319e-06, "loss": 0.6331, "step": 1672 }, { "epoch": 0.6611015065448259, "grad_norm": 0.4736906753344473, "learning_rate": 4.99289866677577e-06, "loss": 0.6233, "step": 1673 }, { "epoch": 0.6614966658434182, "grad_norm": 0.4875165225064638, "learning_rate": 4.992886861432628e-06, "loss": 0.6267, "step": 1674 }, { "epoch": 0.6618918251420104, "grad_norm": 0.4808588973957292, "learning_rate": 4.99287504629894e-06, "loss": 0.6172, "step": 1675 }, { "epoch": 0.6622869844406026, "grad_norm": 0.4907208193211784, "learning_rate": 4.992863221374753e-06, "loss": 0.617, "step": 1676 }, { "epoch": 0.6626821437391949, "grad_norm": 0.48958473331720903, "learning_rate": 4.992851386660114e-06, "loss": 0.6482, "step": 1677 }, { "epoch": 0.6630773030377871, "grad_norm": 0.4746449843260302, "learning_rate": 4.992839542155067e-06, "loss": 0.6363, "step": 1678 }, { "epoch": 0.6634724623363794, "grad_norm": 0.5081053936197776, "learning_rate": 4.9928276878596605e-06, "loss": 0.6349, "step": 1679 }, { "epoch": 0.6638676216349716, "grad_norm": 0.5059779639168953, "learning_rate": 4.99281582377394e-06, "loss": 0.6479, "step": 1680 }, { "epoch": 0.6642627809335638, "grad_norm": 0.49460659054649947, "learning_rate": 4.992803949897954e-06, "loss": 0.6161, "step": 1681 }, { "epoch": 0.6646579402321561, "grad_norm": 0.48860656949577014, "learning_rate": 4.992792066231746e-06, "loss": 0.6147, "step": 1682 }, { "epoch": 0.6650530995307483, "grad_norm": 0.4990949380069287, "learning_rate": 4.992780172775366e-06, "loss": 0.6687, "step": 1683 }, { "epoch": 0.6654482588293406, "grad_norm": 0.50449571493179, "learning_rate": 4.9927682695288584e-06, "loss": 0.6234, "step": 1684 }, { "epoch": 0.6658434181279328, "grad_norm": 0.5097265264943369, "learning_rate": 4.992756356492271e-06, "loss": 0.6345, "step": 1685 }, { "epoch": 0.666238577426525, "grad_norm": 0.4883277462528845, "learning_rate": 4.99274443366565e-06, "loss": 0.6419, "step": 1686 }, { "epoch": 0.6666337367251173, "grad_norm": 0.47970413042564153, "learning_rate": 4.992732501049044e-06, "loss": 0.6036, "step": 1687 }, { "epoch": 0.6670288960237095, "grad_norm": 0.4935176772839213, "learning_rate": 4.992720558642496e-06, "loss": 0.6463, "step": 1688 }, { "epoch": 0.6674240553223018, "grad_norm": 0.4759709026940183, "learning_rate": 4.9927086064460575e-06, "loss": 0.6537, "step": 1689 }, { "epoch": 0.667819214620894, "grad_norm": 0.47430486816189027, "learning_rate": 4.992696644459771e-06, "loss": 0.6317, "step": 1690 }, { "epoch": 0.6682143739194862, "grad_norm": 0.5034833730971029, "learning_rate": 4.992684672683688e-06, "loss": 0.627, "step": 1691 }, { "epoch": 0.6686095332180786, "grad_norm": 0.519836264221229, "learning_rate": 4.992672691117852e-06, "loss": 0.6538, "step": 1692 }, { "epoch": 0.6690046925166708, "grad_norm": 0.5050122558075558, "learning_rate": 4.992660699762311e-06, "loss": 0.6473, "step": 1693 }, { "epoch": 0.6693998518152631, "grad_norm": 0.4707732129541408, "learning_rate": 4.992648698617113e-06, "loss": 0.6076, "step": 1694 }, { "epoch": 0.6697950111138553, "grad_norm": 0.49511795117802215, "learning_rate": 4.9926366876823054e-06, "loss": 0.6271, "step": 1695 }, { "epoch": 0.6701901704124476, "grad_norm": 0.49908332035521424, "learning_rate": 4.992624666957932e-06, "loss": 0.6431, "step": 1696 }, { "epoch": 0.6705853297110398, "grad_norm": 0.48240971327251186, "learning_rate": 4.992612636444045e-06, "loss": 0.6218, "step": 1697 }, { "epoch": 0.670980489009632, "grad_norm": 0.5028260213765751, "learning_rate": 4.992600596140688e-06, "loss": 0.6142, "step": 1698 }, { "epoch": 0.6713756483082243, "grad_norm": 0.49372992278625305, "learning_rate": 4.99258854604791e-06, "loss": 0.6289, "step": 1699 }, { "epoch": 0.6717708076068165, "grad_norm": 0.48990901770488715, "learning_rate": 4.9925764861657575e-06, "loss": 0.6139, "step": 1700 }, { "epoch": 0.6721659669054088, "grad_norm": 0.5062662633187875, "learning_rate": 4.9925644164942776e-06, "loss": 0.6363, "step": 1701 }, { "epoch": 0.672561126204001, "grad_norm": 0.5000686626853466, "learning_rate": 4.992552337033519e-06, "loss": 0.6062, "step": 1702 }, { "epoch": 0.6729562855025932, "grad_norm": 0.49142806262561756, "learning_rate": 4.992540247783528e-06, "loss": 0.639, "step": 1703 }, { "epoch": 0.6733514448011855, "grad_norm": 0.49049346875912636, "learning_rate": 4.992528148744353e-06, "loss": 0.6244, "step": 1704 }, { "epoch": 0.6737466040997777, "grad_norm": 0.47745945762367137, "learning_rate": 4.99251603991604e-06, "loss": 0.6541, "step": 1705 }, { "epoch": 0.67414176339837, "grad_norm": 0.4842192684853816, "learning_rate": 4.992503921298638e-06, "loss": 0.621, "step": 1706 }, { "epoch": 0.6745369226969622, "grad_norm": 0.4870080088135584, "learning_rate": 4.992491792892194e-06, "loss": 0.6359, "step": 1707 }, { "epoch": 0.6749320819955544, "grad_norm": 0.48850423253720654, "learning_rate": 4.992479654696757e-06, "loss": 0.6375, "step": 1708 }, { "epoch": 0.6753272412941467, "grad_norm": 0.49822947926968186, "learning_rate": 4.992467506712372e-06, "loss": 0.6399, "step": 1709 }, { "epoch": 0.6757224005927389, "grad_norm": 0.48888282315789844, "learning_rate": 4.992455348939088e-06, "loss": 0.6275, "step": 1710 }, { "epoch": 0.6761175598913312, "grad_norm": 0.4782716794651655, "learning_rate": 4.992443181376954e-06, "loss": 0.6353, "step": 1711 }, { "epoch": 0.6765127191899234, "grad_norm": 0.49789653142772966, "learning_rate": 4.992431004026016e-06, "loss": 0.6202, "step": 1712 }, { "epoch": 0.6769078784885156, "grad_norm": 0.49660191988375124, "learning_rate": 4.992418816886322e-06, "loss": 0.6237, "step": 1713 }, { "epoch": 0.6773030377871079, "grad_norm": 0.5012417152818854, "learning_rate": 4.992406619957922e-06, "loss": 0.6216, "step": 1714 }, { "epoch": 0.6776981970857002, "grad_norm": 0.4839082971693346, "learning_rate": 4.992394413240861e-06, "loss": 0.6118, "step": 1715 }, { "epoch": 0.6780933563842925, "grad_norm": 0.4965208023800787, "learning_rate": 4.992382196735188e-06, "loss": 0.6224, "step": 1716 }, { "epoch": 0.6784885156828847, "grad_norm": 0.5042338428138545, "learning_rate": 4.992369970440952e-06, "loss": 0.643, "step": 1717 }, { "epoch": 0.678883674981477, "grad_norm": 0.4975143700279896, "learning_rate": 4.9923577343582e-06, "loss": 0.6136, "step": 1718 }, { "epoch": 0.6792788342800692, "grad_norm": 0.5281177934717292, "learning_rate": 4.992345488486979e-06, "loss": 0.6397, "step": 1719 }, { "epoch": 0.6796739935786614, "grad_norm": 0.4881426918498292, "learning_rate": 4.99233323282734e-06, "loss": 0.6249, "step": 1720 }, { "epoch": 0.6800691528772537, "grad_norm": 0.49409156351025163, "learning_rate": 4.992320967379329e-06, "loss": 0.6397, "step": 1721 }, { "epoch": 0.6804643121758459, "grad_norm": 0.48346096985864084, "learning_rate": 4.992308692142995e-06, "loss": 0.633, "step": 1722 }, { "epoch": 0.6808594714744381, "grad_norm": 0.4926531840900509, "learning_rate": 4.992296407118385e-06, "loss": 0.6346, "step": 1723 }, { "epoch": 0.6812546307730304, "grad_norm": 0.4894311266855414, "learning_rate": 4.992284112305549e-06, "loss": 0.6086, "step": 1724 }, { "epoch": 0.6816497900716226, "grad_norm": 0.49490153693793343, "learning_rate": 4.992271807704534e-06, "loss": 0.6318, "step": 1725 }, { "epoch": 0.6820449493702149, "grad_norm": 0.4828850751455796, "learning_rate": 4.9922594933153884e-06, "loss": 0.632, "step": 1726 }, { "epoch": 0.6824401086688071, "grad_norm": 0.5330475769017348, "learning_rate": 4.992247169138161e-06, "loss": 0.6269, "step": 1727 }, { "epoch": 0.6828352679673994, "grad_norm": 0.48050985014818776, "learning_rate": 4.9922348351729e-06, "loss": 0.6349, "step": 1728 }, { "epoch": 0.6832304272659916, "grad_norm": 0.48483280529770134, "learning_rate": 4.992222491419655e-06, "loss": 0.6555, "step": 1729 }, { "epoch": 0.6836255865645838, "grad_norm": 0.5080406635977245, "learning_rate": 4.992210137878472e-06, "loss": 0.6359, "step": 1730 }, { "epoch": 0.6840207458631761, "grad_norm": 0.5089264759702186, "learning_rate": 4.9921977745494025e-06, "loss": 0.6406, "step": 1731 }, { "epoch": 0.6844159051617683, "grad_norm": 0.48659054045056965, "learning_rate": 4.992185401432493e-06, "loss": 0.602, "step": 1732 }, { "epoch": 0.6848110644603606, "grad_norm": 0.49059051256711417, "learning_rate": 4.992173018527791e-06, "loss": 0.6035, "step": 1733 }, { "epoch": 0.6852062237589528, "grad_norm": 0.5182172495516812, "learning_rate": 4.992160625835348e-06, "loss": 0.643, "step": 1734 }, { "epoch": 0.685601383057545, "grad_norm": 0.49425214892911673, "learning_rate": 4.992148223355211e-06, "loss": 0.6326, "step": 1735 }, { "epoch": 0.6859965423561373, "grad_norm": 0.5023535053495725, "learning_rate": 4.9921358110874295e-06, "loss": 0.6416, "step": 1736 }, { "epoch": 0.6863917016547295, "grad_norm": 0.6023947929077457, "learning_rate": 4.992123389032052e-06, "loss": 0.6429, "step": 1737 }, { "epoch": 0.6867868609533218, "grad_norm": 0.5037834252439649, "learning_rate": 4.992110957189126e-06, "loss": 0.6277, "step": 1738 }, { "epoch": 0.6871820202519141, "grad_norm": 0.49549521269729524, "learning_rate": 4.992098515558702e-06, "loss": 0.6305, "step": 1739 }, { "epoch": 0.6875771795505063, "grad_norm": 0.49551911885336786, "learning_rate": 4.992086064140829e-06, "loss": 0.6248, "step": 1740 }, { "epoch": 0.6879723388490986, "grad_norm": 0.5246656926007554, "learning_rate": 4.9920736029355544e-06, "loss": 0.6281, "step": 1741 }, { "epoch": 0.6883674981476908, "grad_norm": 0.5001944530177468, "learning_rate": 4.992061131942929e-06, "loss": 0.6261, "step": 1742 }, { "epoch": 0.6887626574462831, "grad_norm": 0.4826806905887991, "learning_rate": 4.992048651163e-06, "loss": 0.6205, "step": 1743 }, { "epoch": 0.6891578167448753, "grad_norm": 0.5077638034008282, "learning_rate": 4.992036160595817e-06, "loss": 0.6366, "step": 1744 }, { "epoch": 0.6895529760434675, "grad_norm": 0.4957736986969226, "learning_rate": 4.9920236602414295e-06, "loss": 0.6197, "step": 1745 }, { "epoch": 0.6899481353420598, "grad_norm": 0.4934343040818385, "learning_rate": 4.992011150099886e-06, "loss": 0.6381, "step": 1746 }, { "epoch": 0.690343294640652, "grad_norm": 0.492375853768171, "learning_rate": 4.991998630171236e-06, "loss": 0.6313, "step": 1747 }, { "epoch": 0.6907384539392443, "grad_norm": 0.47870636501116376, "learning_rate": 4.991986100455529e-06, "loss": 0.6038, "step": 1748 }, { "epoch": 0.6911336132378365, "grad_norm": 0.5048304913807063, "learning_rate": 4.991973560952813e-06, "loss": 0.6293, "step": 1749 }, { "epoch": 0.6915287725364287, "grad_norm": 0.5249637722841536, "learning_rate": 4.991961011663139e-06, "loss": 0.6307, "step": 1750 }, { "epoch": 0.691923931835021, "grad_norm": 0.5208870005299038, "learning_rate": 4.991948452586555e-06, "loss": 0.6332, "step": 1751 }, { "epoch": 0.6923190911336132, "grad_norm": 0.5015944555669404, "learning_rate": 4.991935883723111e-06, "loss": 0.5839, "step": 1752 }, { "epoch": 0.6927142504322055, "grad_norm": 0.5163525589308585, "learning_rate": 4.991923305072856e-06, "loss": 0.6235, "step": 1753 }, { "epoch": 0.6931094097307977, "grad_norm": 0.5032310673814737, "learning_rate": 4.991910716635838e-06, "loss": 0.6263, "step": 1754 }, { "epoch": 0.69350456902939, "grad_norm": 0.47440600362048463, "learning_rate": 4.991898118412109e-06, "loss": 0.6376, "step": 1755 }, { "epoch": 0.6938997283279822, "grad_norm": 0.47177908043565303, "learning_rate": 4.991885510401717e-06, "loss": 0.5965, "step": 1756 }, { "epoch": 0.6942948876265744, "grad_norm": 0.5050010124065104, "learning_rate": 4.991872892604713e-06, "loss": 0.6443, "step": 1757 }, { "epoch": 0.6946900469251667, "grad_norm": 0.4703390805427689, "learning_rate": 4.991860265021144e-06, "loss": 0.6371, "step": 1758 }, { "epoch": 0.6950852062237589, "grad_norm": 0.5965162740676478, "learning_rate": 4.991847627651062e-06, "loss": 0.6325, "step": 1759 }, { "epoch": 0.6954803655223512, "grad_norm": 0.4995953085902974, "learning_rate": 4.991834980494515e-06, "loss": 0.6124, "step": 1760 }, { "epoch": 0.6958755248209434, "grad_norm": 0.48410450082082324, "learning_rate": 4.991822323551554e-06, "loss": 0.6543, "step": 1761 }, { "epoch": 0.6962706841195357, "grad_norm": 0.5097060752514024, "learning_rate": 4.991809656822227e-06, "loss": 0.6341, "step": 1762 }, { "epoch": 0.696665843418128, "grad_norm": 0.5028866911312128, "learning_rate": 4.991796980306586e-06, "loss": 0.6196, "step": 1763 }, { "epoch": 0.6970610027167202, "grad_norm": 0.48181973244537724, "learning_rate": 4.991784294004679e-06, "loss": 0.6142, "step": 1764 }, { "epoch": 0.6974561620153125, "grad_norm": 0.481960114220102, "learning_rate": 4.991771597916556e-06, "loss": 0.6064, "step": 1765 }, { "epoch": 0.6978513213139047, "grad_norm": 0.4962930971445703, "learning_rate": 4.9917588920422675e-06, "loss": 0.6415, "step": 1766 }, { "epoch": 0.698246480612497, "grad_norm": 0.49440862308297445, "learning_rate": 4.991746176381863e-06, "loss": 0.6169, "step": 1767 }, { "epoch": 0.6986416399110892, "grad_norm": 0.5047352213306215, "learning_rate": 4.991733450935393e-06, "loss": 0.6304, "step": 1768 }, { "epoch": 0.6990367992096814, "grad_norm": 0.4733163582966052, "learning_rate": 4.991720715702907e-06, "loss": 0.6272, "step": 1769 }, { "epoch": 0.6994319585082737, "grad_norm": 0.5098227044422302, "learning_rate": 4.991707970684455e-06, "loss": 0.6364, "step": 1770 }, { "epoch": 0.6998271178068659, "grad_norm": 0.49090533225232885, "learning_rate": 4.991695215880087e-06, "loss": 0.6415, "step": 1771 }, { "epoch": 0.7002222771054581, "grad_norm": 0.49614105702660316, "learning_rate": 4.991682451289853e-06, "loss": 0.6588, "step": 1772 }, { "epoch": 0.7006174364040504, "grad_norm": 0.47774693460503354, "learning_rate": 4.991669676913804e-06, "loss": 0.6316, "step": 1773 }, { "epoch": 0.7010125957026426, "grad_norm": 0.4822380940669454, "learning_rate": 4.991656892751989e-06, "loss": 0.6086, "step": 1774 }, { "epoch": 0.7014077550012349, "grad_norm": 0.472072856981578, "learning_rate": 4.99164409880446e-06, "loss": 0.6126, "step": 1775 }, { "epoch": 0.7018029142998271, "grad_norm": 0.5033099153233312, "learning_rate": 4.991631295071265e-06, "loss": 0.6334, "step": 1776 }, { "epoch": 0.7021980735984193, "grad_norm": 0.48177193738949203, "learning_rate": 4.991618481552455e-06, "loss": 0.6191, "step": 1777 }, { "epoch": 0.7025932328970116, "grad_norm": 0.5635001881112269, "learning_rate": 4.9916056582480805e-06, "loss": 0.6255, "step": 1778 }, { "epoch": 0.7029883921956038, "grad_norm": 0.497837050692373, "learning_rate": 4.991592825158192e-06, "loss": 0.6277, "step": 1779 }, { "epoch": 0.7033835514941961, "grad_norm": 0.5046834438070316, "learning_rate": 4.991579982282841e-06, "loss": 0.6419, "step": 1780 }, { "epoch": 0.7037787107927883, "grad_norm": 0.4902373827129985, "learning_rate": 4.991567129622076e-06, "loss": 0.6432, "step": 1781 }, { "epoch": 0.7041738700913805, "grad_norm": 0.5067434826741889, "learning_rate": 4.991554267175947e-06, "loss": 0.6453, "step": 1782 }, { "epoch": 0.7045690293899728, "grad_norm": 0.49398025290400144, "learning_rate": 4.991541394944508e-06, "loss": 0.6082, "step": 1783 }, { "epoch": 0.704964188688565, "grad_norm": 0.500932111828932, "learning_rate": 4.991528512927806e-06, "loss": 0.6222, "step": 1784 }, { "epoch": 0.7053593479871573, "grad_norm": 0.4855703145150172, "learning_rate": 4.991515621125893e-06, "loss": 0.6395, "step": 1785 }, { "epoch": 0.7057545072857496, "grad_norm": 0.48895788982354876, "learning_rate": 4.99150271953882e-06, "loss": 0.6254, "step": 1786 }, { "epoch": 0.7061496665843419, "grad_norm": 0.49706293010128083, "learning_rate": 4.9914898081666375e-06, "loss": 0.6225, "step": 1787 }, { "epoch": 0.7065448258829341, "grad_norm": 0.5177841757480415, "learning_rate": 4.991476887009395e-06, "loss": 0.6477, "step": 1788 }, { "epoch": 0.7069399851815263, "grad_norm": 0.4768336877804729, "learning_rate": 4.991463956067145e-06, "loss": 0.6427, "step": 1789 }, { "epoch": 0.7073351444801186, "grad_norm": 0.48872337485295864, "learning_rate": 4.9914510153399375e-06, "loss": 0.6283, "step": 1790 }, { "epoch": 0.7077303037787108, "grad_norm": 0.5233174541443238, "learning_rate": 4.9914380648278224e-06, "loss": 0.6483, "step": 1791 }, { "epoch": 0.7081254630773031, "grad_norm": 0.48167766487074626, "learning_rate": 4.991425104530852e-06, "loss": 0.6253, "step": 1792 }, { "epoch": 0.7085206223758953, "grad_norm": 0.49635669925532444, "learning_rate": 4.991412134449078e-06, "loss": 0.6354, "step": 1793 }, { "epoch": 0.7089157816744875, "grad_norm": 0.494107036823578, "learning_rate": 4.991399154582548e-06, "loss": 0.618, "step": 1794 }, { "epoch": 0.7093109409730798, "grad_norm": 0.48336185208770716, "learning_rate": 4.991386164931316e-06, "loss": 0.6236, "step": 1795 }, { "epoch": 0.709706100271672, "grad_norm": 0.49749633745530736, "learning_rate": 4.991373165495431e-06, "loss": 0.6336, "step": 1796 }, { "epoch": 0.7101012595702643, "grad_norm": 0.5034095057979783, "learning_rate": 4.991360156274946e-06, "loss": 0.647, "step": 1797 }, { "epoch": 0.7104964188688565, "grad_norm": 0.5322413221010839, "learning_rate": 4.9913471372699115e-06, "loss": 0.6011, "step": 1798 }, { "epoch": 0.7108915781674487, "grad_norm": 0.49545526697939474, "learning_rate": 4.991334108480377e-06, "loss": 0.6364, "step": 1799 }, { "epoch": 0.711286737466041, "grad_norm": 0.49309817729132516, "learning_rate": 4.9913210699063965e-06, "loss": 0.614, "step": 1800 }, { "epoch": 0.7116818967646332, "grad_norm": 0.5691944811153536, "learning_rate": 4.991308021548018e-06, "loss": 0.6186, "step": 1801 }, { "epoch": 0.7120770560632255, "grad_norm": 0.5449350122228545, "learning_rate": 4.9912949634052955e-06, "loss": 0.6247, "step": 1802 }, { "epoch": 0.7124722153618177, "grad_norm": 0.5049416194812504, "learning_rate": 4.991281895478279e-06, "loss": 0.5903, "step": 1803 }, { "epoch": 0.71286737466041, "grad_norm": 0.47208342204692466, "learning_rate": 4.9912688177670195e-06, "loss": 0.6231, "step": 1804 }, { "epoch": 0.7132625339590022, "grad_norm": 0.5243565371621691, "learning_rate": 4.991255730271569e-06, "loss": 0.6277, "step": 1805 }, { "epoch": 0.7136576932575944, "grad_norm": 0.5670677049532014, "learning_rate": 4.991242632991979e-06, "loss": 0.6372, "step": 1806 }, { "epoch": 0.7140528525561867, "grad_norm": 0.5158307483356673, "learning_rate": 4.9912295259283015e-06, "loss": 0.6125, "step": 1807 }, { "epoch": 0.7144480118547789, "grad_norm": 0.508525557153672, "learning_rate": 4.991216409080586e-06, "loss": 0.6101, "step": 1808 }, { "epoch": 0.7148431711533711, "grad_norm": 0.5076780173186427, "learning_rate": 4.9912032824488855e-06, "loss": 0.6345, "step": 1809 }, { "epoch": 0.7152383304519635, "grad_norm": 0.5069119882804742, "learning_rate": 4.991190146033251e-06, "loss": 0.5988, "step": 1810 }, { "epoch": 0.7156334897505557, "grad_norm": 0.5111101135704393, "learning_rate": 4.991176999833734e-06, "loss": 0.6285, "step": 1811 }, { "epoch": 0.716028649049148, "grad_norm": 0.494023778451646, "learning_rate": 4.991163843850388e-06, "loss": 0.6271, "step": 1812 }, { "epoch": 0.7164238083477402, "grad_norm": 0.49401028308719136, "learning_rate": 4.991150678083262e-06, "loss": 0.6162, "step": 1813 }, { "epoch": 0.7168189676463325, "grad_norm": 0.5455894933511418, "learning_rate": 4.99113750253241e-06, "loss": 0.6207, "step": 1814 }, { "epoch": 0.7172141269449247, "grad_norm": 0.5038767858357743, "learning_rate": 4.991124317197881e-06, "loss": 0.6508, "step": 1815 }, { "epoch": 0.7176092862435169, "grad_norm": 0.49228914244907324, "learning_rate": 4.991111122079729e-06, "loss": 0.6566, "step": 1816 }, { "epoch": 0.7180044455421092, "grad_norm": 0.4939671059206662, "learning_rate": 4.991097917178005e-06, "loss": 0.6153, "step": 1817 }, { "epoch": 0.7183996048407014, "grad_norm": 0.48627356885183, "learning_rate": 4.991084702492761e-06, "loss": 0.6016, "step": 1818 }, { "epoch": 0.7187947641392937, "grad_norm": 0.4940616525370284, "learning_rate": 4.99107147802405e-06, "loss": 0.6139, "step": 1819 }, { "epoch": 0.7191899234378859, "grad_norm": 0.498285217304123, "learning_rate": 4.991058243771922e-06, "loss": 0.6108, "step": 1820 }, { "epoch": 0.7195850827364781, "grad_norm": 0.5053900244808598, "learning_rate": 4.9910449997364295e-06, "loss": 0.632, "step": 1821 }, { "epoch": 0.7199802420350704, "grad_norm": 0.5073136517973986, "learning_rate": 4.991031745917626e-06, "loss": 0.6369, "step": 1822 }, { "epoch": 0.7203754013336626, "grad_norm": 0.5195708029162378, "learning_rate": 4.991018482315561e-06, "loss": 0.6273, "step": 1823 }, { "epoch": 0.7207705606322549, "grad_norm": 0.48766761832670025, "learning_rate": 4.99100520893029e-06, "loss": 0.6362, "step": 1824 }, { "epoch": 0.7211657199308471, "grad_norm": 0.5320190857436362, "learning_rate": 4.990991925761862e-06, "loss": 0.6209, "step": 1825 }, { "epoch": 0.7215608792294393, "grad_norm": 0.5362331175084071, "learning_rate": 4.99097863281033e-06, "loss": 0.6234, "step": 1826 }, { "epoch": 0.7219560385280316, "grad_norm": 0.46797239650415506, "learning_rate": 4.990965330075746e-06, "loss": 0.6149, "step": 1827 }, { "epoch": 0.7223511978266238, "grad_norm": 0.49165256567192805, "learning_rate": 4.990952017558164e-06, "loss": 0.6197, "step": 1828 }, { "epoch": 0.7227463571252161, "grad_norm": 0.5734478891642341, "learning_rate": 4.9909386952576355e-06, "loss": 0.6099, "step": 1829 }, { "epoch": 0.7231415164238083, "grad_norm": 0.5067294740219436, "learning_rate": 4.9909253631742115e-06, "loss": 0.6278, "step": 1830 }, { "epoch": 0.7235366757224005, "grad_norm": 0.5289952166160319, "learning_rate": 4.990912021307945e-06, "loss": 0.6258, "step": 1831 }, { "epoch": 0.7239318350209928, "grad_norm": 0.5043539597608305, "learning_rate": 4.990898669658889e-06, "loss": 0.6166, "step": 1832 }, { "epoch": 0.7243269943195851, "grad_norm": 0.5137481323998266, "learning_rate": 4.990885308227096e-06, "loss": 0.6349, "step": 1833 }, { "epoch": 0.7247221536181774, "grad_norm": 0.5223800914526201, "learning_rate": 4.9908719370126175e-06, "loss": 0.6335, "step": 1834 }, { "epoch": 0.7251173129167696, "grad_norm": 0.49483421508060577, "learning_rate": 4.990858556015507e-06, "loss": 0.6243, "step": 1835 }, { "epoch": 0.7255124722153619, "grad_norm": 0.507533775647517, "learning_rate": 4.990845165235816e-06, "loss": 0.6288, "step": 1836 }, { "epoch": 0.7259076315139541, "grad_norm": 0.5337126385462883, "learning_rate": 4.990831764673598e-06, "loss": 0.6422, "step": 1837 }, { "epoch": 0.7263027908125463, "grad_norm": 0.8373622357864616, "learning_rate": 4.9908183543289055e-06, "loss": 0.6366, "step": 1838 }, { "epoch": 0.7266979501111386, "grad_norm": 0.4867027048473409, "learning_rate": 4.99080493420179e-06, "loss": 0.6196, "step": 1839 }, { "epoch": 0.7270931094097308, "grad_norm": 0.5143251024891161, "learning_rate": 4.990791504292307e-06, "loss": 0.6378, "step": 1840 }, { "epoch": 0.7274882687083231, "grad_norm": 0.4925899162999197, "learning_rate": 4.990778064600506e-06, "loss": 0.6077, "step": 1841 }, { "epoch": 0.7278834280069153, "grad_norm": 0.46958077754810756, "learning_rate": 4.990764615126442e-06, "loss": 0.6249, "step": 1842 }, { "epoch": 0.7282785873055075, "grad_norm": 0.502560018511133, "learning_rate": 4.990751155870167e-06, "loss": 0.6106, "step": 1843 }, { "epoch": 0.7286737466040998, "grad_norm": 0.5082154553642297, "learning_rate": 4.990737686831734e-06, "loss": 0.6111, "step": 1844 }, { "epoch": 0.729068905902692, "grad_norm": 0.48176602480847014, "learning_rate": 4.990724208011195e-06, "loss": 0.6225, "step": 1845 }, { "epoch": 0.7294640652012843, "grad_norm": 0.47402002408096394, "learning_rate": 4.990710719408604e-06, "loss": 0.6258, "step": 1846 }, { "epoch": 0.7298592244998765, "grad_norm": 0.4718364904587041, "learning_rate": 4.9906972210240146e-06, "loss": 0.6206, "step": 1847 }, { "epoch": 0.7302543837984687, "grad_norm": 0.48775817606336896, "learning_rate": 4.990683712857479e-06, "loss": 0.6176, "step": 1848 }, { "epoch": 0.730649543097061, "grad_norm": 0.48862016729905866, "learning_rate": 4.99067019490905e-06, "loss": 0.6477, "step": 1849 }, { "epoch": 0.7310447023956532, "grad_norm": 0.4851256824616345, "learning_rate": 4.990656667178781e-06, "loss": 0.6297, "step": 1850 }, { "epoch": 0.7314398616942455, "grad_norm": 0.4900449934686768, "learning_rate": 4.9906431296667235e-06, "loss": 0.6225, "step": 1851 }, { "epoch": 0.7318350209928377, "grad_norm": 0.4803057428812473, "learning_rate": 4.9906295823729334e-06, "loss": 0.6096, "step": 1852 }, { "epoch": 0.7322301802914299, "grad_norm": 0.5084616847011403, "learning_rate": 4.990616025297462e-06, "loss": 0.6402, "step": 1853 }, { "epoch": 0.7326253395900222, "grad_norm": 0.48222218139870393, "learning_rate": 4.990602458440364e-06, "loss": 0.618, "step": 1854 }, { "epoch": 0.7330204988886144, "grad_norm": 0.4768756457104444, "learning_rate": 4.990588881801692e-06, "loss": 0.6277, "step": 1855 }, { "epoch": 0.7334156581872067, "grad_norm": 0.47054304737078606, "learning_rate": 4.990575295381499e-06, "loss": 0.639, "step": 1856 }, { "epoch": 0.733810817485799, "grad_norm": 0.4954513777862497, "learning_rate": 4.990561699179838e-06, "loss": 0.6336, "step": 1857 }, { "epoch": 0.7342059767843913, "grad_norm": 0.4977664948935915, "learning_rate": 4.990548093196765e-06, "loss": 0.6408, "step": 1858 }, { "epoch": 0.7346011360829835, "grad_norm": 0.4821488918104782, "learning_rate": 4.9905344774323285e-06, "loss": 0.6477, "step": 1859 }, { "epoch": 0.7349962953815757, "grad_norm": 0.47164432904373776, "learning_rate": 4.990520851886586e-06, "loss": 0.6116, "step": 1860 }, { "epoch": 0.735391454680168, "grad_norm": 0.5130522518926274, "learning_rate": 4.990507216559591e-06, "loss": 0.6285, "step": 1861 }, { "epoch": 0.7357866139787602, "grad_norm": 0.5085662976075583, "learning_rate": 4.990493571451396e-06, "loss": 0.6263, "step": 1862 }, { "epoch": 0.7361817732773525, "grad_norm": 0.48426222103509337, "learning_rate": 4.990479916562053e-06, "loss": 0.6231, "step": 1863 }, { "epoch": 0.7365769325759447, "grad_norm": 0.5176828196274378, "learning_rate": 4.990466251891618e-06, "loss": 0.6608, "step": 1864 }, { "epoch": 0.7369720918745369, "grad_norm": 0.5221468288688574, "learning_rate": 4.990452577440144e-06, "loss": 0.6274, "step": 1865 }, { "epoch": 0.7373672511731292, "grad_norm": 0.47850681281032353, "learning_rate": 4.990438893207684e-06, "loss": 0.6229, "step": 1866 }, { "epoch": 0.7377624104717214, "grad_norm": 0.543170513792422, "learning_rate": 4.990425199194293e-06, "loss": 0.6256, "step": 1867 }, { "epoch": 0.7381575697703137, "grad_norm": 0.5436786053164533, "learning_rate": 4.990411495400024e-06, "loss": 0.6326, "step": 1868 }, { "epoch": 0.7385527290689059, "grad_norm": 0.4779013731244117, "learning_rate": 4.9903977818249305e-06, "loss": 0.624, "step": 1869 }, { "epoch": 0.7389478883674981, "grad_norm": 0.5107248359446434, "learning_rate": 4.9903840584690675e-06, "loss": 0.6465, "step": 1870 }, { "epoch": 0.7393430476660904, "grad_norm": 0.5191444772993868, "learning_rate": 4.990370325332488e-06, "loss": 0.6215, "step": 1871 }, { "epoch": 0.7397382069646826, "grad_norm": 0.5148010616996448, "learning_rate": 4.990356582415245e-06, "loss": 0.6254, "step": 1872 }, { "epoch": 0.7401333662632749, "grad_norm": 0.4822825362300129, "learning_rate": 4.990342829717394e-06, "loss": 0.6275, "step": 1873 }, { "epoch": 0.7405285255618671, "grad_norm": 0.5111622322537774, "learning_rate": 4.9903290672389895e-06, "loss": 0.6222, "step": 1874 }, { "epoch": 0.7409236848604593, "grad_norm": 0.524971372504675, "learning_rate": 4.990315294980083e-06, "loss": 0.6329, "step": 1875 }, { "epoch": 0.7413188441590516, "grad_norm": 0.46981819397533325, "learning_rate": 4.990301512940732e-06, "loss": 0.6255, "step": 1876 }, { "epoch": 0.7417140034576438, "grad_norm": 0.5061645450254407, "learning_rate": 4.990287721120988e-06, "loss": 0.6332, "step": 1877 }, { "epoch": 0.7421091627562361, "grad_norm": 0.5081802490588557, "learning_rate": 4.990273919520906e-06, "loss": 0.6223, "step": 1878 }, { "epoch": 0.7425043220548283, "grad_norm": 0.4904066696859732, "learning_rate": 4.990260108140541e-06, "loss": 0.6403, "step": 1879 }, { "epoch": 0.7428994813534205, "grad_norm": 0.4991704339399477, "learning_rate": 4.990246286979945e-06, "loss": 0.6378, "step": 1880 }, { "epoch": 0.7432946406520129, "grad_norm": 0.4834570028068084, "learning_rate": 4.9902324560391745e-06, "loss": 0.6003, "step": 1881 }, { "epoch": 0.7436897999506051, "grad_norm": 0.4946859173466393, "learning_rate": 4.990218615318283e-06, "loss": 0.609, "step": 1882 }, { "epoch": 0.7440849592491974, "grad_norm": 0.4988829525100141, "learning_rate": 4.990204764817326e-06, "loss": 0.6338, "step": 1883 }, { "epoch": 0.7444801185477896, "grad_norm": 0.5882873111602047, "learning_rate": 4.990190904536355e-06, "loss": 0.652, "step": 1884 }, { "epoch": 0.7448752778463819, "grad_norm": 0.4988958396253975, "learning_rate": 4.990177034475427e-06, "loss": 0.626, "step": 1885 }, { "epoch": 0.7452704371449741, "grad_norm": 0.495275464799001, "learning_rate": 4.990163154634596e-06, "loss": 0.6095, "step": 1886 }, { "epoch": 0.7456655964435663, "grad_norm": 0.4919380989234139, "learning_rate": 4.990149265013916e-06, "loss": 0.6211, "step": 1887 }, { "epoch": 0.7460607557421586, "grad_norm": 0.5019734847912949, "learning_rate": 4.990135365613442e-06, "loss": 0.6299, "step": 1888 }, { "epoch": 0.7464559150407508, "grad_norm": 0.4828661219472996, "learning_rate": 4.9901214564332275e-06, "loss": 0.6202, "step": 1889 }, { "epoch": 0.7468510743393431, "grad_norm": 0.48107070668497365, "learning_rate": 4.990107537473329e-06, "loss": 0.6092, "step": 1890 }, { "epoch": 0.7472462336379353, "grad_norm": 0.4791747545680655, "learning_rate": 4.9900936087338e-06, "loss": 0.6447, "step": 1891 }, { "epoch": 0.7476413929365275, "grad_norm": 0.49731269148004137, "learning_rate": 4.990079670214696e-06, "loss": 0.6332, "step": 1892 }, { "epoch": 0.7480365522351198, "grad_norm": 0.49528011354972196, "learning_rate": 4.99006572191607e-06, "loss": 0.6231, "step": 1893 }, { "epoch": 0.748431711533712, "grad_norm": 0.4967662330741853, "learning_rate": 4.990051763837978e-06, "loss": 0.635, "step": 1894 }, { "epoch": 0.7488268708323043, "grad_norm": 0.5265304272477109, "learning_rate": 4.990037795980475e-06, "loss": 0.622, "step": 1895 }, { "epoch": 0.7492220301308965, "grad_norm": 0.49557723563808614, "learning_rate": 4.990023818343615e-06, "loss": 0.6204, "step": 1896 }, { "epoch": 0.7496171894294887, "grad_norm": 0.4721310887893004, "learning_rate": 4.9900098309274544e-06, "loss": 0.6307, "step": 1897 }, { "epoch": 0.750012348728081, "grad_norm": 0.5090822375536443, "learning_rate": 4.989995833732047e-06, "loss": 0.6415, "step": 1898 }, { "epoch": 0.7504075080266732, "grad_norm": 0.48847487647832843, "learning_rate": 4.989981826757447e-06, "loss": 0.6407, "step": 1899 }, { "epoch": 0.7508026673252655, "grad_norm": 0.4756812146928004, "learning_rate": 4.989967810003712e-06, "loss": 0.6259, "step": 1900 }, { "epoch": 0.7511978266238577, "grad_norm": 0.4824209290886366, "learning_rate": 4.989953783470895e-06, "loss": 0.6306, "step": 1901 }, { "epoch": 0.7515929859224499, "grad_norm": 0.48714913834725626, "learning_rate": 4.9899397471590505e-06, "loss": 0.6251, "step": 1902 }, { "epoch": 0.7519881452210422, "grad_norm": 0.6529166302870585, "learning_rate": 4.9899257010682355e-06, "loss": 0.628, "step": 1903 }, { "epoch": 0.7523833045196345, "grad_norm": 0.4958403702141253, "learning_rate": 4.989911645198504e-06, "loss": 0.6198, "step": 1904 }, { "epoch": 0.7527784638182268, "grad_norm": 0.4834392149932408, "learning_rate": 4.989897579549912e-06, "loss": 0.6155, "step": 1905 }, { "epoch": 0.753173623116819, "grad_norm": 0.48216115631971956, "learning_rate": 4.989883504122514e-06, "loss": 0.6083, "step": 1906 }, { "epoch": 0.7535687824154113, "grad_norm": 0.5044002777393403, "learning_rate": 4.989869418916364e-06, "loss": 0.6608, "step": 1907 }, { "epoch": 0.7539639417140035, "grad_norm": 0.4915044040208007, "learning_rate": 4.98985532393152e-06, "loss": 0.6306, "step": 1908 }, { "epoch": 0.7543591010125957, "grad_norm": 0.4808889993199748, "learning_rate": 4.989841219168037e-06, "loss": 0.6045, "step": 1909 }, { "epoch": 0.754754260311188, "grad_norm": 0.48355433928580754, "learning_rate": 4.989827104625969e-06, "loss": 0.6225, "step": 1910 }, { "epoch": 0.7551494196097802, "grad_norm": 0.4946507696546552, "learning_rate": 4.989812980305372e-06, "loss": 0.622, "step": 1911 }, { "epoch": 0.7555445789083725, "grad_norm": 0.4821780404795558, "learning_rate": 4.989798846206302e-06, "loss": 0.6334, "step": 1912 }, { "epoch": 0.7559397382069647, "grad_norm": 0.4729609905608424, "learning_rate": 4.989784702328814e-06, "loss": 0.6138, "step": 1913 }, { "epoch": 0.7563348975055569, "grad_norm": 0.47732682551126054, "learning_rate": 4.989770548672962e-06, "loss": 0.6308, "step": 1914 }, { "epoch": 0.7567300568041492, "grad_norm": 0.49741773014790336, "learning_rate": 4.9897563852388046e-06, "loss": 0.6096, "step": 1915 }, { "epoch": 0.7571252161027414, "grad_norm": 0.491814907631123, "learning_rate": 4.989742212026396e-06, "loss": 0.6148, "step": 1916 }, { "epoch": 0.7575203754013337, "grad_norm": 0.4953592826702089, "learning_rate": 4.989728029035791e-06, "loss": 0.6176, "step": 1917 }, { "epoch": 0.7579155346999259, "grad_norm": 0.5041003634543201, "learning_rate": 4.989713836267047e-06, "loss": 0.6147, "step": 1918 }, { "epoch": 0.7583106939985181, "grad_norm": 0.6158285253583131, "learning_rate": 4.989699633720218e-06, "loss": 0.6389, "step": 1919 }, { "epoch": 0.7587058532971104, "grad_norm": 0.5193736067753019, "learning_rate": 4.989685421395361e-06, "loss": 0.6441, "step": 1920 }, { "epoch": 0.7591010125957026, "grad_norm": 0.5216730101645742, "learning_rate": 4.989671199292533e-06, "loss": 0.6293, "step": 1921 }, { "epoch": 0.7594961718942949, "grad_norm": 0.4902177383288198, "learning_rate": 4.989656967411787e-06, "loss": 0.6349, "step": 1922 }, { "epoch": 0.7598913311928871, "grad_norm": 0.48667537873584354, "learning_rate": 4.9896427257531795e-06, "loss": 0.5971, "step": 1923 }, { "epoch": 0.7602864904914793, "grad_norm": 0.5011754592981034, "learning_rate": 4.9896284743167685e-06, "loss": 0.6287, "step": 1924 }, { "epoch": 0.7606816497900716, "grad_norm": 0.49307058655487895, "learning_rate": 4.989614213102608e-06, "loss": 0.6161, "step": 1925 }, { "epoch": 0.7610768090886638, "grad_norm": 0.48115110544146544, "learning_rate": 4.989599942110754e-06, "loss": 0.6203, "step": 1926 }, { "epoch": 0.7614719683872561, "grad_norm": 0.5197946460675674, "learning_rate": 4.9895856613412645e-06, "loss": 0.6194, "step": 1927 }, { "epoch": 0.7618671276858484, "grad_norm": 0.4970888947426336, "learning_rate": 4.989571370794194e-06, "loss": 0.6471, "step": 1928 }, { "epoch": 0.7622622869844407, "grad_norm": 0.5938997863900486, "learning_rate": 4.989557070469598e-06, "loss": 0.627, "step": 1929 }, { "epoch": 0.7626574462830329, "grad_norm": 0.4912343551544669, "learning_rate": 4.989542760367535e-06, "loss": 0.6073, "step": 1930 }, { "epoch": 0.7630526055816251, "grad_norm": 0.5035910915834263, "learning_rate": 4.989528440488059e-06, "loss": 0.6061, "step": 1931 }, { "epoch": 0.7634477648802174, "grad_norm": 0.47771768086688043, "learning_rate": 4.9895141108312264e-06, "loss": 0.6155, "step": 1932 }, { "epoch": 0.7638429241788096, "grad_norm": 0.49893837750925285, "learning_rate": 4.9894997713970945e-06, "loss": 0.6194, "step": 1933 }, { "epoch": 0.7642380834774019, "grad_norm": 0.48183105529501513, "learning_rate": 4.989485422185719e-06, "loss": 0.6072, "step": 1934 }, { "epoch": 0.7646332427759941, "grad_norm": 0.49437340564091187, "learning_rate": 4.989471063197157e-06, "loss": 0.6235, "step": 1935 }, { "epoch": 0.7650284020745863, "grad_norm": 0.5086950926898877, "learning_rate": 4.989456694431464e-06, "loss": 0.6204, "step": 1936 }, { "epoch": 0.7654235613731786, "grad_norm": 0.4769480906144647, "learning_rate": 4.989442315888697e-06, "loss": 0.5961, "step": 1937 }, { "epoch": 0.7658187206717708, "grad_norm": 0.49314554262535976, "learning_rate": 4.9894279275689124e-06, "loss": 0.6249, "step": 1938 }, { "epoch": 0.766213879970363, "grad_norm": 0.5154666684010718, "learning_rate": 4.989413529472166e-06, "loss": 0.6252, "step": 1939 }, { "epoch": 0.7666090392689553, "grad_norm": 0.4870804225575094, "learning_rate": 4.989399121598516e-06, "loss": 0.6125, "step": 1940 }, { "epoch": 0.7670041985675475, "grad_norm": 0.5588292063163853, "learning_rate": 4.989384703948017e-06, "loss": 0.6152, "step": 1941 }, { "epoch": 0.7673993578661398, "grad_norm": 0.49714045565400894, "learning_rate": 4.989370276520726e-06, "loss": 0.6175, "step": 1942 }, { "epoch": 0.767794517164732, "grad_norm": 0.501785140415218, "learning_rate": 4.989355839316701e-06, "loss": 0.6385, "step": 1943 }, { "epoch": 0.7681896764633243, "grad_norm": 0.4727502626196803, "learning_rate": 4.989341392335998e-06, "loss": 0.619, "step": 1944 }, { "epoch": 0.7685848357619165, "grad_norm": 0.5109703415941235, "learning_rate": 4.989326935578673e-06, "loss": 0.5984, "step": 1945 }, { "epoch": 0.7689799950605087, "grad_norm": 0.512283569257176, "learning_rate": 4.9893124690447835e-06, "loss": 0.6236, "step": 1946 }, { "epoch": 0.769375154359101, "grad_norm": 0.4941171640474088, "learning_rate": 4.989297992734386e-06, "loss": 0.6163, "step": 1947 }, { "epoch": 0.7697703136576932, "grad_norm": 0.5134713647285842, "learning_rate": 4.989283506647539e-06, "loss": 0.6033, "step": 1948 }, { "epoch": 0.7701654729562855, "grad_norm": 0.5065873835627485, "learning_rate": 4.9892690107842964e-06, "loss": 0.6306, "step": 1949 }, { "epoch": 0.7705606322548777, "grad_norm": 0.488059732764682, "learning_rate": 4.9892545051447175e-06, "loss": 0.6095, "step": 1950 }, { "epoch": 0.7709557915534699, "grad_norm": 0.5039415788029225, "learning_rate": 4.989239989728859e-06, "loss": 0.614, "step": 1951 }, { "epoch": 0.7713509508520623, "grad_norm": 0.5032937006287896, "learning_rate": 4.989225464536776e-06, "loss": 0.6511, "step": 1952 }, { "epoch": 0.7717461101506545, "grad_norm": 0.49467260366888127, "learning_rate": 4.989210929568527e-06, "loss": 0.6123, "step": 1953 }, { "epoch": 0.7721412694492468, "grad_norm": 0.5221851652881345, "learning_rate": 4.98919638482417e-06, "loss": 0.6117, "step": 1954 }, { "epoch": 0.772536428747839, "grad_norm": 0.5123539208719938, "learning_rate": 4.989181830303761e-06, "loss": 0.6081, "step": 1955 }, { "epoch": 0.7729315880464313, "grad_norm": 0.49070829163767216, "learning_rate": 4.9891672660073566e-06, "loss": 0.6169, "step": 1956 }, { "epoch": 0.7733267473450235, "grad_norm": 0.504303254265576, "learning_rate": 4.989152691935015e-06, "loss": 0.6099, "step": 1957 }, { "epoch": 0.7737219066436157, "grad_norm": 0.4823674667046708, "learning_rate": 4.989138108086793e-06, "loss": 0.6057, "step": 1958 }, { "epoch": 0.774117065942208, "grad_norm": 0.488893702740108, "learning_rate": 4.989123514462748e-06, "loss": 0.6215, "step": 1959 }, { "epoch": 0.7745122252408002, "grad_norm": 0.5065060131368263, "learning_rate": 4.989108911062938e-06, "loss": 0.6268, "step": 1960 }, { "epoch": 0.7749073845393925, "grad_norm": 0.46321552663150034, "learning_rate": 4.989094297887419e-06, "loss": 0.622, "step": 1961 }, { "epoch": 0.7753025438379847, "grad_norm": 0.5020834149871061, "learning_rate": 4.989079674936249e-06, "loss": 0.6185, "step": 1962 }, { "epoch": 0.7756977031365769, "grad_norm": 0.4981082643061992, "learning_rate": 4.989065042209486e-06, "loss": 0.6292, "step": 1963 }, { "epoch": 0.7760928624351692, "grad_norm": 0.47491786674487524, "learning_rate": 4.989050399707186e-06, "loss": 0.6186, "step": 1964 }, { "epoch": 0.7764880217337614, "grad_norm": 0.47555277744404306, "learning_rate": 4.989035747429409e-06, "loss": 0.6205, "step": 1965 }, { "epoch": 0.7768831810323537, "grad_norm": 0.5232964357542301, "learning_rate": 4.989021085376209e-06, "loss": 0.6325, "step": 1966 }, { "epoch": 0.7772783403309459, "grad_norm": 0.48296952218572786, "learning_rate": 4.989006413547647e-06, "loss": 0.6232, "step": 1967 }, { "epoch": 0.7776734996295381, "grad_norm": 0.49104169294101535, "learning_rate": 4.988991731943778e-06, "loss": 0.6095, "step": 1968 }, { "epoch": 0.7780686589281304, "grad_norm": 0.4706225330715028, "learning_rate": 4.988977040564662e-06, "loss": 0.6001, "step": 1969 }, { "epoch": 0.7784638182267226, "grad_norm": 0.4916599710014126, "learning_rate": 4.988962339410356e-06, "loss": 0.6018, "step": 1970 }, { "epoch": 0.7788589775253149, "grad_norm": 0.4930270017350692, "learning_rate": 4.988947628480917e-06, "loss": 0.6232, "step": 1971 }, { "epoch": 0.7792541368239071, "grad_norm": 0.49545246948058913, "learning_rate": 4.988932907776402e-06, "loss": 0.6423, "step": 1972 }, { "epoch": 0.7796492961224993, "grad_norm": 0.48258013902876534, "learning_rate": 4.988918177296871e-06, "loss": 0.6234, "step": 1973 }, { "epoch": 0.7800444554210916, "grad_norm": 0.4654851797716931, "learning_rate": 4.988903437042379e-06, "loss": 0.6061, "step": 1974 }, { "epoch": 0.7804396147196839, "grad_norm": 0.4709686787629398, "learning_rate": 4.988888687012988e-06, "loss": 0.6047, "step": 1975 }, { "epoch": 0.7808347740182762, "grad_norm": 0.480518188036582, "learning_rate": 4.988873927208753e-06, "loss": 0.6272, "step": 1976 }, { "epoch": 0.7812299333168684, "grad_norm": 0.4830740477285781, "learning_rate": 4.9888591576297315e-06, "loss": 0.6199, "step": 1977 }, { "epoch": 0.7816250926154606, "grad_norm": 0.49273772037327457, "learning_rate": 4.988844378275983e-06, "loss": 0.6278, "step": 1978 }, { "epoch": 0.7820202519140529, "grad_norm": 0.5246304983064629, "learning_rate": 4.988829589147566e-06, "loss": 0.642, "step": 1979 }, { "epoch": 0.7824154112126451, "grad_norm": 0.5572740103595891, "learning_rate": 4.988814790244536e-06, "loss": 0.6414, "step": 1980 }, { "epoch": 0.7828105705112374, "grad_norm": 0.5029929788394535, "learning_rate": 4.988799981566954e-06, "loss": 0.6119, "step": 1981 }, { "epoch": 0.7832057298098296, "grad_norm": 0.5256809229609704, "learning_rate": 4.988785163114876e-06, "loss": 0.6411, "step": 1982 }, { "epoch": 0.7836008891084218, "grad_norm": 0.46688383401448613, "learning_rate": 4.988770334888362e-06, "loss": 0.6118, "step": 1983 }, { "epoch": 0.7839960484070141, "grad_norm": 0.4953447635940505, "learning_rate": 4.988755496887469e-06, "loss": 0.616, "step": 1984 }, { "epoch": 0.7843912077056063, "grad_norm": 0.5398841768039435, "learning_rate": 4.988740649112256e-06, "loss": 0.6177, "step": 1985 }, { "epoch": 0.7847863670041986, "grad_norm": 0.5019947174695433, "learning_rate": 4.988725791562782e-06, "loss": 0.6296, "step": 1986 }, { "epoch": 0.7851815263027908, "grad_norm": 0.49373501939581677, "learning_rate": 4.988710924239103e-06, "loss": 0.6365, "step": 1987 }, { "epoch": 0.785576685601383, "grad_norm": 0.5151169309400688, "learning_rate": 4.988696047141278e-06, "loss": 0.5958, "step": 1988 }, { "epoch": 0.7859718448999753, "grad_norm": 0.4864586261342781, "learning_rate": 4.988681160269367e-06, "loss": 0.6389, "step": 1989 }, { "epoch": 0.7863670041985675, "grad_norm": 0.47700282379110287, "learning_rate": 4.988666263623428e-06, "loss": 0.6333, "step": 1990 }, { "epoch": 0.7867621634971598, "grad_norm": 0.5030597816770237, "learning_rate": 4.988651357203519e-06, "loss": 0.6201, "step": 1991 }, { "epoch": 0.787157322795752, "grad_norm": 0.4781975328115511, "learning_rate": 4.988636441009698e-06, "loss": 0.6175, "step": 1992 }, { "epoch": 0.7875524820943443, "grad_norm": 0.5174944437050455, "learning_rate": 4.988621515042025e-06, "loss": 0.6462, "step": 1993 }, { "epoch": 0.7879476413929365, "grad_norm": 0.5374787592098473, "learning_rate": 4.988606579300557e-06, "loss": 0.634, "step": 1994 }, { "epoch": 0.7883428006915287, "grad_norm": 0.47432495552537524, "learning_rate": 4.988591633785354e-06, "loss": 0.6332, "step": 1995 }, { "epoch": 0.788737959990121, "grad_norm": 0.48163792175353165, "learning_rate": 4.988576678496474e-06, "loss": 0.6038, "step": 1996 }, { "epoch": 0.7891331192887132, "grad_norm": 0.46928084110206164, "learning_rate": 4.988561713433977e-06, "loss": 0.6053, "step": 1997 }, { "epoch": 0.7895282785873055, "grad_norm": 0.5029560676742602, "learning_rate": 4.988546738597919e-06, "loss": 0.6366, "step": 1998 }, { "epoch": 0.7899234378858978, "grad_norm": 0.4850619096705073, "learning_rate": 4.988531753988361e-06, "loss": 0.6231, "step": 1999 }, { "epoch": 0.79031859718449, "grad_norm": 0.4946994316101753, "learning_rate": 4.988516759605363e-06, "loss": 0.6331, "step": 2000 }, { "epoch": 0.7907137564830823, "grad_norm": 0.5367867666042327, "learning_rate": 4.988501755448981e-06, "loss": 0.639, "step": 2001 }, { "epoch": 0.7911089157816745, "grad_norm": 0.5010435528360841, "learning_rate": 4.988486741519275e-06, "loss": 0.62, "step": 2002 }, { "epoch": 0.7915040750802668, "grad_norm": 0.4847944569719835, "learning_rate": 4.988471717816305e-06, "loss": 0.6629, "step": 2003 }, { "epoch": 0.791899234378859, "grad_norm": 0.7692429223559273, "learning_rate": 4.988456684340128e-06, "loss": 0.6407, "step": 2004 }, { "epoch": 0.7922943936774512, "grad_norm": 0.5018665724568167, "learning_rate": 4.9884416410908055e-06, "loss": 0.6318, "step": 2005 }, { "epoch": 0.7926895529760435, "grad_norm": 0.47570736353057425, "learning_rate": 4.988426588068394e-06, "loss": 0.6203, "step": 2006 }, { "epoch": 0.7930847122746357, "grad_norm": 0.49443151633573906, "learning_rate": 4.988411525272954e-06, "loss": 0.6314, "step": 2007 }, { "epoch": 0.793479871573228, "grad_norm": 0.47649470527071924, "learning_rate": 4.988396452704546e-06, "loss": 0.6122, "step": 2008 }, { "epoch": 0.7938750308718202, "grad_norm": 0.4959125816855865, "learning_rate": 4.988381370363227e-06, "loss": 0.6264, "step": 2009 }, { "epoch": 0.7942701901704124, "grad_norm": 0.4670880342830564, "learning_rate": 4.9883662782490576e-06, "loss": 0.5956, "step": 2010 }, { "epoch": 0.7946653494690047, "grad_norm": 0.5050700439632813, "learning_rate": 4.988351176362095e-06, "loss": 0.6234, "step": 2011 }, { "epoch": 0.7950605087675969, "grad_norm": 0.48271559918802115, "learning_rate": 4.9883360647024e-06, "loss": 0.6295, "step": 2012 }, { "epoch": 0.7954556680661892, "grad_norm": 0.4636363520372164, "learning_rate": 4.988320943270034e-06, "loss": 0.6177, "step": 2013 }, { "epoch": 0.7958508273647814, "grad_norm": 0.6019915166951819, "learning_rate": 4.988305812065053e-06, "loss": 0.6307, "step": 2014 }, { "epoch": 0.7962459866633737, "grad_norm": 0.5005076532485925, "learning_rate": 4.988290671087517e-06, "loss": 0.6331, "step": 2015 }, { "epoch": 0.7966411459619659, "grad_norm": 0.5057151799248627, "learning_rate": 4.988275520337488e-06, "loss": 0.6397, "step": 2016 }, { "epoch": 0.7970363052605581, "grad_norm": 0.4620738413199476, "learning_rate": 4.988260359815022e-06, "loss": 0.6243, "step": 2017 }, { "epoch": 0.7974314645591504, "grad_norm": 0.48041825341679295, "learning_rate": 4.988245189520181e-06, "loss": 0.6347, "step": 2018 }, { "epoch": 0.7978266238577426, "grad_norm": 0.4854380738784906, "learning_rate": 4.9882300094530236e-06, "loss": 0.6321, "step": 2019 }, { "epoch": 0.7982217831563349, "grad_norm": 0.4732758526185407, "learning_rate": 4.988214819613611e-06, "loss": 0.613, "step": 2020 }, { "epoch": 0.7986169424549271, "grad_norm": 0.4902286255287152, "learning_rate": 4.988199620002e-06, "loss": 0.642, "step": 2021 }, { "epoch": 0.7990121017535193, "grad_norm": 0.48018089371628836, "learning_rate": 4.988184410618252e-06, "loss": 0.6281, "step": 2022 }, { "epoch": 0.7994072610521117, "grad_norm": 0.48276881530797655, "learning_rate": 4.988169191462426e-06, "loss": 0.624, "step": 2023 }, { "epoch": 0.7998024203507039, "grad_norm": 0.4805829544314743, "learning_rate": 4.988153962534583e-06, "loss": 0.6355, "step": 2024 }, { "epoch": 0.8001975796492962, "grad_norm": 0.4650087756290116, "learning_rate": 4.988138723834783e-06, "loss": 0.6159, "step": 2025 }, { "epoch": 0.8005927389478884, "grad_norm": 0.500302525183259, "learning_rate": 4.9881234753630835e-06, "loss": 0.6207, "step": 2026 }, { "epoch": 0.8009878982464806, "grad_norm": 0.4633513547631363, "learning_rate": 4.988108217119547e-06, "loss": 0.603, "step": 2027 }, { "epoch": 0.8013830575450729, "grad_norm": 0.4566681685008273, "learning_rate": 4.988092949104232e-06, "loss": 0.6074, "step": 2028 }, { "epoch": 0.8017782168436651, "grad_norm": 0.4988424175671541, "learning_rate": 4.988077671317198e-06, "loss": 0.6013, "step": 2029 }, { "epoch": 0.8021733761422574, "grad_norm": 0.4929308612546276, "learning_rate": 4.988062383758506e-06, "loss": 0.6244, "step": 2030 }, { "epoch": 0.8025685354408496, "grad_norm": 0.4781963632718603, "learning_rate": 4.988047086428217e-06, "loss": 0.6197, "step": 2031 }, { "epoch": 0.8029636947394418, "grad_norm": 0.6162145939572514, "learning_rate": 4.988031779326389e-06, "loss": 0.6274, "step": 2032 }, { "epoch": 0.8033588540380341, "grad_norm": 0.48495525673379924, "learning_rate": 4.988016462453082e-06, "loss": 0.6293, "step": 2033 }, { "epoch": 0.8037540133366263, "grad_norm": 0.5043930111987597, "learning_rate": 4.988001135808358e-06, "loss": 0.6138, "step": 2034 }, { "epoch": 0.8041491726352186, "grad_norm": 0.46666576704300994, "learning_rate": 4.987985799392277e-06, "loss": 0.6072, "step": 2035 }, { "epoch": 0.8045443319338108, "grad_norm": 0.48805370345241733, "learning_rate": 4.987970453204898e-06, "loss": 0.6249, "step": 2036 }, { "epoch": 0.804939491232403, "grad_norm": 0.48377604545911973, "learning_rate": 4.987955097246282e-06, "loss": 0.617, "step": 2037 }, { "epoch": 0.8053346505309953, "grad_norm": 0.4720053686199778, "learning_rate": 4.987939731516489e-06, "loss": 0.6046, "step": 2038 }, { "epoch": 0.8057298098295875, "grad_norm": 0.5828932827171168, "learning_rate": 4.987924356015579e-06, "loss": 0.6315, "step": 2039 }, { "epoch": 0.8061249691281798, "grad_norm": 0.4891162177690198, "learning_rate": 4.987908970743614e-06, "loss": 0.6332, "step": 2040 }, { "epoch": 0.806520128426772, "grad_norm": 0.4827837446920491, "learning_rate": 4.987893575700652e-06, "loss": 0.6258, "step": 2041 }, { "epoch": 0.8069152877253642, "grad_norm": 0.46077989778395895, "learning_rate": 4.987878170886755e-06, "loss": 0.5977, "step": 2042 }, { "epoch": 0.8073104470239565, "grad_norm": 0.48314109012199535, "learning_rate": 4.987862756301984e-06, "loss": 0.6335, "step": 2043 }, { "epoch": 0.8077056063225487, "grad_norm": 0.47542653192334977, "learning_rate": 4.987847331946398e-06, "loss": 0.6009, "step": 2044 }, { "epoch": 0.808100765621141, "grad_norm": 0.46969812561265184, "learning_rate": 4.987831897820059e-06, "loss": 0.6262, "step": 2045 }, { "epoch": 0.8084959249197333, "grad_norm": 0.4697880436971334, "learning_rate": 4.987816453923027e-06, "loss": 0.6212, "step": 2046 }, { "epoch": 0.8088910842183256, "grad_norm": 0.5290819232360799, "learning_rate": 4.987801000255362e-06, "loss": 0.6359, "step": 2047 }, { "epoch": 0.8092862435169178, "grad_norm": 0.4774269454534689, "learning_rate": 4.987785536817127e-06, "loss": 0.6069, "step": 2048 }, { "epoch": 0.80968140281551, "grad_norm": 0.4925043786131469, "learning_rate": 4.987770063608379e-06, "loss": 0.6237, "step": 2049 }, { "epoch": 0.8100765621141023, "grad_norm": 0.48913030453903555, "learning_rate": 4.987754580629182e-06, "loss": 0.617, "step": 2050 }, { "epoch": 0.8104717214126945, "grad_norm": 0.4870701601969778, "learning_rate": 4.987739087879596e-06, "loss": 0.6152, "step": 2051 }, { "epoch": 0.8108668807112868, "grad_norm": 0.4732126768516727, "learning_rate": 4.987723585359681e-06, "loss": 0.619, "step": 2052 }, { "epoch": 0.811262040009879, "grad_norm": 0.4889602356350729, "learning_rate": 4.987708073069498e-06, "loss": 0.6352, "step": 2053 }, { "epoch": 0.8116571993084712, "grad_norm": 0.48795631546959883, "learning_rate": 4.9876925510091085e-06, "loss": 0.6143, "step": 2054 }, { "epoch": 0.8120523586070635, "grad_norm": 0.46539271557990114, "learning_rate": 4.987677019178573e-06, "loss": 0.6046, "step": 2055 }, { "epoch": 0.8124475179056557, "grad_norm": 0.49405822426322776, "learning_rate": 4.987661477577953e-06, "loss": 0.6014, "step": 2056 }, { "epoch": 0.812842677204248, "grad_norm": 0.48386934068049814, "learning_rate": 4.98764592620731e-06, "loss": 0.6001, "step": 2057 }, { "epoch": 0.8132378365028402, "grad_norm": 0.5187696026651084, "learning_rate": 4.987630365066703e-06, "loss": 0.6317, "step": 2058 }, { "epoch": 0.8136329958014324, "grad_norm": 0.5042378749814072, "learning_rate": 4.987614794156196e-06, "loss": 0.6258, "step": 2059 }, { "epoch": 0.8140281551000247, "grad_norm": 0.4644958030378567, "learning_rate": 4.987599213475848e-06, "loss": 0.6111, "step": 2060 }, { "epoch": 0.8144233143986169, "grad_norm": 0.45523155834489054, "learning_rate": 4.98758362302572e-06, "loss": 0.614, "step": 2061 }, { "epoch": 0.8148184736972092, "grad_norm": 0.4798484565189588, "learning_rate": 4.987568022805875e-06, "loss": 0.6285, "step": 2062 }, { "epoch": 0.8152136329958014, "grad_norm": 0.48279313141569785, "learning_rate": 4.987552412816373e-06, "loss": 0.6223, "step": 2063 }, { "epoch": 0.8156087922943936, "grad_norm": 0.452316831060907, "learning_rate": 4.9875367930572764e-06, "loss": 0.5993, "step": 2064 }, { "epoch": 0.8160039515929859, "grad_norm": 0.4964123620781851, "learning_rate": 4.987521163528645e-06, "loss": 0.6159, "step": 2065 }, { "epoch": 0.8163991108915781, "grad_norm": 0.487979292436224, "learning_rate": 4.9875055242305414e-06, "loss": 0.6027, "step": 2066 }, { "epoch": 0.8167942701901704, "grad_norm": 0.46013675854663044, "learning_rate": 4.987489875163027e-06, "loss": 0.6076, "step": 2067 }, { "epoch": 0.8171894294887626, "grad_norm": 0.4929449077318516, "learning_rate": 4.987474216326162e-06, "loss": 0.6387, "step": 2068 }, { "epoch": 0.8175845887873548, "grad_norm": 0.4845064197629273, "learning_rate": 4.987458547720009e-06, "loss": 0.624, "step": 2069 }, { "epoch": 0.8179797480859472, "grad_norm": 0.5780674060390795, "learning_rate": 4.987442869344629e-06, "loss": 0.6473, "step": 2070 }, { "epoch": 0.8183749073845394, "grad_norm": 0.5093404649783988, "learning_rate": 4.987427181200084e-06, "loss": 0.6399, "step": 2071 }, { "epoch": 0.8187700666831317, "grad_norm": 0.5027970250091759, "learning_rate": 4.987411483286436e-06, "loss": 0.6364, "step": 2072 }, { "epoch": 0.8191652259817239, "grad_norm": 0.4681596123762207, "learning_rate": 4.987395775603746e-06, "loss": 0.5992, "step": 2073 }, { "epoch": 0.8195603852803162, "grad_norm": 0.4774118024924558, "learning_rate": 4.987380058152076e-06, "loss": 0.6076, "step": 2074 }, { "epoch": 0.8199555445789084, "grad_norm": 0.4758447061485989, "learning_rate": 4.987364330931487e-06, "loss": 0.6348, "step": 2075 }, { "epoch": 0.8203507038775006, "grad_norm": 0.4820431900570343, "learning_rate": 4.9873485939420405e-06, "loss": 0.6126, "step": 2076 }, { "epoch": 0.8207458631760929, "grad_norm": 0.46836863605198487, "learning_rate": 4.987332847183801e-06, "loss": 0.6143, "step": 2077 }, { "epoch": 0.8211410224746851, "grad_norm": 0.47555062710148005, "learning_rate": 4.987317090656827e-06, "loss": 0.6142, "step": 2078 }, { "epoch": 0.8215361817732774, "grad_norm": 0.4870036833640785, "learning_rate": 4.987301324361182e-06, "loss": 0.6083, "step": 2079 }, { "epoch": 0.8219313410718696, "grad_norm": 0.4844543461263999, "learning_rate": 4.9872855482969284e-06, "loss": 0.6168, "step": 2080 }, { "epoch": 0.8223265003704618, "grad_norm": 0.4746621256551825, "learning_rate": 4.987269762464127e-06, "loss": 0.62, "step": 2081 }, { "epoch": 0.8227216596690541, "grad_norm": 0.4718461178840169, "learning_rate": 4.987253966862841e-06, "loss": 0.626, "step": 2082 }, { "epoch": 0.8231168189676463, "grad_norm": 0.47940330337486675, "learning_rate": 4.987238161493132e-06, "loss": 0.6384, "step": 2083 }, { "epoch": 0.8235119782662386, "grad_norm": 0.4831189596082736, "learning_rate": 4.987222346355061e-06, "loss": 0.6006, "step": 2084 }, { "epoch": 0.8239071375648308, "grad_norm": 0.5111828704378958, "learning_rate": 4.987206521448691e-06, "loss": 0.6275, "step": 2085 }, { "epoch": 0.824302296863423, "grad_norm": 0.4761799453175209, "learning_rate": 4.987190686774084e-06, "loss": 0.6166, "step": 2086 }, { "epoch": 0.8246974561620153, "grad_norm": 0.479237633298224, "learning_rate": 4.987174842331303e-06, "loss": 0.6157, "step": 2087 }, { "epoch": 0.8250926154606075, "grad_norm": 0.4877816511875846, "learning_rate": 4.9871589881204085e-06, "loss": 0.633, "step": 2088 }, { "epoch": 0.8254877747591998, "grad_norm": 0.5597118422036577, "learning_rate": 4.987143124141465e-06, "loss": 0.6281, "step": 2089 }, { "epoch": 0.825882934057792, "grad_norm": 0.4808185647219305, "learning_rate": 4.987127250394532e-06, "loss": 0.6284, "step": 2090 }, { "epoch": 0.8262780933563842, "grad_norm": 0.49350572331519976, "learning_rate": 4.987111366879674e-06, "loss": 0.6327, "step": 2091 }, { "epoch": 0.8266732526549765, "grad_norm": 0.4950625588459782, "learning_rate": 4.987095473596954e-06, "loss": 0.5991, "step": 2092 }, { "epoch": 0.8270684119535687, "grad_norm": 0.48717004314629225, "learning_rate": 4.987079570546432e-06, "loss": 0.6125, "step": 2093 }, { "epoch": 0.8274635712521611, "grad_norm": 0.477632541157664, "learning_rate": 4.987063657728172e-06, "loss": 0.6184, "step": 2094 }, { "epoch": 0.8278587305507533, "grad_norm": 0.490920143190879, "learning_rate": 4.987047735142236e-06, "loss": 0.6112, "step": 2095 }, { "epoch": 0.8282538898493456, "grad_norm": 0.4963440592377515, "learning_rate": 4.9870318027886874e-06, "loss": 0.6122, "step": 2096 }, { "epoch": 0.8286490491479378, "grad_norm": 0.4867870903292657, "learning_rate": 4.9870158606675875e-06, "loss": 0.5993, "step": 2097 }, { "epoch": 0.82904420844653, "grad_norm": 0.4976198098833027, "learning_rate": 4.986999908779e-06, "loss": 0.6465, "step": 2098 }, { "epoch": 0.8294393677451223, "grad_norm": 0.4870232342621309, "learning_rate": 4.986983947122986e-06, "loss": 0.6201, "step": 2099 }, { "epoch": 0.8298345270437145, "grad_norm": 0.46830678342177057, "learning_rate": 4.9869679756996105e-06, "loss": 0.6107, "step": 2100 }, { "epoch": 0.8302296863423068, "grad_norm": 0.6549407593688914, "learning_rate": 4.986951994508934e-06, "loss": 0.6266, "step": 2101 }, { "epoch": 0.830624845640899, "grad_norm": 0.47125597170756056, "learning_rate": 4.98693600355102e-06, "loss": 0.6293, "step": 2102 }, { "epoch": 0.8310200049394912, "grad_norm": 0.491814138267252, "learning_rate": 4.9869200028259325e-06, "loss": 0.6273, "step": 2103 }, { "epoch": 0.8314151642380835, "grad_norm": 0.4992441103556944, "learning_rate": 4.986903992333734e-06, "loss": 0.6485, "step": 2104 }, { "epoch": 0.8318103235366757, "grad_norm": 0.4838463362223243, "learning_rate": 4.986887972074485e-06, "loss": 0.6233, "step": 2105 }, { "epoch": 0.832205482835268, "grad_norm": 0.5162232947613259, "learning_rate": 4.986871942048252e-06, "loss": 0.5937, "step": 2106 }, { "epoch": 0.8326006421338602, "grad_norm": 0.491642550409257, "learning_rate": 4.986855902255094e-06, "loss": 0.6275, "step": 2107 }, { "epoch": 0.8329958014324524, "grad_norm": 0.4697163915777976, "learning_rate": 4.9868398526950765e-06, "loss": 0.6012, "step": 2108 }, { "epoch": 0.8333909607310447, "grad_norm": 0.5029059751532935, "learning_rate": 4.986823793368263e-06, "loss": 0.6184, "step": 2109 }, { "epoch": 0.8337861200296369, "grad_norm": 0.49890701973097873, "learning_rate": 4.9868077242747156e-06, "loss": 0.6489, "step": 2110 }, { "epoch": 0.8341812793282292, "grad_norm": 0.47086406769377864, "learning_rate": 4.986791645414498e-06, "loss": 0.6158, "step": 2111 }, { "epoch": 0.8345764386268214, "grad_norm": 0.4711411719674825, "learning_rate": 4.986775556787672e-06, "loss": 0.6108, "step": 2112 }, { "epoch": 0.8349715979254136, "grad_norm": 0.5170133429016198, "learning_rate": 4.986759458394302e-06, "loss": 0.6205, "step": 2113 }, { "epoch": 0.8353667572240059, "grad_norm": 0.48113051440505156, "learning_rate": 4.986743350234451e-06, "loss": 0.5975, "step": 2114 }, { "epoch": 0.8357619165225981, "grad_norm": 0.4867487446570137, "learning_rate": 4.986727232308182e-06, "loss": 0.6193, "step": 2115 }, { "epoch": 0.8361570758211904, "grad_norm": 0.5120874526734966, "learning_rate": 4.986711104615558e-06, "loss": 0.6233, "step": 2116 }, { "epoch": 0.8365522351197827, "grad_norm": 0.5108538780702159, "learning_rate": 4.986694967156644e-06, "loss": 0.6112, "step": 2117 }, { "epoch": 0.836947394418375, "grad_norm": 0.5009432477935026, "learning_rate": 4.986678819931501e-06, "loss": 0.6129, "step": 2118 }, { "epoch": 0.8373425537169672, "grad_norm": 0.5394279204944199, "learning_rate": 4.986662662940193e-06, "loss": 0.6328, "step": 2119 }, { "epoch": 0.8377377130155594, "grad_norm": 0.5074352200988693, "learning_rate": 4.986646496182786e-06, "loss": 0.6297, "step": 2120 }, { "epoch": 0.8381328723141517, "grad_norm": 0.4623558465170015, "learning_rate": 4.98663031965934e-06, "loss": 0.5988, "step": 2121 }, { "epoch": 0.8385280316127439, "grad_norm": 0.5048081966048025, "learning_rate": 4.9866141333699215e-06, "loss": 0.6172, "step": 2122 }, { "epoch": 0.8389231909113362, "grad_norm": 0.4969733235844862, "learning_rate": 4.986597937314591e-06, "loss": 0.6126, "step": 2123 }, { "epoch": 0.8393183502099284, "grad_norm": 0.4644808410529684, "learning_rate": 4.986581731493415e-06, "loss": 0.6122, "step": 2124 }, { "epoch": 0.8397135095085206, "grad_norm": 0.49621141240128863, "learning_rate": 4.986565515906455e-06, "loss": 0.627, "step": 2125 }, { "epoch": 0.8401086688071129, "grad_norm": 0.5140052050923385, "learning_rate": 4.986549290553777e-06, "loss": 0.6076, "step": 2126 }, { "epoch": 0.8405038281057051, "grad_norm": 0.4774110621495217, "learning_rate": 4.986533055435442e-06, "loss": 0.638, "step": 2127 }, { "epoch": 0.8408989874042974, "grad_norm": 0.4831024360099805, "learning_rate": 4.986516810551515e-06, "loss": 0.5982, "step": 2128 }, { "epoch": 0.8412941467028896, "grad_norm": 0.489641326798273, "learning_rate": 4.9865005559020605e-06, "loss": 0.6139, "step": 2129 }, { "epoch": 0.8416893060014818, "grad_norm": 0.4700688822738762, "learning_rate": 4.986484291487142e-06, "loss": 0.6094, "step": 2130 }, { "epoch": 0.8420844653000741, "grad_norm": 0.5498327067539832, "learning_rate": 4.9864680173068215e-06, "loss": 0.6159, "step": 2131 }, { "epoch": 0.8424796245986663, "grad_norm": 0.5209813131829413, "learning_rate": 4.986451733361165e-06, "loss": 0.6058, "step": 2132 }, { "epoch": 0.8428747838972586, "grad_norm": 0.4755760046484086, "learning_rate": 4.986435439650236e-06, "loss": 0.6139, "step": 2133 }, { "epoch": 0.8432699431958508, "grad_norm": 0.4980942846797616, "learning_rate": 4.9864191361741e-06, "loss": 0.6253, "step": 2134 }, { "epoch": 0.843665102494443, "grad_norm": 0.5023114342986108, "learning_rate": 4.986402822932818e-06, "loss": 0.6174, "step": 2135 }, { "epoch": 0.8440602617930353, "grad_norm": 0.487624480191535, "learning_rate": 4.986386499926456e-06, "loss": 0.6156, "step": 2136 }, { "epoch": 0.8444554210916275, "grad_norm": 0.47126111959332073, "learning_rate": 4.986370167155078e-06, "loss": 0.604, "step": 2137 }, { "epoch": 0.8448505803902198, "grad_norm": 0.4859939062367633, "learning_rate": 4.986353824618747e-06, "loss": 0.6085, "step": 2138 }, { "epoch": 0.845245739688812, "grad_norm": 0.4865870625029928, "learning_rate": 4.9863374723175285e-06, "loss": 0.6155, "step": 2139 }, { "epoch": 0.8456408989874042, "grad_norm": 0.5154805544761918, "learning_rate": 4.9863211102514855e-06, "loss": 0.6092, "step": 2140 }, { "epoch": 0.8460360582859966, "grad_norm": 0.479204794222624, "learning_rate": 4.986304738420684e-06, "loss": 0.6202, "step": 2141 }, { "epoch": 0.8464312175845888, "grad_norm": 0.4854511339075718, "learning_rate": 4.986288356825186e-06, "loss": 0.5973, "step": 2142 }, { "epoch": 0.8468263768831811, "grad_norm": 0.4947439475538887, "learning_rate": 4.986271965465058e-06, "loss": 0.606, "step": 2143 }, { "epoch": 0.8472215361817733, "grad_norm": 0.4772366979723403, "learning_rate": 4.9862555643403634e-06, "loss": 0.6412, "step": 2144 }, { "epoch": 0.8476166954803656, "grad_norm": 0.45202495018365624, "learning_rate": 4.986239153451167e-06, "loss": 0.6009, "step": 2145 }, { "epoch": 0.8480118547789578, "grad_norm": 0.4674899554356609, "learning_rate": 4.986222732797532e-06, "loss": 0.6083, "step": 2146 }, { "epoch": 0.84840701407755, "grad_norm": 0.4720229690067215, "learning_rate": 4.986206302379524e-06, "loss": 0.6193, "step": 2147 }, { "epoch": 0.8488021733761423, "grad_norm": 0.5068197579171579, "learning_rate": 4.986189862197208e-06, "loss": 0.6188, "step": 2148 }, { "epoch": 0.8491973326747345, "grad_norm": 0.475281381106489, "learning_rate": 4.9861734122506475e-06, "loss": 0.6115, "step": 2149 }, { "epoch": 0.8495924919733268, "grad_norm": 0.46986989066929974, "learning_rate": 4.986156952539908e-06, "loss": 0.6269, "step": 2150 }, { "epoch": 0.849987651271919, "grad_norm": 0.46811282834432916, "learning_rate": 4.986140483065053e-06, "loss": 0.6215, "step": 2151 }, { "epoch": 0.8503828105705112, "grad_norm": 0.4910081036842562, "learning_rate": 4.986124003826148e-06, "loss": 0.6181, "step": 2152 }, { "epoch": 0.8507779698691035, "grad_norm": 0.4872472385487101, "learning_rate": 4.986107514823257e-06, "loss": 0.6192, "step": 2153 }, { "epoch": 0.8511731291676957, "grad_norm": 0.47883592093053695, "learning_rate": 4.986091016056446e-06, "loss": 0.6201, "step": 2154 }, { "epoch": 0.851568288466288, "grad_norm": 0.485056443392643, "learning_rate": 4.986074507525779e-06, "loss": 0.6156, "step": 2155 }, { "epoch": 0.8519634477648802, "grad_norm": 0.4833961803335379, "learning_rate": 4.986057989231321e-06, "loss": 0.5979, "step": 2156 }, { "epoch": 0.8523586070634724, "grad_norm": 0.48491015695364686, "learning_rate": 4.9860414611731375e-06, "loss": 0.6379, "step": 2157 }, { "epoch": 0.8527537663620647, "grad_norm": 0.47943852412179266, "learning_rate": 4.986024923351292e-06, "loss": 0.6266, "step": 2158 }, { "epoch": 0.8531489256606569, "grad_norm": 0.4899269774275039, "learning_rate": 4.9860083757658505e-06, "loss": 0.6266, "step": 2159 }, { "epoch": 0.8535440849592492, "grad_norm": 0.47032529957003527, "learning_rate": 4.985991818416877e-06, "loss": 0.6491, "step": 2160 }, { "epoch": 0.8539392442578414, "grad_norm": 0.4902687851540872, "learning_rate": 4.9859752513044375e-06, "loss": 0.6301, "step": 2161 }, { "epoch": 0.8543344035564336, "grad_norm": 0.48553682325045194, "learning_rate": 4.985958674428597e-06, "loss": 0.6061, "step": 2162 }, { "epoch": 0.8547295628550259, "grad_norm": 0.48411182285405713, "learning_rate": 4.98594208778942e-06, "loss": 0.6247, "step": 2163 }, { "epoch": 0.8551247221536181, "grad_norm": 0.47951482859948863, "learning_rate": 4.985925491386973e-06, "loss": 0.6312, "step": 2164 }, { "epoch": 0.8555198814522105, "grad_norm": 0.4746537184476964, "learning_rate": 4.98590888522132e-06, "loss": 0.6109, "step": 2165 }, { "epoch": 0.8559150407508027, "grad_norm": 0.4730418292508103, "learning_rate": 4.985892269292526e-06, "loss": 0.6161, "step": 2166 }, { "epoch": 0.856310200049395, "grad_norm": 0.4904685249853375, "learning_rate": 4.985875643600656e-06, "loss": 0.6025, "step": 2167 }, { "epoch": 0.8567053593479872, "grad_norm": 0.48706770829729257, "learning_rate": 4.985859008145777e-06, "loss": 0.6083, "step": 2168 }, { "epoch": 0.8571005186465794, "grad_norm": 0.4789542146071817, "learning_rate": 4.9858423629279525e-06, "loss": 0.6022, "step": 2169 }, { "epoch": 0.8574956779451717, "grad_norm": 0.4764312440150958, "learning_rate": 4.98582570794725e-06, "loss": 0.6269, "step": 2170 }, { "epoch": 0.8578908372437639, "grad_norm": 0.4905768390433796, "learning_rate": 4.985809043203732e-06, "loss": 0.6202, "step": 2171 }, { "epoch": 0.8582859965423562, "grad_norm": 0.48600066838590206, "learning_rate": 4.9857923686974664e-06, "loss": 0.6293, "step": 2172 }, { "epoch": 0.8586811558409484, "grad_norm": 0.5278954101657808, "learning_rate": 4.985775684428518e-06, "loss": 0.626, "step": 2173 }, { "epoch": 0.8590763151395406, "grad_norm": 0.4772206937537348, "learning_rate": 4.985758990396952e-06, "loss": 0.6127, "step": 2174 }, { "epoch": 0.8594714744381329, "grad_norm": 0.5032599137233393, "learning_rate": 4.985742286602834e-06, "loss": 0.6247, "step": 2175 }, { "epoch": 0.8598666337367251, "grad_norm": 0.4890130823058908, "learning_rate": 4.985725573046229e-06, "loss": 0.6138, "step": 2176 }, { "epoch": 0.8602617930353174, "grad_norm": 0.5090384794958555, "learning_rate": 4.985708849727205e-06, "loss": 0.6221, "step": 2177 }, { "epoch": 0.8606569523339096, "grad_norm": 0.5064691369911446, "learning_rate": 4.985692116645825e-06, "loss": 0.6365, "step": 2178 }, { "epoch": 0.8610521116325018, "grad_norm": 0.4884256917672367, "learning_rate": 4.985675373802155e-06, "loss": 0.6245, "step": 2179 }, { "epoch": 0.8614472709310941, "grad_norm": 0.48277194568900933, "learning_rate": 4.9856586211962636e-06, "loss": 0.6303, "step": 2180 }, { "epoch": 0.8618424302296863, "grad_norm": 0.4796258764505248, "learning_rate": 4.985641858828213e-06, "loss": 0.5917, "step": 2181 }, { "epoch": 0.8622375895282786, "grad_norm": 0.4742026777974947, "learning_rate": 4.985625086698071e-06, "loss": 0.6014, "step": 2182 }, { "epoch": 0.8626327488268708, "grad_norm": 0.4798722649367731, "learning_rate": 4.9856083048059025e-06, "loss": 0.6058, "step": 2183 }, { "epoch": 0.863027908125463, "grad_norm": 0.484402428507696, "learning_rate": 4.985591513151775e-06, "loss": 0.6244, "step": 2184 }, { "epoch": 0.8634230674240553, "grad_norm": 0.4859733762622107, "learning_rate": 4.985574711735752e-06, "loss": 0.6098, "step": 2185 }, { "epoch": 0.8638182267226475, "grad_norm": 0.4810113395505376, "learning_rate": 4.985557900557902e-06, "loss": 0.6142, "step": 2186 }, { "epoch": 0.8642133860212398, "grad_norm": 0.504453340452416, "learning_rate": 4.985541079618289e-06, "loss": 0.6289, "step": 2187 }, { "epoch": 0.8646085453198321, "grad_norm": 0.4855393706146718, "learning_rate": 4.985524248916981e-06, "loss": 0.6215, "step": 2188 }, { "epoch": 0.8650037046184244, "grad_norm": 0.4816372880869928, "learning_rate": 4.985507408454042e-06, "loss": 0.6214, "step": 2189 }, { "epoch": 0.8653988639170166, "grad_norm": 0.4971312058231278, "learning_rate": 4.98549055822954e-06, "loss": 0.6184, "step": 2190 }, { "epoch": 0.8657940232156088, "grad_norm": 0.4737419973037183, "learning_rate": 4.985473698243539e-06, "loss": 0.6362, "step": 2191 }, { "epoch": 0.8661891825142011, "grad_norm": 0.4819594165338205, "learning_rate": 4.985456828496108e-06, "loss": 0.6265, "step": 2192 }, { "epoch": 0.8665843418127933, "grad_norm": 0.4713033614828359, "learning_rate": 4.985439948987311e-06, "loss": 0.6062, "step": 2193 }, { "epoch": 0.8669795011113856, "grad_norm": 0.4731870858016755, "learning_rate": 4.985423059717216e-06, "loss": 0.6222, "step": 2194 }, { "epoch": 0.8673746604099778, "grad_norm": 0.4835316201063381, "learning_rate": 4.9854061606858875e-06, "loss": 0.636, "step": 2195 }, { "epoch": 0.86776981970857, "grad_norm": 0.48541889377733993, "learning_rate": 4.985389251893393e-06, "loss": 0.6183, "step": 2196 }, { "epoch": 0.8681649790071623, "grad_norm": 0.49391047126615967, "learning_rate": 4.985372333339799e-06, "loss": 0.6044, "step": 2197 }, { "epoch": 0.8685601383057545, "grad_norm": 0.5015005291120314, "learning_rate": 4.985355405025172e-06, "loss": 0.6142, "step": 2198 }, { "epoch": 0.8689552976043468, "grad_norm": 0.4898017203240075, "learning_rate": 4.985338466949577e-06, "loss": 0.6299, "step": 2199 }, { "epoch": 0.869350456902939, "grad_norm": 0.49139107555550443, "learning_rate": 4.985321519113083e-06, "loss": 0.5891, "step": 2200 }, { "epoch": 0.8697456162015312, "grad_norm": 0.4653030042469201, "learning_rate": 4.985304561515754e-06, "loss": 0.6056, "step": 2201 }, { "epoch": 0.8701407755001235, "grad_norm": 0.48204406635629615, "learning_rate": 4.985287594157659e-06, "loss": 0.6149, "step": 2202 }, { "epoch": 0.8705359347987157, "grad_norm": 0.4944551279372508, "learning_rate": 4.9852706170388635e-06, "loss": 0.6122, "step": 2203 }, { "epoch": 0.870931094097308, "grad_norm": 0.4632483556840172, "learning_rate": 4.985253630159434e-06, "loss": 0.6117, "step": 2204 }, { "epoch": 0.8713262533959002, "grad_norm": 0.4758369257065419, "learning_rate": 4.9852366335194365e-06, "loss": 0.631, "step": 2205 }, { "epoch": 0.8717214126944924, "grad_norm": 0.4784261261826601, "learning_rate": 4.985219627118939e-06, "loss": 0.6254, "step": 2206 }, { "epoch": 0.8721165719930847, "grad_norm": 0.46820459867095077, "learning_rate": 4.985202610958008e-06, "loss": 0.6041, "step": 2207 }, { "epoch": 0.8725117312916769, "grad_norm": 0.6774502372821436, "learning_rate": 4.98518558503671e-06, "loss": 0.6349, "step": 2208 }, { "epoch": 0.8729068905902692, "grad_norm": 0.477847341760241, "learning_rate": 4.985168549355113e-06, "loss": 0.5999, "step": 2209 }, { "epoch": 0.8733020498888614, "grad_norm": 0.4709206605453225, "learning_rate": 4.985151503913283e-06, "loss": 0.6219, "step": 2210 }, { "epoch": 0.8736972091874536, "grad_norm": 0.5073393890746453, "learning_rate": 4.985134448711285e-06, "loss": 0.6131, "step": 2211 }, { "epoch": 0.874092368486046, "grad_norm": 0.47895189242680536, "learning_rate": 4.98511738374919e-06, "loss": 0.6276, "step": 2212 }, { "epoch": 0.8744875277846382, "grad_norm": 0.4732689476332835, "learning_rate": 4.985100309027062e-06, "loss": 0.6256, "step": 2213 }, { "epoch": 0.8748826870832305, "grad_norm": 0.4682497810387466, "learning_rate": 4.985083224544969e-06, "loss": 0.6024, "step": 2214 }, { "epoch": 0.8752778463818227, "grad_norm": 0.5044101641071467, "learning_rate": 4.985066130302979e-06, "loss": 0.6292, "step": 2215 }, { "epoch": 0.875673005680415, "grad_norm": 0.5079557931579949, "learning_rate": 4.985049026301158e-06, "loss": 0.6228, "step": 2216 }, { "epoch": 0.8760681649790072, "grad_norm": 0.4867583650129786, "learning_rate": 4.985031912539572e-06, "loss": 0.6211, "step": 2217 }, { "epoch": 0.8764633242775994, "grad_norm": 0.48476520801738077, "learning_rate": 4.985014789018291e-06, "loss": 0.5925, "step": 2218 }, { "epoch": 0.8768584835761917, "grad_norm": 0.49568692882589194, "learning_rate": 4.9849976557373805e-06, "loss": 0.6223, "step": 2219 }, { "epoch": 0.8772536428747839, "grad_norm": 0.5244025646487809, "learning_rate": 4.984980512696908e-06, "loss": 0.6113, "step": 2220 }, { "epoch": 0.8776488021733762, "grad_norm": 0.4647233758348524, "learning_rate": 4.984963359896941e-06, "loss": 0.6245, "step": 2221 }, { "epoch": 0.8780439614719684, "grad_norm": 0.493296948580835, "learning_rate": 4.984946197337548e-06, "loss": 0.6289, "step": 2222 }, { "epoch": 0.8784391207705606, "grad_norm": 0.48487371283817793, "learning_rate": 4.984929025018794e-06, "loss": 0.618, "step": 2223 }, { "epoch": 0.8788342800691529, "grad_norm": 0.4628323988563299, "learning_rate": 4.9849118429407486e-06, "loss": 0.6116, "step": 2224 }, { "epoch": 0.8792294393677451, "grad_norm": 0.4708565386034292, "learning_rate": 4.984894651103478e-06, "loss": 0.6073, "step": 2225 }, { "epoch": 0.8796245986663374, "grad_norm": 0.47596980477482004, "learning_rate": 4.98487744950705e-06, "loss": 0.6134, "step": 2226 }, { "epoch": 0.8800197579649296, "grad_norm": 0.48436684059520585, "learning_rate": 4.984860238151533e-06, "loss": 0.6306, "step": 2227 }, { "epoch": 0.8804149172635218, "grad_norm": 2.901406553646194, "learning_rate": 4.984843017036993e-06, "loss": 0.6343, "step": 2228 }, { "epoch": 0.8808100765621141, "grad_norm": 0.5052255791290899, "learning_rate": 4.984825786163499e-06, "loss": 0.6131, "step": 2229 }, { "epoch": 0.8812052358607063, "grad_norm": 0.4790543312612506, "learning_rate": 4.984808545531118e-06, "loss": 0.6215, "step": 2230 }, { "epoch": 0.8816003951592986, "grad_norm": 0.46921948088144955, "learning_rate": 4.984791295139917e-06, "loss": 0.6004, "step": 2231 }, { "epoch": 0.8819955544578908, "grad_norm": 0.4774028382073232, "learning_rate": 4.984774034989965e-06, "loss": 0.6351, "step": 2232 }, { "epoch": 0.882390713756483, "grad_norm": 0.48613593924598036, "learning_rate": 4.98475676508133e-06, "loss": 0.6223, "step": 2233 }, { "epoch": 0.8827858730550753, "grad_norm": 0.47488141594904604, "learning_rate": 4.9847394854140796e-06, "loss": 0.6361, "step": 2234 }, { "epoch": 0.8831810323536676, "grad_norm": 0.469545225131135, "learning_rate": 4.984722195988281e-06, "loss": 0.6227, "step": 2235 }, { "epoch": 0.8835761916522599, "grad_norm": 0.5460234062255062, "learning_rate": 4.984704896804003e-06, "loss": 0.5994, "step": 2236 }, { "epoch": 0.8839713509508521, "grad_norm": 0.5087695798593977, "learning_rate": 4.984687587861311e-06, "loss": 0.6432, "step": 2237 }, { "epoch": 0.8843665102494443, "grad_norm": 0.49848654741613146, "learning_rate": 4.984670269160277e-06, "loss": 0.6081, "step": 2238 }, { "epoch": 0.8847616695480366, "grad_norm": 0.47222113192747045, "learning_rate": 4.984652940700966e-06, "loss": 0.5955, "step": 2239 }, { "epoch": 0.8851568288466288, "grad_norm": 0.47776469283687745, "learning_rate": 4.984635602483447e-06, "loss": 0.6107, "step": 2240 }, { "epoch": 0.8855519881452211, "grad_norm": 0.4761875116964049, "learning_rate": 4.984618254507788e-06, "loss": 0.6104, "step": 2241 }, { "epoch": 0.8859471474438133, "grad_norm": 0.4772606111305406, "learning_rate": 4.984600896774058e-06, "loss": 0.612, "step": 2242 }, { "epoch": 0.8863423067424056, "grad_norm": 0.4875078890714507, "learning_rate": 4.984583529282323e-06, "loss": 0.6046, "step": 2243 }, { "epoch": 0.8867374660409978, "grad_norm": 0.5351729625323042, "learning_rate": 4.984566152032654e-06, "loss": 0.6302, "step": 2244 }, { "epoch": 0.88713262533959, "grad_norm": 0.5134865903542387, "learning_rate": 4.984548765025117e-06, "loss": 0.6398, "step": 2245 }, { "epoch": 0.8875277846381823, "grad_norm": 0.47943572967975917, "learning_rate": 4.984531368259782e-06, "loss": 0.583, "step": 2246 }, { "epoch": 0.8879229439367745, "grad_norm": 0.48674628922099783, "learning_rate": 4.984513961736716e-06, "loss": 0.6189, "step": 2247 }, { "epoch": 0.8883181032353668, "grad_norm": 0.5054075145507174, "learning_rate": 4.984496545455988e-06, "loss": 0.6236, "step": 2248 }, { "epoch": 0.888713262533959, "grad_norm": 0.4717072903007971, "learning_rate": 4.984479119417666e-06, "loss": 0.6108, "step": 2249 }, { "epoch": 0.8891084218325512, "grad_norm": 0.4973332505120243, "learning_rate": 4.984461683621818e-06, "loss": 0.6141, "step": 2250 }, { "epoch": 0.8895035811311435, "grad_norm": 0.5202503935334306, "learning_rate": 4.984444238068515e-06, "loss": 0.611, "step": 2251 }, { "epoch": 0.8898987404297357, "grad_norm": 0.4868713771823043, "learning_rate": 4.984426782757822e-06, "loss": 0.623, "step": 2252 }, { "epoch": 0.890293899728328, "grad_norm": 0.4797581634559235, "learning_rate": 4.984409317689809e-06, "loss": 0.6192, "step": 2253 }, { "epoch": 0.8906890590269202, "grad_norm": 0.49427284931935045, "learning_rate": 4.984391842864546e-06, "loss": 0.6149, "step": 2254 }, { "epoch": 0.8910842183255124, "grad_norm": 0.4869169065393114, "learning_rate": 4.9843743582821005e-06, "loss": 0.621, "step": 2255 }, { "epoch": 0.8914793776241047, "grad_norm": 0.4876328379453221, "learning_rate": 4.98435686394254e-06, "loss": 0.5969, "step": 2256 }, { "epoch": 0.8918745369226969, "grad_norm": 0.4832121246843812, "learning_rate": 4.984339359845935e-06, "loss": 0.6265, "step": 2257 }, { "epoch": 0.8922696962212892, "grad_norm": 0.566324624003132, "learning_rate": 4.9843218459923535e-06, "loss": 0.6097, "step": 2258 }, { "epoch": 0.8926648555198815, "grad_norm": 0.49414708906211646, "learning_rate": 4.9843043223818646e-06, "loss": 0.6336, "step": 2259 }, { "epoch": 0.8930600148184737, "grad_norm": 0.4670689454184053, "learning_rate": 4.984286789014536e-06, "loss": 0.6231, "step": 2260 }, { "epoch": 0.893455174117066, "grad_norm": 0.47425632523334776, "learning_rate": 4.984269245890438e-06, "loss": 0.6116, "step": 2261 }, { "epoch": 0.8938503334156582, "grad_norm": 0.481098132153482, "learning_rate": 4.98425169300964e-06, "loss": 0.6339, "step": 2262 }, { "epoch": 0.8942454927142505, "grad_norm": 0.4881412900652154, "learning_rate": 4.984234130372209e-06, "loss": 0.617, "step": 2263 }, { "epoch": 0.8946406520128427, "grad_norm": 0.49122021239183733, "learning_rate": 4.984216557978214e-06, "loss": 0.6199, "step": 2264 }, { "epoch": 0.895035811311435, "grad_norm": 0.4890052599536881, "learning_rate": 4.9841989758277255e-06, "loss": 0.6359, "step": 2265 }, { "epoch": 0.8954309706100272, "grad_norm": 0.5014418130563002, "learning_rate": 4.984181383920812e-06, "loss": 0.6162, "step": 2266 }, { "epoch": 0.8958261299086194, "grad_norm": 0.48774048149914556, "learning_rate": 4.984163782257543e-06, "loss": 0.6175, "step": 2267 }, { "epoch": 0.8962212892072117, "grad_norm": 0.4619145815324718, "learning_rate": 4.9841461708379865e-06, "loss": 0.6129, "step": 2268 }, { "epoch": 0.8966164485058039, "grad_norm": 0.4706359033079191, "learning_rate": 4.9841285496622124e-06, "loss": 0.6217, "step": 2269 }, { "epoch": 0.8970116078043961, "grad_norm": 0.5330737136007412, "learning_rate": 4.984110918730289e-06, "loss": 0.6302, "step": 2270 }, { "epoch": 0.8974067671029884, "grad_norm": 0.4929056846260133, "learning_rate": 4.984093278042288e-06, "loss": 0.6106, "step": 2271 }, { "epoch": 0.8978019264015806, "grad_norm": 0.4780478045107924, "learning_rate": 4.984075627598276e-06, "loss": 0.6254, "step": 2272 }, { "epoch": 0.8981970857001729, "grad_norm": 0.48039178372010166, "learning_rate": 4.984057967398324e-06, "loss": 0.613, "step": 2273 }, { "epoch": 0.8985922449987651, "grad_norm": 0.4902534031318573, "learning_rate": 4.984040297442499e-06, "loss": 0.6196, "step": 2274 }, { "epoch": 0.8989874042973574, "grad_norm": 0.48251041315749466, "learning_rate": 4.9840226177308745e-06, "loss": 0.635, "step": 2275 }, { "epoch": 0.8993825635959496, "grad_norm": 0.5107948423795806, "learning_rate": 4.984004928263516e-06, "loss": 0.6371, "step": 2276 }, { "epoch": 0.8997777228945418, "grad_norm": 0.4849099292899462, "learning_rate": 4.983987229040495e-06, "loss": 0.5988, "step": 2277 }, { "epoch": 0.9001728821931341, "grad_norm": 0.45033030391921003, "learning_rate": 4.9839695200618804e-06, "loss": 0.5957, "step": 2278 }, { "epoch": 0.9005680414917263, "grad_norm": 0.48511884291874885, "learning_rate": 4.9839518013277425e-06, "loss": 0.5963, "step": 2279 }, { "epoch": 0.9009632007903186, "grad_norm": 0.4818554448134295, "learning_rate": 4.983934072838149e-06, "loss": 0.6199, "step": 2280 }, { "epoch": 0.9013583600889108, "grad_norm": 0.4864050546230534, "learning_rate": 4.983916334593171e-06, "loss": 0.6268, "step": 2281 }, { "epoch": 0.901753519387503, "grad_norm": 0.48126531375723003, "learning_rate": 4.9838985865928794e-06, "loss": 0.645, "step": 2282 }, { "epoch": 0.9021486786860954, "grad_norm": 0.47513264097168434, "learning_rate": 4.9838808288373405e-06, "loss": 0.628, "step": 2283 }, { "epoch": 0.9025438379846876, "grad_norm": 0.4858642717988499, "learning_rate": 4.983863061326627e-06, "loss": 0.6167, "step": 2284 }, { "epoch": 0.9029389972832799, "grad_norm": 0.5024695807134403, "learning_rate": 4.983845284060808e-06, "loss": 0.6108, "step": 2285 }, { "epoch": 0.9033341565818721, "grad_norm": 0.47725860181210494, "learning_rate": 4.983827497039953e-06, "loss": 0.5905, "step": 2286 }, { "epoch": 0.9037293158804643, "grad_norm": 0.4725442142623689, "learning_rate": 4.983809700264131e-06, "loss": 0.6251, "step": 2287 }, { "epoch": 0.9041244751790566, "grad_norm": 0.4917289285832563, "learning_rate": 4.9837918937334125e-06, "loss": 0.642, "step": 2288 }, { "epoch": 0.9045196344776488, "grad_norm": 0.49254790176708696, "learning_rate": 4.983774077447869e-06, "loss": 0.6035, "step": 2289 }, { "epoch": 0.9049147937762411, "grad_norm": 0.47663891087530386, "learning_rate": 4.983756251407569e-06, "loss": 0.6165, "step": 2290 }, { "epoch": 0.9053099530748333, "grad_norm": 0.46771621207039893, "learning_rate": 4.983738415612581e-06, "loss": 0.5982, "step": 2291 }, { "epoch": 0.9057051123734255, "grad_norm": 0.48007080844590533, "learning_rate": 4.983720570062979e-06, "loss": 0.6118, "step": 2292 }, { "epoch": 0.9061002716720178, "grad_norm": 0.47976244296713044, "learning_rate": 4.9837027147588294e-06, "loss": 0.6042, "step": 2293 }, { "epoch": 0.90649543097061, "grad_norm": 0.5070646462309771, "learning_rate": 4.983684849700204e-06, "loss": 0.6336, "step": 2294 }, { "epoch": 0.9068905902692023, "grad_norm": 0.4746142937787273, "learning_rate": 4.983666974887172e-06, "loss": 0.6275, "step": 2295 }, { "epoch": 0.9072857495677945, "grad_norm": 0.4773646681690295, "learning_rate": 4.983649090319806e-06, "loss": 0.6222, "step": 2296 }, { "epoch": 0.9076809088663867, "grad_norm": 0.4870955712187313, "learning_rate": 4.983631195998173e-06, "loss": 0.6336, "step": 2297 }, { "epoch": 0.908076068164979, "grad_norm": 0.5385238468634763, "learning_rate": 4.983613291922345e-06, "loss": 0.6218, "step": 2298 }, { "epoch": 0.9084712274635712, "grad_norm": 0.4704713115373773, "learning_rate": 4.983595378092393e-06, "loss": 0.6295, "step": 2299 }, { "epoch": 0.9088663867621635, "grad_norm": 0.5046914027772894, "learning_rate": 4.9835774545083856e-06, "loss": 0.6145, "step": 2300 }, { "epoch": 0.9092615460607557, "grad_norm": 0.4839085038708857, "learning_rate": 4.983559521170394e-06, "loss": 0.6254, "step": 2301 }, { "epoch": 0.909656705359348, "grad_norm": 0.5087717682587891, "learning_rate": 4.98354157807849e-06, "loss": 0.6161, "step": 2302 }, { "epoch": 0.9100518646579402, "grad_norm": 0.46452927922636916, "learning_rate": 4.983523625232741e-06, "loss": 0.621, "step": 2303 }, { "epoch": 0.9104470239565324, "grad_norm": 0.46904690117204156, "learning_rate": 4.983505662633221e-06, "loss": 0.6, "step": 2304 }, { "epoch": 0.9108421832551247, "grad_norm": 0.5301523549946244, "learning_rate": 4.983487690279998e-06, "loss": 0.6086, "step": 2305 }, { "epoch": 0.911237342553717, "grad_norm": 0.5060704545892506, "learning_rate": 4.983469708173143e-06, "loss": 0.6229, "step": 2306 }, { "epoch": 0.9116325018523093, "grad_norm": 0.4813420559082125, "learning_rate": 4.9834517163127275e-06, "loss": 0.6211, "step": 2307 }, { "epoch": 0.9120276611509015, "grad_norm": 0.5220914006598277, "learning_rate": 4.983433714698821e-06, "loss": 0.6152, "step": 2308 }, { "epoch": 0.9124228204494937, "grad_norm": 0.49458336480253934, "learning_rate": 4.983415703331496e-06, "loss": 0.5969, "step": 2309 }, { "epoch": 0.912817979748086, "grad_norm": 0.48129884758049346, "learning_rate": 4.983397682210821e-06, "loss": 0.621, "step": 2310 }, { "epoch": 0.9132131390466782, "grad_norm": 0.4727468640393369, "learning_rate": 4.983379651336869e-06, "loss": 0.6197, "step": 2311 }, { "epoch": 0.9136082983452705, "grad_norm": 0.49587024829033005, "learning_rate": 4.983361610709709e-06, "loss": 0.6351, "step": 2312 }, { "epoch": 0.9140034576438627, "grad_norm": 0.4887985778014849, "learning_rate": 4.983343560329413e-06, "loss": 0.6036, "step": 2313 }, { "epoch": 0.914398616942455, "grad_norm": 0.5094208232865344, "learning_rate": 4.983325500196051e-06, "loss": 0.6281, "step": 2314 }, { "epoch": 0.9147937762410472, "grad_norm": 0.520779337465162, "learning_rate": 4.983307430309695e-06, "loss": 0.5966, "step": 2315 }, { "epoch": 0.9151889355396394, "grad_norm": 0.4983724492164491, "learning_rate": 4.983289350670415e-06, "loss": 0.6002, "step": 2316 }, { "epoch": 0.9155840948382317, "grad_norm": 0.4977585587251415, "learning_rate": 4.983271261278282e-06, "loss": 0.6048, "step": 2317 }, { "epoch": 0.9159792541368239, "grad_norm": 0.5074344584060536, "learning_rate": 4.983253162133368e-06, "loss": 0.6061, "step": 2318 }, { "epoch": 0.9163744134354161, "grad_norm": 0.47393303007323573, "learning_rate": 4.983235053235743e-06, "loss": 0.6107, "step": 2319 }, { "epoch": 0.9167695727340084, "grad_norm": 0.48092444595983924, "learning_rate": 4.983216934585478e-06, "loss": 0.6252, "step": 2320 }, { "epoch": 0.9171647320326006, "grad_norm": 0.5173009133356737, "learning_rate": 4.983198806182647e-06, "loss": 0.6342, "step": 2321 }, { "epoch": 0.9175598913311929, "grad_norm": 0.4823899650053501, "learning_rate": 4.983180668027317e-06, "loss": 0.6254, "step": 2322 }, { "epoch": 0.9179550506297851, "grad_norm": 0.48307570444067927, "learning_rate": 4.9831625201195625e-06, "loss": 0.6074, "step": 2323 }, { "epoch": 0.9183502099283773, "grad_norm": 0.5114385837411004, "learning_rate": 4.983144362459452e-06, "loss": 0.5995, "step": 2324 }, { "epoch": 0.9187453692269696, "grad_norm": 0.49319776581630537, "learning_rate": 4.9831261950470595e-06, "loss": 0.6033, "step": 2325 }, { "epoch": 0.9191405285255618, "grad_norm": 0.48172601408960075, "learning_rate": 4.9831080178824545e-06, "loss": 0.6144, "step": 2326 }, { "epoch": 0.9195356878241541, "grad_norm": 0.6998545259966715, "learning_rate": 4.983089830965709e-06, "loss": 0.5958, "step": 2327 }, { "epoch": 0.9199308471227463, "grad_norm": 0.47578948749489164, "learning_rate": 4.983071634296895e-06, "loss": 0.6066, "step": 2328 }, { "epoch": 0.9203260064213385, "grad_norm": 0.4924183719099034, "learning_rate": 4.983053427876083e-06, "loss": 0.6328, "step": 2329 }, { "epoch": 0.9207211657199309, "grad_norm": 0.46828818805254213, "learning_rate": 4.983035211703345e-06, "loss": 0.6272, "step": 2330 }, { "epoch": 0.9211163250185231, "grad_norm": 0.5014119318637013, "learning_rate": 4.9830169857787524e-06, "loss": 0.6223, "step": 2331 }, { "epoch": 0.9215114843171154, "grad_norm": 0.509639756966488, "learning_rate": 4.982998750102378e-06, "loss": 0.6451, "step": 2332 }, { "epoch": 0.9219066436157076, "grad_norm": 0.4874868797487242, "learning_rate": 4.982980504674291e-06, "loss": 0.6198, "step": 2333 }, { "epoch": 0.9223018029142999, "grad_norm": 0.4910768108400356, "learning_rate": 4.982962249494564e-06, "loss": 0.6133, "step": 2334 }, { "epoch": 0.9226969622128921, "grad_norm": 0.600038361094374, "learning_rate": 4.98294398456327e-06, "loss": 0.5906, "step": 2335 }, { "epoch": 0.9230921215114843, "grad_norm": 0.4882683625038398, "learning_rate": 4.982925709880479e-06, "loss": 0.6216, "step": 2336 }, { "epoch": 0.9234872808100766, "grad_norm": 0.4877944511418589, "learning_rate": 4.982907425446264e-06, "loss": 0.6142, "step": 2337 }, { "epoch": 0.9238824401086688, "grad_norm": 0.47849837901974185, "learning_rate": 4.982889131260696e-06, "loss": 0.629, "step": 2338 }, { "epoch": 0.9242775994072611, "grad_norm": 0.49657514695675, "learning_rate": 4.982870827323847e-06, "loss": 0.6344, "step": 2339 }, { "epoch": 0.9246727587058533, "grad_norm": 0.4756867812643004, "learning_rate": 4.98285251363579e-06, "loss": 0.5892, "step": 2340 }, { "epoch": 0.9250679180044455, "grad_norm": 0.4792345020023528, "learning_rate": 4.982834190196595e-06, "loss": 0.6405, "step": 2341 }, { "epoch": 0.9254630773030378, "grad_norm": 0.47568126691061463, "learning_rate": 4.982815857006336e-06, "loss": 0.6218, "step": 2342 }, { "epoch": 0.92585823660163, "grad_norm": 0.4622150942965156, "learning_rate": 4.982797514065083e-06, "loss": 0.5966, "step": 2343 }, { "epoch": 0.9262533959002223, "grad_norm": 0.48062685913560826, "learning_rate": 4.982779161372909e-06, "loss": 0.6067, "step": 2344 }, { "epoch": 0.9266485551988145, "grad_norm": 0.49832908386195485, "learning_rate": 4.9827607989298874e-06, "loss": 0.6213, "step": 2345 }, { "epoch": 0.9270437144974067, "grad_norm": 0.4651264905943558, "learning_rate": 4.982742426736088e-06, "loss": 0.6104, "step": 2346 }, { "epoch": 0.927438873795999, "grad_norm": 0.4627917129880916, "learning_rate": 4.982724044791584e-06, "loss": 0.5947, "step": 2347 }, { "epoch": 0.9278340330945912, "grad_norm": 0.47459773400411004, "learning_rate": 4.982705653096447e-06, "loss": 0.6171, "step": 2348 }, { "epoch": 0.9282291923931835, "grad_norm": 0.4754459015910213, "learning_rate": 4.982687251650751e-06, "loss": 0.5867, "step": 2349 }, { "epoch": 0.9286243516917757, "grad_norm": 0.4693623306765774, "learning_rate": 4.9826688404545655e-06, "loss": 0.6016, "step": 2350 }, { "epoch": 0.929019510990368, "grad_norm": 0.4595592610393575, "learning_rate": 4.982650419507965e-06, "loss": 0.6083, "step": 2351 }, { "epoch": 0.9294146702889602, "grad_norm": 1.0329197407008157, "learning_rate": 4.982631988811022e-06, "loss": 0.6151, "step": 2352 }, { "epoch": 0.9298098295875524, "grad_norm": 0.46434445302211896, "learning_rate": 4.982613548363807e-06, "loss": 0.6109, "step": 2353 }, { "epoch": 0.9302049888861448, "grad_norm": 0.4720496810881737, "learning_rate": 4.982595098166394e-06, "loss": 0.6135, "step": 2354 }, { "epoch": 0.930600148184737, "grad_norm": 0.46567296934174945, "learning_rate": 4.982576638218855e-06, "loss": 0.6092, "step": 2355 }, { "epoch": 0.9309953074833293, "grad_norm": 0.4783627968108407, "learning_rate": 4.982558168521263e-06, "loss": 0.6226, "step": 2356 }, { "epoch": 0.9313904667819215, "grad_norm": 0.46771411865600776, "learning_rate": 4.982539689073689e-06, "loss": 0.6216, "step": 2357 }, { "epoch": 0.9317856260805137, "grad_norm": 0.4824264375712755, "learning_rate": 4.982521199876207e-06, "loss": 0.6331, "step": 2358 }, { "epoch": 0.932180785379106, "grad_norm": 0.4870691371321396, "learning_rate": 4.9825027009288896e-06, "loss": 0.6275, "step": 2359 }, { "epoch": 0.9325759446776982, "grad_norm": 0.48392613415399993, "learning_rate": 4.982484192231808e-06, "loss": 0.6143, "step": 2360 }, { "epoch": 0.9329711039762905, "grad_norm": 0.4622678197643799, "learning_rate": 4.9824656737850365e-06, "loss": 0.6188, "step": 2361 }, { "epoch": 0.9333662632748827, "grad_norm": 0.49520864864087916, "learning_rate": 4.982447145588648e-06, "loss": 0.5983, "step": 2362 }, { "epoch": 0.9337614225734749, "grad_norm": 0.49238331015657344, "learning_rate": 4.982428607642713e-06, "loss": 0.6304, "step": 2363 }, { "epoch": 0.9341565818720672, "grad_norm": 0.46810832966740035, "learning_rate": 4.9824100599473065e-06, "loss": 0.6057, "step": 2364 }, { "epoch": 0.9345517411706594, "grad_norm": 0.4789733237570732, "learning_rate": 4.982391502502501e-06, "loss": 0.5931, "step": 2365 }, { "epoch": 0.9349469004692517, "grad_norm": 0.48015565027094287, "learning_rate": 4.98237293530837e-06, "loss": 0.6304, "step": 2366 }, { "epoch": 0.9353420597678439, "grad_norm": 0.4851134573753921, "learning_rate": 4.9823543583649846e-06, "loss": 0.6296, "step": 2367 }, { "epoch": 0.9357372190664361, "grad_norm": 0.48585992460960437, "learning_rate": 4.982335771672418e-06, "loss": 0.5901, "step": 2368 }, { "epoch": 0.9361323783650284, "grad_norm": 0.47063745016814984, "learning_rate": 4.982317175230744e-06, "loss": 0.6024, "step": 2369 }, { "epoch": 0.9365275376636206, "grad_norm": 0.4799775780201179, "learning_rate": 4.982298569040036e-06, "loss": 0.6158, "step": 2370 }, { "epoch": 0.9369226969622129, "grad_norm": 0.487398135567962, "learning_rate": 4.982279953100366e-06, "loss": 0.6207, "step": 2371 }, { "epoch": 0.9373178562608051, "grad_norm": 0.4707569265394489, "learning_rate": 4.9822613274118085e-06, "loss": 0.6129, "step": 2372 }, { "epoch": 0.9377130155593973, "grad_norm": 0.48484895295531333, "learning_rate": 4.9822426919744355e-06, "loss": 0.5927, "step": 2373 }, { "epoch": 0.9381081748579896, "grad_norm": 0.5086631550313602, "learning_rate": 4.9822240467883205e-06, "loss": 0.6513, "step": 2374 }, { "epoch": 0.9385033341565818, "grad_norm": 0.4854307750752647, "learning_rate": 4.9822053918535366e-06, "loss": 0.639, "step": 2375 }, { "epoch": 0.9388984934551741, "grad_norm": 0.49908919962486925, "learning_rate": 4.982186727170157e-06, "loss": 0.6171, "step": 2376 }, { "epoch": 0.9392936527537664, "grad_norm": 0.47151786983504285, "learning_rate": 4.982168052738255e-06, "loss": 0.6127, "step": 2377 }, { "epoch": 0.9396888120523587, "grad_norm": 0.48250797464606165, "learning_rate": 4.982149368557905e-06, "loss": 0.636, "step": 2378 }, { "epoch": 0.9400839713509509, "grad_norm": 0.4839100853335766, "learning_rate": 4.982130674629179e-06, "loss": 0.6175, "step": 2379 }, { "epoch": 0.9404791306495431, "grad_norm": 0.46593074774551896, "learning_rate": 4.982111970952151e-06, "loss": 0.6, "step": 2380 }, { "epoch": 0.9408742899481354, "grad_norm": 0.4726472043264624, "learning_rate": 4.982093257526894e-06, "loss": 0.6306, "step": 2381 }, { "epoch": 0.9412694492467276, "grad_norm": 0.4707519207258955, "learning_rate": 4.982074534353482e-06, "loss": 0.6089, "step": 2382 }, { "epoch": 0.9416646085453199, "grad_norm": 0.47810887989613376, "learning_rate": 4.9820558014319895e-06, "loss": 0.6068, "step": 2383 }, { "epoch": 0.9420597678439121, "grad_norm": 0.46370942325328973, "learning_rate": 4.9820370587624875e-06, "loss": 0.5938, "step": 2384 }, { "epoch": 0.9424549271425043, "grad_norm": 0.5035785783227827, "learning_rate": 4.9820183063450525e-06, "loss": 0.6265, "step": 2385 }, { "epoch": 0.9428500864410966, "grad_norm": 0.5024383370795096, "learning_rate": 4.981999544179756e-06, "loss": 0.6236, "step": 2386 }, { "epoch": 0.9432452457396888, "grad_norm": 0.45990102437488745, "learning_rate": 4.981980772266672e-06, "loss": 0.5894, "step": 2387 }, { "epoch": 0.9436404050382811, "grad_norm": 0.4654023117324646, "learning_rate": 4.981961990605876e-06, "loss": 0.5898, "step": 2388 }, { "epoch": 0.9440355643368733, "grad_norm": 0.4762087512767081, "learning_rate": 4.981943199197439e-06, "loss": 0.6056, "step": 2389 }, { "epoch": 0.9444307236354655, "grad_norm": 0.4749986650314227, "learning_rate": 4.981924398041436e-06, "loss": 0.6135, "step": 2390 }, { "epoch": 0.9448258829340578, "grad_norm": 0.4719526074802648, "learning_rate": 4.981905587137943e-06, "loss": 0.5893, "step": 2391 }, { "epoch": 0.94522104223265, "grad_norm": 0.4755247323470361, "learning_rate": 4.9818867664870306e-06, "loss": 0.6073, "step": 2392 }, { "epoch": 0.9456162015312423, "grad_norm": 0.48675290749713584, "learning_rate": 4.981867936088774e-06, "loss": 0.5924, "step": 2393 }, { "epoch": 0.9460113608298345, "grad_norm": 0.4743633378571163, "learning_rate": 4.9818490959432485e-06, "loss": 0.62, "step": 2394 }, { "epoch": 0.9464065201284267, "grad_norm": 0.49288950026353556, "learning_rate": 4.981830246050526e-06, "loss": 0.6045, "step": 2395 }, { "epoch": 0.946801679427019, "grad_norm": 0.4884082163788517, "learning_rate": 4.981811386410681e-06, "loss": 0.6118, "step": 2396 }, { "epoch": 0.9471968387256112, "grad_norm": 0.4818633127585905, "learning_rate": 4.981792517023788e-06, "loss": 0.6154, "step": 2397 }, { "epoch": 0.9475919980242035, "grad_norm": 0.5212061592296549, "learning_rate": 4.981773637889921e-06, "loss": 0.5999, "step": 2398 }, { "epoch": 0.9479871573227957, "grad_norm": 0.4921362608346308, "learning_rate": 4.981754749009154e-06, "loss": 0.6248, "step": 2399 }, { "epoch": 0.9483823166213879, "grad_norm": 0.6375277486862304, "learning_rate": 4.9817358503815616e-06, "loss": 0.6205, "step": 2400 }, { "epoch": 0.9487774759199803, "grad_norm": 0.4822713530796236, "learning_rate": 4.9817169420072175e-06, "loss": 0.6068, "step": 2401 }, { "epoch": 0.9491726352185725, "grad_norm": 0.45735313205333067, "learning_rate": 4.981698023886197e-06, "loss": 0.6045, "step": 2402 }, { "epoch": 0.9495677945171648, "grad_norm": 0.4870688284599884, "learning_rate": 4.9816790960185725e-06, "loss": 0.6169, "step": 2403 }, { "epoch": 0.949962953815757, "grad_norm": 0.4508977591163079, "learning_rate": 4.981660158404421e-06, "loss": 0.6081, "step": 2404 }, { "epoch": 0.9503581131143493, "grad_norm": 0.48177734751539525, "learning_rate": 4.981641211043814e-06, "loss": 0.6075, "step": 2405 }, { "epoch": 0.9507532724129415, "grad_norm": 0.45900423930549045, "learning_rate": 4.981622253936828e-06, "loss": 0.5804, "step": 2406 }, { "epoch": 0.9511484317115337, "grad_norm": 0.46636251306928633, "learning_rate": 4.9816032870835366e-06, "loss": 0.6093, "step": 2407 }, { "epoch": 0.951543591010126, "grad_norm": 0.49173914273079045, "learning_rate": 4.981584310484014e-06, "loss": 0.6423, "step": 2408 }, { "epoch": 0.9519387503087182, "grad_norm": 0.4871067267842084, "learning_rate": 4.981565324138336e-06, "loss": 0.6404, "step": 2409 }, { "epoch": 0.9523339096073105, "grad_norm": 0.48801903916719275, "learning_rate": 4.981546328046575e-06, "loss": 0.6091, "step": 2410 }, { "epoch": 0.9527290689059027, "grad_norm": 0.47631638584783426, "learning_rate": 4.9815273222088075e-06, "loss": 0.6086, "step": 2411 }, { "epoch": 0.9531242282044949, "grad_norm": 0.45326422779082315, "learning_rate": 4.981508306625108e-06, "loss": 0.612, "step": 2412 }, { "epoch": 0.9535193875030872, "grad_norm": 0.5469615293144247, "learning_rate": 4.981489281295549e-06, "loss": 0.5988, "step": 2413 }, { "epoch": 0.9539145468016794, "grad_norm": 0.49738406140305297, "learning_rate": 4.9814702462202094e-06, "loss": 0.6225, "step": 2414 }, { "epoch": 0.9543097061002717, "grad_norm": 0.4621955995020316, "learning_rate": 4.98145120139916e-06, "loss": 0.6016, "step": 2415 }, { "epoch": 0.9547048653988639, "grad_norm": 0.46293528627278396, "learning_rate": 4.981432146832478e-06, "loss": 0.6214, "step": 2416 }, { "epoch": 0.9551000246974561, "grad_norm": 0.49663994492008307, "learning_rate": 4.981413082520236e-06, "loss": 0.6133, "step": 2417 }, { "epoch": 0.9554951839960484, "grad_norm": 0.4817582642896156, "learning_rate": 4.981394008462511e-06, "loss": 0.6238, "step": 2418 }, { "epoch": 0.9558903432946406, "grad_norm": 0.5586123030609345, "learning_rate": 4.981374924659377e-06, "loss": 0.6243, "step": 2419 }, { "epoch": 0.9562855025932329, "grad_norm": 0.4850500636019948, "learning_rate": 4.9813558311109095e-06, "loss": 0.622, "step": 2420 }, { "epoch": 0.9566806618918251, "grad_norm": 0.4689441752707809, "learning_rate": 4.9813367278171835e-06, "loss": 0.5765, "step": 2421 }, { "epoch": 0.9570758211904173, "grad_norm": 0.47158958076654856, "learning_rate": 4.981317614778272e-06, "loss": 0.6296, "step": 2422 }, { "epoch": 0.9574709804890096, "grad_norm": 0.514101025815252, "learning_rate": 4.981298491994252e-06, "loss": 0.6234, "step": 2423 }, { "epoch": 0.9578661397876018, "grad_norm": 0.48399944464009775, "learning_rate": 4.981279359465199e-06, "loss": 0.6107, "step": 2424 }, { "epoch": 0.9582612990861942, "grad_norm": 0.48729938368312087, "learning_rate": 4.981260217191187e-06, "loss": 0.5893, "step": 2425 }, { "epoch": 0.9586564583847864, "grad_norm": 0.49004477541134556, "learning_rate": 4.981241065172292e-06, "loss": 0.6008, "step": 2426 }, { "epoch": 0.9590516176833787, "grad_norm": 0.48347415116566556, "learning_rate": 4.9812219034085886e-06, "loss": 0.618, "step": 2427 }, { "epoch": 0.9594467769819709, "grad_norm": 0.4793590490738295, "learning_rate": 4.981202731900152e-06, "loss": 0.6251, "step": 2428 }, { "epoch": 0.9598419362805631, "grad_norm": 0.48347957011954834, "learning_rate": 4.9811835506470575e-06, "loss": 0.6144, "step": 2429 }, { "epoch": 0.9602370955791554, "grad_norm": 0.5000951926275022, "learning_rate": 4.981164359649381e-06, "loss": 0.6217, "step": 2430 }, { "epoch": 0.9606322548777476, "grad_norm": 0.4853268721052706, "learning_rate": 4.981145158907198e-06, "loss": 0.6107, "step": 2431 }, { "epoch": 0.9610274141763399, "grad_norm": 0.4775051457662627, "learning_rate": 4.981125948420583e-06, "loss": 0.619, "step": 2432 }, { "epoch": 0.9614225734749321, "grad_norm": 0.4610581366616115, "learning_rate": 4.981106728189612e-06, "loss": 0.6011, "step": 2433 }, { "epoch": 0.9618177327735243, "grad_norm": 0.46908821425073965, "learning_rate": 4.98108749821436e-06, "loss": 0.5991, "step": 2434 }, { "epoch": 0.9622128920721166, "grad_norm": 0.4643140368630478, "learning_rate": 4.981068258494903e-06, "loss": 0.5848, "step": 2435 }, { "epoch": 0.9626080513707088, "grad_norm": 0.48640039378791994, "learning_rate": 4.981049009031317e-06, "loss": 0.6175, "step": 2436 }, { "epoch": 0.9630032106693011, "grad_norm": 0.4707625851268911, "learning_rate": 4.9810297498236765e-06, "loss": 0.5903, "step": 2437 }, { "epoch": 0.9633983699678933, "grad_norm": 0.47186949155654456, "learning_rate": 4.981010480872058e-06, "loss": 0.6051, "step": 2438 }, { "epoch": 0.9637935292664855, "grad_norm": 0.47253458553373584, "learning_rate": 4.980991202176536e-06, "loss": 0.6135, "step": 2439 }, { "epoch": 0.9641886885650778, "grad_norm": 0.4739469724306039, "learning_rate": 4.980971913737188e-06, "loss": 0.5884, "step": 2440 }, { "epoch": 0.96458384786367, "grad_norm": 0.47756433353936395, "learning_rate": 4.980952615554089e-06, "loss": 0.6054, "step": 2441 }, { "epoch": 0.9649790071622623, "grad_norm": 0.46338471724667013, "learning_rate": 4.980933307627315e-06, "loss": 0.611, "step": 2442 }, { "epoch": 0.9653741664608545, "grad_norm": 0.487680669856884, "learning_rate": 4.98091398995694e-06, "loss": 0.6192, "step": 2443 }, { "epoch": 0.9657693257594467, "grad_norm": 0.5227924200190431, "learning_rate": 4.9808946625430425e-06, "loss": 0.6034, "step": 2444 }, { "epoch": 0.966164485058039, "grad_norm": 0.49032597318513343, "learning_rate": 4.980875325385697e-06, "loss": 0.588, "step": 2445 }, { "epoch": 0.9665596443566312, "grad_norm": 0.46454318608059514, "learning_rate": 4.98085597848498e-06, "loss": 0.5968, "step": 2446 }, { "epoch": 0.9669548036552235, "grad_norm": 0.5078562623229934, "learning_rate": 4.980836621840967e-06, "loss": 0.6195, "step": 2447 }, { "epoch": 0.9673499629538158, "grad_norm": 0.4752546102671055, "learning_rate": 4.980817255453734e-06, "loss": 0.6118, "step": 2448 }, { "epoch": 0.967745122252408, "grad_norm": 0.4833480233978842, "learning_rate": 4.980797879323357e-06, "loss": 0.6261, "step": 2449 }, { "epoch": 0.9681402815510003, "grad_norm": 0.4771570217763964, "learning_rate": 4.980778493449912e-06, "loss": 0.6318, "step": 2450 }, { "epoch": 0.9685354408495925, "grad_norm": 0.47469969618301255, "learning_rate": 4.980759097833476e-06, "loss": 0.6027, "step": 2451 }, { "epoch": 0.9689306001481848, "grad_norm": 0.4781745301282861, "learning_rate": 4.980739692474125e-06, "loss": 0.6172, "step": 2452 }, { "epoch": 0.969325759446777, "grad_norm": 0.48953552287496055, "learning_rate": 4.980720277371934e-06, "loss": 0.6059, "step": 2453 }, { "epoch": 0.9697209187453693, "grad_norm": 0.4697257378860957, "learning_rate": 4.980700852526981e-06, "loss": 0.6108, "step": 2454 }, { "epoch": 0.9701160780439615, "grad_norm": 0.5170911041975621, "learning_rate": 4.980681417939341e-06, "loss": 0.6353, "step": 2455 }, { "epoch": 0.9705112373425537, "grad_norm": 0.4765311737964586, "learning_rate": 4.980661973609091e-06, "loss": 0.5911, "step": 2456 }, { "epoch": 0.970906396641146, "grad_norm": 0.4571918246118576, "learning_rate": 4.980642519536307e-06, "loss": 0.6204, "step": 2457 }, { "epoch": 0.9713015559397382, "grad_norm": 0.47402704361826625, "learning_rate": 4.980623055721065e-06, "loss": 0.6075, "step": 2458 }, { "epoch": 0.9716967152383305, "grad_norm": 0.4823370913476268, "learning_rate": 4.980603582163443e-06, "loss": 0.612, "step": 2459 }, { "epoch": 0.9720918745369227, "grad_norm": 0.4665199872651599, "learning_rate": 4.980584098863516e-06, "loss": 0.6255, "step": 2460 }, { "epoch": 0.9724870338355149, "grad_norm": 0.45850568990455864, "learning_rate": 4.980564605821361e-06, "loss": 0.6003, "step": 2461 }, { "epoch": 0.9728821931341072, "grad_norm": 0.4660323061963318, "learning_rate": 4.980545103037054e-06, "loss": 0.6153, "step": 2462 }, { "epoch": 0.9732773524326994, "grad_norm": 0.47426339473199003, "learning_rate": 4.9805255905106735e-06, "loss": 0.6203, "step": 2463 }, { "epoch": 0.9736725117312917, "grad_norm": 0.4648762466922138, "learning_rate": 4.9805060682422925e-06, "loss": 0.602, "step": 2464 }, { "epoch": 0.9740676710298839, "grad_norm": 0.4774681753525283, "learning_rate": 4.980486536231992e-06, "loss": 0.6199, "step": 2465 }, { "epoch": 0.9744628303284761, "grad_norm": 0.5025939174795709, "learning_rate": 4.980466994479845e-06, "loss": 0.6342, "step": 2466 }, { "epoch": 0.9748579896270684, "grad_norm": 0.46520163938175313, "learning_rate": 4.980447442985931e-06, "loss": 0.6082, "step": 2467 }, { "epoch": 0.9752531489256606, "grad_norm": 0.5093555297811326, "learning_rate": 4.980427881750325e-06, "loss": 0.6406, "step": 2468 }, { "epoch": 0.9756483082242529, "grad_norm": 0.4675282128894731, "learning_rate": 4.980408310773105e-06, "loss": 0.6013, "step": 2469 }, { "epoch": 0.9760434675228451, "grad_norm": 0.4600464903665783, "learning_rate": 4.980388730054347e-06, "loss": 0.6021, "step": 2470 }, { "epoch": 0.9764386268214373, "grad_norm": 0.4781471662113073, "learning_rate": 4.980369139594128e-06, "loss": 0.6302, "step": 2471 }, { "epoch": 0.9768337861200297, "grad_norm": 0.569664962295512, "learning_rate": 4.980349539392526e-06, "loss": 0.6076, "step": 2472 }, { "epoch": 0.9772289454186219, "grad_norm": 0.474728795793001, "learning_rate": 4.980329929449616e-06, "loss": 0.5973, "step": 2473 }, { "epoch": 0.9776241047172142, "grad_norm": 0.466971617619074, "learning_rate": 4.980310309765477e-06, "loss": 0.5881, "step": 2474 }, { "epoch": 0.9780192640158064, "grad_norm": 0.49734334610344294, "learning_rate": 4.980290680340185e-06, "loss": 0.6377, "step": 2475 }, { "epoch": 0.9784144233143987, "grad_norm": 0.4660369065720144, "learning_rate": 4.980271041173818e-06, "loss": 0.5939, "step": 2476 }, { "epoch": 0.9788095826129909, "grad_norm": 0.4541019720752234, "learning_rate": 4.9802513922664506e-06, "loss": 0.6013, "step": 2477 }, { "epoch": 0.9792047419115831, "grad_norm": 0.46864260551647896, "learning_rate": 4.980231733618164e-06, "loss": 0.5934, "step": 2478 }, { "epoch": 0.9795999012101754, "grad_norm": 0.49812016619203664, "learning_rate": 4.980212065229032e-06, "loss": 0.6074, "step": 2479 }, { "epoch": 0.9799950605087676, "grad_norm": 0.47269225015752886, "learning_rate": 4.980192387099133e-06, "loss": 0.6075, "step": 2480 }, { "epoch": 0.9803902198073599, "grad_norm": 0.46117462938553433, "learning_rate": 4.980172699228545e-06, "loss": 0.5987, "step": 2481 }, { "epoch": 0.9807853791059521, "grad_norm": 0.47234556720051585, "learning_rate": 4.980153001617344e-06, "loss": 0.6008, "step": 2482 }, { "epoch": 0.9811805384045443, "grad_norm": 0.46430799343376644, "learning_rate": 4.980133294265608e-06, "loss": 0.6126, "step": 2483 }, { "epoch": 0.9815756977031366, "grad_norm": 0.48231158605504654, "learning_rate": 4.980113577173415e-06, "loss": 0.6166, "step": 2484 }, { "epoch": 0.9819708570017288, "grad_norm": 0.4544084646577223, "learning_rate": 4.980093850340842e-06, "loss": 0.5911, "step": 2485 }, { "epoch": 0.982366016300321, "grad_norm": 0.49807318396062805, "learning_rate": 4.980074113767966e-06, "loss": 0.596, "step": 2486 }, { "epoch": 0.9827611755989133, "grad_norm": 0.47544603743432695, "learning_rate": 4.980054367454865e-06, "loss": 0.6095, "step": 2487 }, { "epoch": 0.9831563348975055, "grad_norm": 0.4659987694152846, "learning_rate": 4.9800346114016165e-06, "loss": 0.606, "step": 2488 }, { "epoch": 0.9835514941960978, "grad_norm": 0.47287386440876494, "learning_rate": 4.980014845608298e-06, "loss": 0.6352, "step": 2489 }, { "epoch": 0.98394665349469, "grad_norm": 0.4864128975388848, "learning_rate": 4.979995070074987e-06, "loss": 0.6278, "step": 2490 }, { "epoch": 0.9843418127932823, "grad_norm": 0.4544891691408379, "learning_rate": 4.979975284801761e-06, "loss": 0.608, "step": 2491 }, { "epoch": 0.9847369720918745, "grad_norm": 0.4795256509587486, "learning_rate": 4.979955489788698e-06, "loss": 0.6273, "step": 2492 }, { "epoch": 0.9851321313904667, "grad_norm": 0.4766741019260381, "learning_rate": 4.979935685035876e-06, "loss": 0.608, "step": 2493 }, { "epoch": 0.985527290689059, "grad_norm": 0.4737813917288424, "learning_rate": 4.979915870543372e-06, "loss": 0.6259, "step": 2494 }, { "epoch": 0.9859224499876512, "grad_norm": 0.4923664866297512, "learning_rate": 4.979896046311266e-06, "loss": 0.5989, "step": 2495 }, { "epoch": 0.9863176092862436, "grad_norm": 0.49326185549267143, "learning_rate": 4.979876212339632e-06, "loss": 0.6077, "step": 2496 }, { "epoch": 0.9867127685848358, "grad_norm": 0.4660497012513692, "learning_rate": 4.9798563686285515e-06, "loss": 0.6168, "step": 2497 }, { "epoch": 0.987107927883428, "grad_norm": 0.4800478363437438, "learning_rate": 4.979836515178101e-06, "loss": 0.6211, "step": 2498 }, { "epoch": 0.9875030871820203, "grad_norm": 0.47487319014526613, "learning_rate": 4.979816651988358e-06, "loss": 0.6136, "step": 2499 }, { "epoch": 0.9878982464806125, "grad_norm": 0.47991505886589036, "learning_rate": 4.979796779059401e-06, "loss": 0.6081, "step": 2500 }, { "epoch": 0.9882934057792048, "grad_norm": 0.472571865692289, "learning_rate": 4.979776896391308e-06, "loss": 0.6208, "step": 2501 }, { "epoch": 0.988688565077797, "grad_norm": 0.5058758260925268, "learning_rate": 4.979757003984158e-06, "loss": 0.6318, "step": 2502 }, { "epoch": 0.9890837243763893, "grad_norm": 0.45916735017432975, "learning_rate": 4.979737101838028e-06, "loss": 0.5949, "step": 2503 }, { "epoch": 0.9894788836749815, "grad_norm": 0.46426645842628067, "learning_rate": 4.979717189952996e-06, "loss": 0.6169, "step": 2504 }, { "epoch": 0.9898740429735737, "grad_norm": 0.46140290638450737, "learning_rate": 4.9796972683291415e-06, "loss": 0.6021, "step": 2505 }, { "epoch": 0.990269202272166, "grad_norm": 0.4503137341547014, "learning_rate": 4.979677336966541e-06, "loss": 0.5937, "step": 2506 }, { "epoch": 0.9906643615707582, "grad_norm": 0.4734628943802484, "learning_rate": 4.9796573958652735e-06, "loss": 0.6208, "step": 2507 }, { "epoch": 0.9910595208693505, "grad_norm": 0.4665327413909791, "learning_rate": 4.979637445025418e-06, "loss": 0.5911, "step": 2508 }, { "epoch": 0.9914546801679427, "grad_norm": 0.5283523686632157, "learning_rate": 4.979617484447052e-06, "loss": 0.6054, "step": 2509 }, { "epoch": 0.9918498394665349, "grad_norm": 0.4671594121597241, "learning_rate": 4.979597514130254e-06, "loss": 0.6187, "step": 2510 }, { "epoch": 0.9922449987651272, "grad_norm": 0.4893067227078538, "learning_rate": 4.979577534075103e-06, "loss": 0.6339, "step": 2511 }, { "epoch": 0.9926401580637194, "grad_norm": 0.47208844535109873, "learning_rate": 4.979557544281677e-06, "loss": 0.6087, "step": 2512 }, { "epoch": 0.9930353173623117, "grad_norm": 0.48119092703509836, "learning_rate": 4.979537544750055e-06, "loss": 0.6201, "step": 2513 }, { "epoch": 0.9934304766609039, "grad_norm": 0.477557865345246, "learning_rate": 4.979517535480315e-06, "loss": 0.6143, "step": 2514 }, { "epoch": 0.9938256359594961, "grad_norm": 0.4585124296524369, "learning_rate": 4.979497516472535e-06, "loss": 0.5979, "step": 2515 }, { "epoch": 0.9942207952580884, "grad_norm": 0.47684697114164715, "learning_rate": 4.979477487726796e-06, "loss": 0.6031, "step": 2516 }, { "epoch": 0.9946159545566806, "grad_norm": 0.46484256613871744, "learning_rate": 4.979457449243174e-06, "loss": 0.5965, "step": 2517 }, { "epoch": 0.9950111138552729, "grad_norm": 0.46418868296419014, "learning_rate": 4.97943740102175e-06, "loss": 0.6187, "step": 2518 }, { "epoch": 0.9954062731538652, "grad_norm": 0.4712799089967701, "learning_rate": 4.9794173430626e-06, "loss": 0.6179, "step": 2519 }, { "epoch": 0.9958014324524574, "grad_norm": 0.4898574169755568, "learning_rate": 4.979397275365804e-06, "loss": 0.6182, "step": 2520 }, { "epoch": 0.9961965917510497, "grad_norm": 0.4721726580593012, "learning_rate": 4.979377197931442e-06, "loss": 0.6141, "step": 2521 }, { "epoch": 0.9965917510496419, "grad_norm": 0.4696253306480775, "learning_rate": 4.979357110759592e-06, "loss": 0.6024, "step": 2522 }, { "epoch": 0.9969869103482342, "grad_norm": 0.4695198005530173, "learning_rate": 4.979337013850332e-06, "loss": 0.6183, "step": 2523 }, { "epoch": 0.9973820696468264, "grad_norm": 0.46544585435480235, "learning_rate": 4.979316907203743e-06, "loss": 0.5947, "step": 2524 }, { "epoch": 0.9977772289454186, "grad_norm": 0.4673922524133896, "learning_rate": 4.979296790819901e-06, "loss": 0.6276, "step": 2525 }, { "epoch": 0.9981723882440109, "grad_norm": 0.4784936628964307, "learning_rate": 4.979276664698888e-06, "loss": 0.6163, "step": 2526 }, { "epoch": 0.9985675475426031, "grad_norm": 0.46022244379627936, "learning_rate": 4.979256528840782e-06, "loss": 0.6152, "step": 2527 }, { "epoch": 0.9989627068411954, "grad_norm": 0.46842270777733813, "learning_rate": 4.979236383245661e-06, "loss": 0.611, "step": 2528 }, { "epoch": 0.9993578661397876, "grad_norm": 0.45419251266565824, "learning_rate": 4.979216227913605e-06, "loss": 0.601, "step": 2529 }, { "epoch": 0.9997530254383798, "grad_norm": 0.4731512690482939, "learning_rate": 4.9791960628446935e-06, "loss": 0.6229, "step": 2530 }, { "epoch": 1.0003951592985922, "grad_norm": 0.9039832839745028, "learning_rate": 4.979175888039005e-06, "loss": 0.5558, "step": 2531 }, { "epoch": 1.0007903185971845, "grad_norm": 2.641950999929186, "learning_rate": 4.979155703496619e-06, "loss": 0.5752, "step": 2532 }, { "epoch": 1.0011854778957767, "grad_norm": 1.076378753297078, "learning_rate": 4.979135509217615e-06, "loss": 0.5912, "step": 2533 }, { "epoch": 1.001580637194369, "grad_norm": 0.8858294098499361, "learning_rate": 4.979115305202073e-06, "loss": 0.5723, "step": 2534 }, { "epoch": 1.0019757964929612, "grad_norm": 0.9699126038857411, "learning_rate": 4.9790950914500705e-06, "loss": 0.5786, "step": 2535 }, { "epoch": 1.0023709557915534, "grad_norm": 0.6877025575827704, "learning_rate": 4.979074867961687e-06, "loss": 0.5676, "step": 2536 }, { "epoch": 1.0027661150901457, "grad_norm": 0.6507721272783759, "learning_rate": 4.979054634737004e-06, "loss": 0.5813, "step": 2537 }, { "epoch": 1.003161274388738, "grad_norm": 0.7752079766225948, "learning_rate": 4.9790343917761e-06, "loss": 0.5741, "step": 2538 }, { "epoch": 1.0035564336873302, "grad_norm": 0.728864402953281, "learning_rate": 4.979014139079053e-06, "loss": 0.5594, "step": 2539 }, { "epoch": 1.0039515929859224, "grad_norm": 0.5774709293399436, "learning_rate": 4.978993876645945e-06, "loss": 0.5635, "step": 2540 }, { "epoch": 1.0043467522845146, "grad_norm": 0.5686297060429989, "learning_rate": 4.978973604476855e-06, "loss": 0.5701, "step": 2541 }, { "epoch": 1.0047419115831069, "grad_norm": 0.5887544543582653, "learning_rate": 4.97895332257186e-06, "loss": 0.5632, "step": 2542 }, { "epoch": 1.0051370708816991, "grad_norm": 0.6127637272155544, "learning_rate": 4.978933030931042e-06, "loss": 0.5594, "step": 2543 }, { "epoch": 1.0055322301802914, "grad_norm": 0.5483995865865325, "learning_rate": 4.978912729554481e-06, "loss": 0.5557, "step": 2544 }, { "epoch": 1.0059273894788836, "grad_norm": 0.5341010434760727, "learning_rate": 4.978892418442256e-06, "loss": 0.5553, "step": 2545 }, { "epoch": 1.0063225487774758, "grad_norm": 0.5396189198412058, "learning_rate": 4.9788720975944475e-06, "loss": 0.572, "step": 2546 }, { "epoch": 1.006717708076068, "grad_norm": 0.5603122829844251, "learning_rate": 4.978851767011134e-06, "loss": 0.554, "step": 2547 }, { "epoch": 1.0071128673746603, "grad_norm": 0.5973006671495175, "learning_rate": 4.978831426692397e-06, "loss": 0.573, "step": 2548 }, { "epoch": 1.0075080266732526, "grad_norm": 0.5537555552956865, "learning_rate": 4.9788110766383135e-06, "loss": 0.5725, "step": 2549 }, { "epoch": 1.0079031859718448, "grad_norm": 0.506831024700252, "learning_rate": 4.978790716848966e-06, "loss": 0.5605, "step": 2550 }, { "epoch": 1.008298345270437, "grad_norm": 0.5146187226178434, "learning_rate": 4.9787703473244346e-06, "loss": 0.5725, "step": 2551 }, { "epoch": 1.0086935045690293, "grad_norm": 0.584671063878508, "learning_rate": 4.9787499680647975e-06, "loss": 0.5535, "step": 2552 }, { "epoch": 1.0090886638676215, "grad_norm": 0.517272250939791, "learning_rate": 4.978729579070136e-06, "loss": 0.5694, "step": 2553 }, { "epoch": 1.009483823166214, "grad_norm": 0.5197158410846429, "learning_rate": 4.978709180340529e-06, "loss": 0.5703, "step": 2554 }, { "epoch": 1.0098789824648062, "grad_norm": 0.5316647592420108, "learning_rate": 4.978688771876059e-06, "loss": 0.5614, "step": 2555 }, { "epoch": 1.0102741417633985, "grad_norm": 0.47946328549941336, "learning_rate": 4.978668353676804e-06, "loss": 0.574, "step": 2556 }, { "epoch": 1.0106693010619907, "grad_norm": 0.4799139075965923, "learning_rate": 4.9786479257428455e-06, "loss": 0.5638, "step": 2557 }, { "epoch": 1.011064460360583, "grad_norm": 0.5034447217830896, "learning_rate": 4.978627488074263e-06, "loss": 0.576, "step": 2558 }, { "epoch": 1.0114596196591752, "grad_norm": 0.5176884626827072, "learning_rate": 4.978607040671136e-06, "loss": 0.5636, "step": 2559 }, { "epoch": 1.0118547789577674, "grad_norm": 0.5148490669814971, "learning_rate": 4.978586583533545e-06, "loss": 0.5838, "step": 2560 }, { "epoch": 1.0122499382563597, "grad_norm": 0.4542757059816822, "learning_rate": 4.978566116661573e-06, "loss": 0.5532, "step": 2561 }, { "epoch": 1.012645097554952, "grad_norm": 0.4811513221663893, "learning_rate": 4.978545640055297e-06, "loss": 0.5658, "step": 2562 }, { "epoch": 1.0130402568535442, "grad_norm": 0.500097716390493, "learning_rate": 4.978525153714799e-06, "loss": 0.5744, "step": 2563 }, { "epoch": 1.0134354161521364, "grad_norm": 0.5142140734871467, "learning_rate": 4.978504657640159e-06, "loss": 0.5538, "step": 2564 }, { "epoch": 1.0138305754507286, "grad_norm": 0.47546571996143905, "learning_rate": 4.978484151831458e-06, "loss": 0.5587, "step": 2565 }, { "epoch": 1.0142257347493209, "grad_norm": 0.472451374021239, "learning_rate": 4.978463636288776e-06, "loss": 0.5881, "step": 2566 }, { "epoch": 1.0146208940479131, "grad_norm": 0.4541293970758113, "learning_rate": 4.978443111012195e-06, "loss": 0.5572, "step": 2567 }, { "epoch": 1.0150160533465054, "grad_norm": 0.47790081006874097, "learning_rate": 4.978422576001793e-06, "loss": 0.552, "step": 2568 }, { "epoch": 1.0154112126450976, "grad_norm": 0.4983815727839767, "learning_rate": 4.978402031257653e-06, "loss": 0.5655, "step": 2569 }, { "epoch": 1.0158063719436898, "grad_norm": 0.484727652450049, "learning_rate": 4.9783814767798545e-06, "loss": 0.5726, "step": 2570 }, { "epoch": 1.016201531242282, "grad_norm": 0.4852767066955579, "learning_rate": 4.978360912568479e-06, "loss": 0.5537, "step": 2571 }, { "epoch": 1.0165966905408743, "grad_norm": 0.4485626151233385, "learning_rate": 4.978340338623606e-06, "loss": 0.5555, "step": 2572 }, { "epoch": 1.0169918498394666, "grad_norm": 0.5060512578748023, "learning_rate": 4.9783197549453164e-06, "loss": 0.5898, "step": 2573 }, { "epoch": 1.0173870091380588, "grad_norm": 0.4646178927626715, "learning_rate": 4.978299161533693e-06, "loss": 0.5744, "step": 2574 }, { "epoch": 1.017782168436651, "grad_norm": 0.461846845018728, "learning_rate": 4.978278558388815e-06, "loss": 0.5654, "step": 2575 }, { "epoch": 1.0181773277352433, "grad_norm": 0.6011786743216133, "learning_rate": 4.978257945510764e-06, "loss": 0.5593, "step": 2576 }, { "epoch": 1.0185724870338355, "grad_norm": 0.523836605126601, "learning_rate": 4.978237322899621e-06, "loss": 0.572, "step": 2577 }, { "epoch": 1.0189676463324278, "grad_norm": 0.4752809951230506, "learning_rate": 4.978216690555465e-06, "loss": 0.583, "step": 2578 }, { "epoch": 1.01936280563102, "grad_norm": 0.45433798164574274, "learning_rate": 4.978196048478381e-06, "loss": 0.568, "step": 2579 }, { "epoch": 1.0197579649296122, "grad_norm": 0.46744153312914954, "learning_rate": 4.9781753966684455e-06, "loss": 0.5526, "step": 2580 }, { "epoch": 1.0201531242282045, "grad_norm": 0.47136324904331967, "learning_rate": 4.978154735125743e-06, "loss": 0.5734, "step": 2581 }, { "epoch": 1.0205482835267967, "grad_norm": 0.476581814748425, "learning_rate": 4.9781340638503536e-06, "loss": 0.596, "step": 2582 }, { "epoch": 1.020943442825389, "grad_norm": 0.4541300104338508, "learning_rate": 4.9781133828423585e-06, "loss": 0.5553, "step": 2583 }, { "epoch": 1.0213386021239812, "grad_norm": 0.45621750533610406, "learning_rate": 4.978092692101838e-06, "loss": 0.5561, "step": 2584 }, { "epoch": 1.0217337614225734, "grad_norm": 0.47098693586154, "learning_rate": 4.978071991628875e-06, "loss": 0.5524, "step": 2585 }, { "epoch": 1.0221289207211657, "grad_norm": 0.4259588250799944, "learning_rate": 4.97805128142355e-06, "loss": 0.5235, "step": 2586 }, { "epoch": 1.022524080019758, "grad_norm": 0.4686581079113878, "learning_rate": 4.9780305614859435e-06, "loss": 0.5574, "step": 2587 }, { "epoch": 1.0229192393183502, "grad_norm": 0.46911512358386387, "learning_rate": 4.9780098318161385e-06, "loss": 0.5777, "step": 2588 }, { "epoch": 1.0233143986169424, "grad_norm": 0.4642643584963139, "learning_rate": 4.9779890924142155e-06, "loss": 0.5726, "step": 2589 }, { "epoch": 1.0237095579155346, "grad_norm": 0.4712873655824191, "learning_rate": 4.977968343280256e-06, "loss": 0.5823, "step": 2590 }, { "epoch": 1.0241047172141269, "grad_norm": 0.4482243472573749, "learning_rate": 4.977947584414341e-06, "loss": 0.564, "step": 2591 }, { "epoch": 1.0244998765127191, "grad_norm": 0.4530647693811047, "learning_rate": 4.977926815816553e-06, "loss": 0.5809, "step": 2592 }, { "epoch": 1.0248950358113114, "grad_norm": 0.4479882772757105, "learning_rate": 4.977906037486974e-06, "loss": 0.5616, "step": 2593 }, { "epoch": 1.0252901951099036, "grad_norm": 0.4610471046625982, "learning_rate": 4.977885249425684e-06, "loss": 0.5472, "step": 2594 }, { "epoch": 1.0256853544084958, "grad_norm": 0.47862490507477046, "learning_rate": 4.977864451632764e-06, "loss": 0.5902, "step": 2595 }, { "epoch": 1.026080513707088, "grad_norm": 0.5047546461386095, "learning_rate": 4.977843644108299e-06, "loss": 0.5884, "step": 2596 }, { "epoch": 1.0264756730056803, "grad_norm": 0.4451423123413334, "learning_rate": 4.977822826852369e-06, "loss": 0.5642, "step": 2597 }, { "epoch": 1.0268708323042726, "grad_norm": 0.4560284371925842, "learning_rate": 4.977801999865054e-06, "loss": 0.5606, "step": 2598 }, { "epoch": 1.0272659916028648, "grad_norm": 0.4461067403640707, "learning_rate": 4.977781163146438e-06, "loss": 0.5649, "step": 2599 }, { "epoch": 1.027661150901457, "grad_norm": 0.451597159891192, "learning_rate": 4.977760316696603e-06, "loss": 0.5634, "step": 2600 }, { "epoch": 1.0280563102000495, "grad_norm": 0.4519191659487248, "learning_rate": 4.977739460515629e-06, "loss": 0.5796, "step": 2601 }, { "epoch": 1.0284514694986417, "grad_norm": 0.4596070611618375, "learning_rate": 4.977718594603599e-06, "loss": 0.5764, "step": 2602 }, { "epoch": 1.028846628797234, "grad_norm": 0.440619261667536, "learning_rate": 4.977697718960595e-06, "loss": 0.557, "step": 2603 }, { "epoch": 1.0292417880958262, "grad_norm": 0.4585810235456444, "learning_rate": 4.977676833586699e-06, "loss": 0.5618, "step": 2604 }, { "epoch": 1.0296369473944185, "grad_norm": 0.489223887277486, "learning_rate": 4.977655938481994e-06, "loss": 0.6054, "step": 2605 }, { "epoch": 1.0300321066930107, "grad_norm": 0.4590644227459909, "learning_rate": 4.97763503364656e-06, "loss": 0.5575, "step": 2606 }, { "epoch": 1.030427265991603, "grad_norm": 0.4676586979127894, "learning_rate": 4.97761411908048e-06, "loss": 0.5715, "step": 2607 }, { "epoch": 1.0308224252901952, "grad_norm": 0.44950227149153915, "learning_rate": 4.977593194783836e-06, "loss": 0.5473, "step": 2608 }, { "epoch": 1.0312175845887874, "grad_norm": 0.44607152531853783, "learning_rate": 4.977572260756711e-06, "loss": 0.5318, "step": 2609 }, { "epoch": 1.0316127438873797, "grad_norm": 0.45941977696779834, "learning_rate": 4.977551316999186e-06, "loss": 0.5453, "step": 2610 }, { "epoch": 1.032007903185972, "grad_norm": 0.4624807401390588, "learning_rate": 4.977530363511344e-06, "loss": 0.5721, "step": 2611 }, { "epoch": 1.0324030624845641, "grad_norm": 0.4486414918582425, "learning_rate": 4.977509400293268e-06, "loss": 0.5634, "step": 2612 }, { "epoch": 1.0327982217831564, "grad_norm": 0.460795352195087, "learning_rate": 4.977488427345039e-06, "loss": 0.5663, "step": 2613 }, { "epoch": 1.0331933810817486, "grad_norm": 0.47567928790908864, "learning_rate": 4.977467444666739e-06, "loss": 0.586, "step": 2614 }, { "epoch": 1.0335885403803409, "grad_norm": 0.4486438855377463, "learning_rate": 4.977446452258452e-06, "loss": 0.5904, "step": 2615 }, { "epoch": 1.033983699678933, "grad_norm": 0.4415373719504563, "learning_rate": 4.97742545012026e-06, "loss": 0.5523, "step": 2616 }, { "epoch": 1.0343788589775254, "grad_norm": 0.46461856034519633, "learning_rate": 4.977404438252245e-06, "loss": 0.556, "step": 2617 }, { "epoch": 1.0347740182761176, "grad_norm": 0.45898412914751924, "learning_rate": 4.97738341665449e-06, "loss": 0.5868, "step": 2618 }, { "epoch": 1.0351691775747098, "grad_norm": 0.44026432413454175, "learning_rate": 4.977362385327077e-06, "loss": 0.5399, "step": 2619 }, { "epoch": 1.035564336873302, "grad_norm": 0.4536936643831469, "learning_rate": 4.977341344270088e-06, "loss": 0.5572, "step": 2620 }, { "epoch": 1.0359594961718943, "grad_norm": 0.45300210621077247, "learning_rate": 4.977320293483608e-06, "loss": 0.5581, "step": 2621 }, { "epoch": 1.0363546554704866, "grad_norm": 0.4768261944797516, "learning_rate": 4.977299232967717e-06, "loss": 0.5567, "step": 2622 }, { "epoch": 1.0367498147690788, "grad_norm": 0.4490373651630668, "learning_rate": 4.9772781627225e-06, "loss": 0.5613, "step": 2623 }, { "epoch": 1.037144974067671, "grad_norm": 0.4542757319001758, "learning_rate": 4.977257082748038e-06, "loss": 0.5598, "step": 2624 }, { "epoch": 1.0375401333662633, "grad_norm": 0.4696319591373662, "learning_rate": 4.977235993044415e-06, "loss": 0.574, "step": 2625 }, { "epoch": 1.0379352926648555, "grad_norm": 0.4544516411565146, "learning_rate": 4.977214893611713e-06, "loss": 0.5647, "step": 2626 }, { "epoch": 1.0383304519634478, "grad_norm": 0.4636968131984274, "learning_rate": 4.977193784450015e-06, "loss": 0.5776, "step": 2627 }, { "epoch": 1.03872561126204, "grad_norm": 0.4549508464045885, "learning_rate": 4.977172665559403e-06, "loss": 0.5721, "step": 2628 }, { "epoch": 1.0391207705606322, "grad_norm": 0.4432377804345881, "learning_rate": 4.9771515369399625e-06, "loss": 0.5624, "step": 2629 }, { "epoch": 1.0395159298592245, "grad_norm": 0.4512832309113786, "learning_rate": 4.977130398591775e-06, "loss": 0.5793, "step": 2630 }, { "epoch": 1.0399110891578167, "grad_norm": 0.44957500296377784, "learning_rate": 4.977109250514923e-06, "loss": 0.5649, "step": 2631 }, { "epoch": 1.040306248456409, "grad_norm": 0.45057963341150464, "learning_rate": 4.97708809270949e-06, "loss": 0.5607, "step": 2632 }, { "epoch": 1.0407014077550012, "grad_norm": 0.4542174831876239, "learning_rate": 4.977066925175559e-06, "loss": 0.5539, "step": 2633 }, { "epoch": 1.0410965670535934, "grad_norm": 0.46807162702065624, "learning_rate": 4.977045747913213e-06, "loss": 0.563, "step": 2634 }, { "epoch": 1.0414917263521857, "grad_norm": 0.457076849040818, "learning_rate": 4.977024560922537e-06, "loss": 0.561, "step": 2635 }, { "epoch": 1.041886885650778, "grad_norm": 0.45562089282056256, "learning_rate": 4.9770033642036105e-06, "loss": 0.5779, "step": 2636 }, { "epoch": 1.0422820449493702, "grad_norm": 0.4525510725985377, "learning_rate": 4.97698215775652e-06, "loss": 0.6033, "step": 2637 }, { "epoch": 1.0426772042479624, "grad_norm": 0.5400191406643862, "learning_rate": 4.976960941581348e-06, "loss": 0.5704, "step": 2638 }, { "epoch": 1.0430723635465546, "grad_norm": 0.44608237054831834, "learning_rate": 4.976939715678178e-06, "loss": 0.5611, "step": 2639 }, { "epoch": 1.0434675228451469, "grad_norm": 0.5052264389295701, "learning_rate": 4.9769184800470915e-06, "loss": 0.5755, "step": 2640 }, { "epoch": 1.0438626821437391, "grad_norm": 0.44348147340006067, "learning_rate": 4.976897234688174e-06, "loss": 0.5598, "step": 2641 }, { "epoch": 1.0442578414423314, "grad_norm": 0.451998380744831, "learning_rate": 4.976875979601508e-06, "loss": 0.5866, "step": 2642 }, { "epoch": 1.0446530007409236, "grad_norm": 0.4450349262521982, "learning_rate": 4.976854714787177e-06, "loss": 0.5612, "step": 2643 }, { "epoch": 1.0450481600395158, "grad_norm": 0.4554004140171642, "learning_rate": 4.976833440245265e-06, "loss": 0.5752, "step": 2644 }, { "epoch": 1.045443319338108, "grad_norm": 0.4717355641847888, "learning_rate": 4.976812155975855e-06, "loss": 0.5696, "step": 2645 }, { "epoch": 1.0458384786367003, "grad_norm": 0.451389235554898, "learning_rate": 4.976790861979031e-06, "loss": 0.5896, "step": 2646 }, { "epoch": 1.0462336379352926, "grad_norm": 0.4587582612615821, "learning_rate": 4.976769558254877e-06, "loss": 0.5905, "step": 2647 }, { "epoch": 1.0466287972338848, "grad_norm": 0.4587087573396345, "learning_rate": 4.976748244803475e-06, "loss": 0.5823, "step": 2648 }, { "epoch": 1.0470239565324773, "grad_norm": 0.4477959820273805, "learning_rate": 4.976726921624911e-06, "loss": 0.5657, "step": 2649 }, { "epoch": 1.0474191158310695, "grad_norm": 0.43767555375829137, "learning_rate": 4.976705588719267e-06, "loss": 0.5649, "step": 2650 }, { "epoch": 1.0478142751296617, "grad_norm": 0.453863897792215, "learning_rate": 4.976684246086627e-06, "loss": 0.563, "step": 2651 }, { "epoch": 1.048209434428254, "grad_norm": 0.44586538451216967, "learning_rate": 4.976662893727076e-06, "loss": 0.5541, "step": 2652 }, { "epoch": 1.0486045937268462, "grad_norm": 0.46349135729431057, "learning_rate": 4.9766415316406965e-06, "loss": 0.5621, "step": 2653 }, { "epoch": 1.0489997530254385, "grad_norm": 0.4485724204935941, "learning_rate": 4.976620159827574e-06, "loss": 0.5465, "step": 2654 }, { "epoch": 1.0493949123240307, "grad_norm": 0.4388668643967193, "learning_rate": 4.97659877828779e-06, "loss": 0.5592, "step": 2655 }, { "epoch": 1.049790071622623, "grad_norm": 0.46226835952230305, "learning_rate": 4.976577387021431e-06, "loss": 0.5814, "step": 2656 }, { "epoch": 1.0501852309212152, "grad_norm": 0.4551414886420647, "learning_rate": 4.97655598602858e-06, "loss": 0.5489, "step": 2657 }, { "epoch": 1.0505803902198074, "grad_norm": 0.4503034545836757, "learning_rate": 4.97653457530932e-06, "loss": 0.5847, "step": 2658 }, { "epoch": 1.0509755495183997, "grad_norm": 0.4510190880823167, "learning_rate": 4.976513154863735e-06, "loss": 0.5638, "step": 2659 }, { "epoch": 1.051370708816992, "grad_norm": 0.45601750337175206, "learning_rate": 4.976491724691912e-06, "loss": 0.5629, "step": 2660 }, { "epoch": 1.0517658681155841, "grad_norm": 0.44161293110782124, "learning_rate": 4.976470284793933e-06, "loss": 0.5638, "step": 2661 }, { "epoch": 1.0521610274141764, "grad_norm": 0.4971279919030968, "learning_rate": 4.976448835169882e-06, "loss": 0.5602, "step": 2662 }, { "epoch": 1.0525561867127686, "grad_norm": 0.46201452125116643, "learning_rate": 4.976427375819844e-06, "loss": 0.5645, "step": 2663 }, { "epoch": 1.0529513460113609, "grad_norm": 0.45839768743558873, "learning_rate": 4.9764059067439045e-06, "loss": 0.5729, "step": 2664 }, { "epoch": 1.053346505309953, "grad_norm": 0.45571009890945263, "learning_rate": 4.9763844279421444e-06, "loss": 0.5669, "step": 2665 }, { "epoch": 1.0537416646085453, "grad_norm": 0.4404670408454727, "learning_rate": 4.97636293941465e-06, "loss": 0.5537, "step": 2666 }, { "epoch": 1.0541368239071376, "grad_norm": 0.4618688908161834, "learning_rate": 4.976341441161507e-06, "loss": 0.5666, "step": 2667 }, { "epoch": 1.0545319832057298, "grad_norm": 0.4678051099127983, "learning_rate": 4.976319933182797e-06, "loss": 0.5852, "step": 2668 }, { "epoch": 1.054927142504322, "grad_norm": 0.45030359679512383, "learning_rate": 4.9762984154786075e-06, "loss": 0.5749, "step": 2669 }, { "epoch": 1.0553223018029143, "grad_norm": 0.4553020196001742, "learning_rate": 4.9762768880490205e-06, "loss": 0.5595, "step": 2670 }, { "epoch": 1.0557174611015065, "grad_norm": 0.45256331743406897, "learning_rate": 4.976255350894122e-06, "loss": 0.5635, "step": 2671 }, { "epoch": 1.0561126204000988, "grad_norm": 0.44922321136235305, "learning_rate": 4.9762338040139965e-06, "loss": 0.5488, "step": 2672 }, { "epoch": 1.056507779698691, "grad_norm": 0.45045666513262506, "learning_rate": 4.976212247408727e-06, "loss": 0.5595, "step": 2673 }, { "epoch": 1.0569029389972833, "grad_norm": 0.45788018345436676, "learning_rate": 4.976190681078401e-06, "loss": 0.5693, "step": 2674 }, { "epoch": 1.0572980982958755, "grad_norm": 0.48385791595148814, "learning_rate": 4.976169105023101e-06, "loss": 0.5967, "step": 2675 }, { "epoch": 1.0576932575944678, "grad_norm": 0.45899699704778635, "learning_rate": 4.976147519242912e-06, "loss": 0.5755, "step": 2676 }, { "epoch": 1.05808841689306, "grad_norm": 0.44447734769605085, "learning_rate": 4.976125923737919e-06, "loss": 0.5678, "step": 2677 }, { "epoch": 1.0584835761916522, "grad_norm": 0.4578877530549744, "learning_rate": 4.976104318508207e-06, "loss": 0.5781, "step": 2678 }, { "epoch": 1.0588787354902445, "grad_norm": 0.4522345638325663, "learning_rate": 4.9760827035538614e-06, "loss": 0.5733, "step": 2679 }, { "epoch": 1.0592738947888367, "grad_norm": 0.45242325394735017, "learning_rate": 4.9760610788749665e-06, "loss": 0.5781, "step": 2680 }, { "epoch": 1.059669054087429, "grad_norm": 0.45775529697399175, "learning_rate": 4.976039444471607e-06, "loss": 0.5627, "step": 2681 }, { "epoch": 1.0600642133860212, "grad_norm": 0.44249752472475534, "learning_rate": 4.976017800343868e-06, "loss": 0.5471, "step": 2682 }, { "epoch": 1.0604593726846134, "grad_norm": 0.47201137623979633, "learning_rate": 4.975996146491834e-06, "loss": 0.5786, "step": 2683 }, { "epoch": 1.0608545319832057, "grad_norm": 0.4775255993907024, "learning_rate": 4.975974482915592e-06, "loss": 0.5936, "step": 2684 }, { "epoch": 1.061249691281798, "grad_norm": 0.44639736931494717, "learning_rate": 4.975952809615225e-06, "loss": 0.5579, "step": 2685 }, { "epoch": 1.0616448505803902, "grad_norm": 0.44958051847488756, "learning_rate": 4.975931126590819e-06, "loss": 0.5706, "step": 2686 }, { "epoch": 1.0620400098789824, "grad_norm": 0.4507031529753823, "learning_rate": 4.975909433842459e-06, "loss": 0.557, "step": 2687 }, { "epoch": 1.0624351691775746, "grad_norm": 0.45147386789174, "learning_rate": 4.975887731370229e-06, "loss": 0.5561, "step": 2688 }, { "epoch": 1.0628303284761669, "grad_norm": 0.45782580821366853, "learning_rate": 4.975866019174217e-06, "loss": 0.5511, "step": 2689 }, { "epoch": 1.0632254877747591, "grad_norm": 0.4707517108424949, "learning_rate": 4.975844297254506e-06, "loss": 0.5485, "step": 2690 }, { "epoch": 1.0636206470733514, "grad_norm": 0.451561493525649, "learning_rate": 4.975822565611183e-06, "loss": 0.5615, "step": 2691 }, { "epoch": 1.0640158063719436, "grad_norm": 0.4669716121243911, "learning_rate": 4.975800824244331e-06, "loss": 0.5676, "step": 2692 }, { "epoch": 1.0644109656705358, "grad_norm": 0.4657398109153327, "learning_rate": 4.975779073154038e-06, "loss": 0.5609, "step": 2693 }, { "epoch": 1.064806124969128, "grad_norm": 0.4577124290606098, "learning_rate": 4.975757312340387e-06, "loss": 0.5649, "step": 2694 }, { "epoch": 1.0652012842677205, "grad_norm": 0.459041880221058, "learning_rate": 4.975735541803465e-06, "loss": 0.5649, "step": 2695 }, { "epoch": 1.0655964435663128, "grad_norm": 0.4665780766862844, "learning_rate": 4.975713761543357e-06, "loss": 0.5837, "step": 2696 }, { "epoch": 1.065991602864905, "grad_norm": 0.45328946916325785, "learning_rate": 4.975691971560149e-06, "loss": 0.5687, "step": 2697 }, { "epoch": 1.0663867621634973, "grad_norm": 0.46683872005405097, "learning_rate": 4.975670171853926e-06, "loss": 0.564, "step": 2698 }, { "epoch": 1.0667819214620895, "grad_norm": 0.4490782215754304, "learning_rate": 4.9756483624247745e-06, "loss": 0.5694, "step": 2699 }, { "epoch": 1.0671770807606817, "grad_norm": 0.4602344049001532, "learning_rate": 4.975626543272779e-06, "loss": 0.5829, "step": 2700 }, { "epoch": 1.067572240059274, "grad_norm": 0.47778949312409547, "learning_rate": 4.975604714398026e-06, "loss": 0.5849, "step": 2701 }, { "epoch": 1.0679673993578662, "grad_norm": 0.4766565902077033, "learning_rate": 4.9755828758006e-06, "loss": 0.5803, "step": 2702 }, { "epoch": 1.0683625586564585, "grad_norm": 0.46556612189369995, "learning_rate": 4.975561027480589e-06, "loss": 0.565, "step": 2703 }, { "epoch": 1.0687577179550507, "grad_norm": 0.4681074034544366, "learning_rate": 4.975539169438077e-06, "loss": 0.5826, "step": 2704 }, { "epoch": 1.069152877253643, "grad_norm": 0.4638743898250165, "learning_rate": 4.975517301673151e-06, "loss": 0.5734, "step": 2705 }, { "epoch": 1.0695480365522352, "grad_norm": 0.4791989380557464, "learning_rate": 4.975495424185895e-06, "loss": 0.5706, "step": 2706 }, { "epoch": 1.0699431958508274, "grad_norm": 0.4648061747202406, "learning_rate": 4.975473536976397e-06, "loss": 0.5613, "step": 2707 }, { "epoch": 1.0703383551494197, "grad_norm": 0.4758277574553136, "learning_rate": 4.975451640044742e-06, "loss": 0.5758, "step": 2708 }, { "epoch": 1.070733514448012, "grad_norm": 0.46226325782703626, "learning_rate": 4.975429733391016e-06, "loss": 0.5685, "step": 2709 }, { "epoch": 1.0711286737466041, "grad_norm": 0.45170576388419686, "learning_rate": 4.975407817015306e-06, "loss": 0.5501, "step": 2710 }, { "epoch": 1.0715238330451964, "grad_norm": 0.4587950204803136, "learning_rate": 4.975385890917696e-06, "loss": 0.5627, "step": 2711 }, { "epoch": 1.0719189923437886, "grad_norm": 0.5105258657179963, "learning_rate": 4.975363955098273e-06, "loss": 0.5664, "step": 2712 }, { "epoch": 1.0723141516423809, "grad_norm": 0.49413851191960967, "learning_rate": 4.975342009557125e-06, "loss": 0.562, "step": 2713 }, { "epoch": 1.072709310940973, "grad_norm": 0.45320917178937237, "learning_rate": 4.975320054294336e-06, "loss": 0.5385, "step": 2714 }, { "epoch": 1.0731044702395653, "grad_norm": 0.47467856876046166, "learning_rate": 4.975298089309993e-06, "loss": 0.5721, "step": 2715 }, { "epoch": 1.0734996295381576, "grad_norm": 0.4600018582339907, "learning_rate": 4.975276114604182e-06, "loss": 0.5653, "step": 2716 }, { "epoch": 1.0738947888367498, "grad_norm": 0.4708300385350632, "learning_rate": 4.97525413017699e-06, "loss": 0.5603, "step": 2717 }, { "epoch": 1.074289948135342, "grad_norm": 0.4506434681952806, "learning_rate": 4.975232136028502e-06, "loss": 0.5381, "step": 2718 }, { "epoch": 1.0746851074339343, "grad_norm": 0.46055573294638946, "learning_rate": 4.975210132158805e-06, "loss": 0.5503, "step": 2719 }, { "epoch": 1.0750802667325265, "grad_norm": 0.5855856780535553, "learning_rate": 4.975188118567987e-06, "loss": 0.582, "step": 2720 }, { "epoch": 1.0754754260311188, "grad_norm": 0.48475880634450325, "learning_rate": 4.975166095256132e-06, "loss": 0.5735, "step": 2721 }, { "epoch": 1.075870585329711, "grad_norm": 0.48720473179501395, "learning_rate": 4.975144062223328e-06, "loss": 0.5719, "step": 2722 }, { "epoch": 1.0762657446283033, "grad_norm": 0.4703550004447043, "learning_rate": 4.9751220194696615e-06, "loss": 0.6014, "step": 2723 }, { "epoch": 1.0766609039268955, "grad_norm": 0.47236077192721254, "learning_rate": 4.975099966995218e-06, "loss": 0.5712, "step": 2724 }, { "epoch": 1.0770560632254877, "grad_norm": 0.46826552794346366, "learning_rate": 4.975077904800086e-06, "loss": 0.5742, "step": 2725 }, { "epoch": 1.07745122252408, "grad_norm": 0.4976523466784567, "learning_rate": 4.975055832884349e-06, "loss": 0.5837, "step": 2726 }, { "epoch": 1.0778463818226722, "grad_norm": 0.5218434843940184, "learning_rate": 4.975033751248096e-06, "loss": 0.5422, "step": 2727 }, { "epoch": 1.0782415411212645, "grad_norm": 0.4931023411702474, "learning_rate": 4.975011659891415e-06, "loss": 0.5729, "step": 2728 }, { "epoch": 1.0786367004198567, "grad_norm": 0.5049728108245582, "learning_rate": 4.974989558814389e-06, "loss": 0.5665, "step": 2729 }, { "epoch": 1.079031859718449, "grad_norm": 0.49433774891751775, "learning_rate": 4.974967448017109e-06, "loss": 0.5706, "step": 2730 }, { "epoch": 1.0794270190170412, "grad_norm": 0.4620589073782666, "learning_rate": 4.974945327499658e-06, "loss": 0.5621, "step": 2731 }, { "epoch": 1.0798221783156334, "grad_norm": 0.48242430406086245, "learning_rate": 4.974923197262126e-06, "loss": 0.5584, "step": 2732 }, { "epoch": 1.0802173376142257, "grad_norm": 0.46908301271787106, "learning_rate": 4.974901057304598e-06, "loss": 0.575, "step": 2733 }, { "epoch": 1.080612496912818, "grad_norm": 0.4635792423887607, "learning_rate": 4.974878907627161e-06, "loss": 0.5666, "step": 2734 }, { "epoch": 1.0810076562114102, "grad_norm": 0.45678202788616845, "learning_rate": 4.974856748229902e-06, "loss": 0.5513, "step": 2735 }, { "epoch": 1.0814028155100024, "grad_norm": 0.48311485248397, "learning_rate": 4.97483457911291e-06, "loss": 0.5517, "step": 2736 }, { "epoch": 1.0817979748085946, "grad_norm": 0.4636165852005326, "learning_rate": 4.97481240027627e-06, "loss": 0.5597, "step": 2737 }, { "epoch": 1.0821931341071869, "grad_norm": 0.4417447450007961, "learning_rate": 4.97479021172007e-06, "loss": 0.5705, "step": 2738 }, { "epoch": 1.0825882934057791, "grad_norm": 0.4518840030411349, "learning_rate": 4.974768013444395e-06, "loss": 0.5653, "step": 2739 }, { "epoch": 1.0829834527043714, "grad_norm": 1.1302182473095272, "learning_rate": 4.974745805449336e-06, "loss": 0.5999, "step": 2740 }, { "epoch": 1.0833786120029636, "grad_norm": 0.4646198565639412, "learning_rate": 4.974723587734977e-06, "loss": 0.5814, "step": 2741 }, { "epoch": 1.0837737713015558, "grad_norm": 0.4561905295641323, "learning_rate": 4.974701360301408e-06, "loss": 0.5699, "step": 2742 }, { "epoch": 1.084168930600148, "grad_norm": 0.48160057985917565, "learning_rate": 4.974679123148713e-06, "loss": 0.556, "step": 2743 }, { "epoch": 1.0845640898987403, "grad_norm": 0.4611451474715039, "learning_rate": 4.974656876276983e-06, "loss": 0.577, "step": 2744 }, { "epoch": 1.0849592491973328, "grad_norm": 0.4954859328073638, "learning_rate": 4.974634619686303e-06, "loss": 0.5785, "step": 2745 }, { "epoch": 1.085354408495925, "grad_norm": 0.46526474266981815, "learning_rate": 4.97461235337676e-06, "loss": 0.5627, "step": 2746 }, { "epoch": 1.0857495677945173, "grad_norm": 0.47088544330789683, "learning_rate": 4.974590077348442e-06, "loss": 0.5735, "step": 2747 }, { "epoch": 1.0861447270931095, "grad_norm": 0.44275077749114805, "learning_rate": 4.974567791601438e-06, "loss": 0.56, "step": 2748 }, { "epoch": 1.0865398863917017, "grad_norm": 0.44909868576930495, "learning_rate": 4.974545496135834e-06, "loss": 0.5626, "step": 2749 }, { "epoch": 1.086935045690294, "grad_norm": 0.4435390432235627, "learning_rate": 4.974523190951718e-06, "loss": 0.5489, "step": 2750 }, { "epoch": 1.0873302049888862, "grad_norm": 0.46104078067735216, "learning_rate": 4.974500876049177e-06, "loss": 0.5575, "step": 2751 }, { "epoch": 1.0877253642874785, "grad_norm": 0.47232606154854884, "learning_rate": 4.9744785514283e-06, "loss": 0.5636, "step": 2752 }, { "epoch": 1.0881205235860707, "grad_norm": 0.45996006809985085, "learning_rate": 4.974456217089173e-06, "loss": 0.5707, "step": 2753 }, { "epoch": 1.088515682884663, "grad_norm": 0.45991550984476076, "learning_rate": 4.9744338730318846e-06, "loss": 0.565, "step": 2754 }, { "epoch": 1.0889108421832552, "grad_norm": 0.45264942810273506, "learning_rate": 4.974411519256523e-06, "loss": 0.5611, "step": 2755 }, { "epoch": 1.0893060014818474, "grad_norm": 0.4603092385989538, "learning_rate": 4.974389155763175e-06, "loss": 0.5729, "step": 2756 }, { "epoch": 1.0897011607804397, "grad_norm": 0.4622551865908486, "learning_rate": 4.974366782551929e-06, "loss": 0.5706, "step": 2757 }, { "epoch": 1.090096320079032, "grad_norm": 0.45746013561890175, "learning_rate": 4.974344399622874e-06, "loss": 0.5527, "step": 2758 }, { "epoch": 1.0904914793776241, "grad_norm": 0.4503667034204893, "learning_rate": 4.974322006976095e-06, "loss": 0.56, "step": 2759 }, { "epoch": 1.0908866386762164, "grad_norm": 0.45807333150826124, "learning_rate": 4.974299604611682e-06, "loss": 0.5475, "step": 2760 }, { "epoch": 1.0912817979748086, "grad_norm": 0.46938442181289014, "learning_rate": 4.974277192529723e-06, "loss": 0.5607, "step": 2761 }, { "epoch": 1.0916769572734009, "grad_norm": 0.4504533242644673, "learning_rate": 4.974254770730306e-06, "loss": 0.5817, "step": 2762 }, { "epoch": 1.092072116571993, "grad_norm": 0.44892490581416017, "learning_rate": 4.974232339213519e-06, "loss": 0.5572, "step": 2763 }, { "epoch": 1.0924672758705853, "grad_norm": 0.5598674028961942, "learning_rate": 4.974209897979448e-06, "loss": 0.5688, "step": 2764 }, { "epoch": 1.0928624351691776, "grad_norm": 0.4463594082886901, "learning_rate": 4.974187447028185e-06, "loss": 0.5657, "step": 2765 }, { "epoch": 1.0932575944677698, "grad_norm": 0.45947250553137825, "learning_rate": 4.974164986359814e-06, "loss": 0.5719, "step": 2766 }, { "epoch": 1.093652753766362, "grad_norm": 0.4621085715770585, "learning_rate": 4.974142515974427e-06, "loss": 0.5801, "step": 2767 }, { "epoch": 1.0940479130649543, "grad_norm": 0.44428474945478147, "learning_rate": 4.97412003587211e-06, "loss": 0.5583, "step": 2768 }, { "epoch": 1.0944430723635465, "grad_norm": 0.4573957820670806, "learning_rate": 4.974097546052952e-06, "loss": 0.5731, "step": 2769 }, { "epoch": 1.0948382316621388, "grad_norm": 0.45565450583105144, "learning_rate": 4.9740750465170415e-06, "loss": 0.5809, "step": 2770 }, { "epoch": 1.095233390960731, "grad_norm": 0.4482788589021872, "learning_rate": 4.974052537264465e-06, "loss": 0.5792, "step": 2771 }, { "epoch": 1.0956285502593233, "grad_norm": 0.4461820839459798, "learning_rate": 4.974030018295314e-06, "loss": 0.5786, "step": 2772 }, { "epoch": 1.0960237095579155, "grad_norm": 0.44439297274093054, "learning_rate": 4.974007489609675e-06, "loss": 0.5887, "step": 2773 }, { "epoch": 1.0964188688565077, "grad_norm": 0.44582844653174675, "learning_rate": 4.973984951207638e-06, "loss": 0.5897, "step": 2774 }, { "epoch": 1.0968140281551, "grad_norm": 0.4553585727658302, "learning_rate": 4.9739624030892885e-06, "loss": 0.5661, "step": 2775 }, { "epoch": 1.0972091874536922, "grad_norm": 0.4479235148347027, "learning_rate": 4.973939845254718e-06, "loss": 0.5931, "step": 2776 }, { "epoch": 1.0976043467522845, "grad_norm": 0.4428344941159122, "learning_rate": 4.973917277704014e-06, "loss": 0.5598, "step": 2777 }, { "epoch": 1.0979995060508767, "grad_norm": 0.476027880314366, "learning_rate": 4.973894700437265e-06, "loss": 0.5873, "step": 2778 }, { "epoch": 1.098394665349469, "grad_norm": 0.4404118923653951, "learning_rate": 4.97387211345456e-06, "loss": 0.5493, "step": 2779 }, { "epoch": 1.0987898246480612, "grad_norm": 0.4574987015710223, "learning_rate": 4.973849516755987e-06, "loss": 0.5498, "step": 2780 }, { "epoch": 1.0991849839466534, "grad_norm": 0.45123369032502647, "learning_rate": 4.973826910341636e-06, "loss": 0.5636, "step": 2781 }, { "epoch": 1.0995801432452457, "grad_norm": 0.4617618303547862, "learning_rate": 4.973804294211595e-06, "loss": 0.5835, "step": 2782 }, { "epoch": 1.099975302543838, "grad_norm": 0.4579139636190123, "learning_rate": 4.973781668365953e-06, "loss": 0.5737, "step": 2783 }, { "epoch": 1.1003704618424301, "grad_norm": 0.4771120050290431, "learning_rate": 4.973759032804798e-06, "loss": 0.581, "step": 2784 }, { "epoch": 1.1007656211410224, "grad_norm": 0.4573835551232583, "learning_rate": 4.973736387528219e-06, "loss": 0.5536, "step": 2785 }, { "epoch": 1.1011607804396146, "grad_norm": 0.43751748320789247, "learning_rate": 4.973713732536307e-06, "loss": 0.5533, "step": 2786 }, { "epoch": 1.1015559397382069, "grad_norm": 0.4551112523172073, "learning_rate": 4.97369106782915e-06, "loss": 0.5727, "step": 2787 }, { "epoch": 1.101951099036799, "grad_norm": 0.4728095976072211, "learning_rate": 4.973668393406835e-06, "loss": 0.5828, "step": 2788 }, { "epoch": 1.1023462583353916, "grad_norm": 0.46554008898501564, "learning_rate": 4.9736457092694545e-06, "loss": 0.5754, "step": 2789 }, { "epoch": 1.1027414176339838, "grad_norm": 0.44230286912396166, "learning_rate": 4.9736230154170945e-06, "loss": 0.5659, "step": 2790 }, { "epoch": 1.103136576932576, "grad_norm": 0.44947861957297847, "learning_rate": 4.973600311849845e-06, "loss": 0.5709, "step": 2791 }, { "epoch": 1.1035317362311683, "grad_norm": 0.44653133824766644, "learning_rate": 4.973577598567797e-06, "loss": 0.5887, "step": 2792 }, { "epoch": 1.1039268955297605, "grad_norm": 0.46671707574431837, "learning_rate": 4.9735548755710374e-06, "loss": 0.5871, "step": 2793 }, { "epoch": 1.1043220548283528, "grad_norm": 0.45380131690571734, "learning_rate": 4.973532142859656e-06, "loss": 0.5517, "step": 2794 }, { "epoch": 1.104717214126945, "grad_norm": 0.47357589275123324, "learning_rate": 4.973509400433743e-06, "loss": 0.5503, "step": 2795 }, { "epoch": 1.1051123734255373, "grad_norm": 0.45961644968240184, "learning_rate": 4.9734866482933865e-06, "loss": 0.5652, "step": 2796 }, { "epoch": 1.1055075327241295, "grad_norm": 0.4625546092783896, "learning_rate": 4.973463886438676e-06, "loss": 0.5787, "step": 2797 }, { "epoch": 1.1059026920227217, "grad_norm": 0.46020081411653113, "learning_rate": 4.9734411148697025e-06, "loss": 0.5609, "step": 2798 }, { "epoch": 1.106297851321314, "grad_norm": 0.46162652087403155, "learning_rate": 4.973418333586553e-06, "loss": 0.5872, "step": 2799 }, { "epoch": 1.1066930106199062, "grad_norm": 0.44195379442185856, "learning_rate": 4.97339554258932e-06, "loss": 0.5497, "step": 2800 }, { "epoch": 1.1070881699184985, "grad_norm": 0.449613283566657, "learning_rate": 4.97337274187809e-06, "loss": 0.5603, "step": 2801 }, { "epoch": 1.1074833292170907, "grad_norm": 0.45072734141619586, "learning_rate": 4.973349931452953e-06, "loss": 0.5577, "step": 2802 }, { "epoch": 1.107878488515683, "grad_norm": 0.4674608199843895, "learning_rate": 4.973327111314e-06, "loss": 0.5786, "step": 2803 }, { "epoch": 1.1082736478142752, "grad_norm": 0.4630627894242256, "learning_rate": 4.9733042814613205e-06, "loss": 0.5735, "step": 2804 }, { "epoch": 1.1086688071128674, "grad_norm": 0.4583380745367057, "learning_rate": 4.973281441895004e-06, "loss": 0.56, "step": 2805 }, { "epoch": 1.1090639664114597, "grad_norm": 0.4733069612546895, "learning_rate": 4.973258592615139e-06, "loss": 0.5684, "step": 2806 }, { "epoch": 1.109459125710052, "grad_norm": 0.454537320554346, "learning_rate": 4.973235733621816e-06, "loss": 0.5611, "step": 2807 }, { "epoch": 1.1098542850086441, "grad_norm": 0.46762331052618233, "learning_rate": 4.973212864915126e-06, "loss": 0.5712, "step": 2808 }, { "epoch": 1.1102494443072364, "grad_norm": 0.46039286243703786, "learning_rate": 4.973189986495157e-06, "loss": 0.5706, "step": 2809 }, { "epoch": 1.1106446036058286, "grad_norm": 0.45645844421382126, "learning_rate": 4.973167098362e-06, "loss": 0.5969, "step": 2810 }, { "epoch": 1.1110397629044209, "grad_norm": 0.44815006924446793, "learning_rate": 4.973144200515742e-06, "loss": 0.5622, "step": 2811 }, { "epoch": 1.111434922203013, "grad_norm": 0.45673663581682494, "learning_rate": 4.9731212929564785e-06, "loss": 0.5713, "step": 2812 }, { "epoch": 1.1118300815016053, "grad_norm": 0.4410699289674135, "learning_rate": 4.973098375684295e-06, "loss": 0.5572, "step": 2813 }, { "epoch": 1.1122252408001976, "grad_norm": 0.45953779864395006, "learning_rate": 4.973075448699283e-06, "loss": 0.5721, "step": 2814 }, { "epoch": 1.1126204000987898, "grad_norm": 0.48969886056398254, "learning_rate": 4.973052512001532e-06, "loss": 0.5961, "step": 2815 }, { "epoch": 1.113015559397382, "grad_norm": 0.4582083675335303, "learning_rate": 4.9730295655911325e-06, "loss": 0.5664, "step": 2816 }, { "epoch": 1.1134107186959743, "grad_norm": 0.45873792204035635, "learning_rate": 4.973006609468175e-06, "loss": 0.5798, "step": 2817 }, { "epoch": 1.1138058779945665, "grad_norm": 0.4595469818479189, "learning_rate": 4.9729836436327486e-06, "loss": 0.5583, "step": 2818 }, { "epoch": 1.1142010372931588, "grad_norm": 0.4660954028217541, "learning_rate": 4.972960668084945e-06, "loss": 0.586, "step": 2819 }, { "epoch": 1.114596196591751, "grad_norm": 0.45385089149228297, "learning_rate": 4.972937682824853e-06, "loss": 0.5617, "step": 2820 }, { "epoch": 1.1149913558903433, "grad_norm": 0.4541868015985183, "learning_rate": 4.972914687852564e-06, "loss": 0.5709, "step": 2821 }, { "epoch": 1.1153865151889355, "grad_norm": 0.4712498210514924, "learning_rate": 4.972891683168166e-06, "loss": 0.5643, "step": 2822 }, { "epoch": 1.1157816744875277, "grad_norm": 0.4528912615721515, "learning_rate": 4.9728686687717534e-06, "loss": 0.5677, "step": 2823 }, { "epoch": 1.11617683378612, "grad_norm": 0.5454379549098186, "learning_rate": 4.972845644663412e-06, "loss": 0.5848, "step": 2824 }, { "epoch": 1.1165719930847122, "grad_norm": 0.46204902182734076, "learning_rate": 4.972822610843236e-06, "loss": 0.5509, "step": 2825 }, { "epoch": 1.1169671523833045, "grad_norm": 0.45981650702917953, "learning_rate": 4.972799567311314e-06, "loss": 0.5589, "step": 2826 }, { "epoch": 1.1173623116818967, "grad_norm": 0.4748451628776366, "learning_rate": 4.9727765140677374e-06, "loss": 0.5735, "step": 2827 }, { "epoch": 1.117757470980489, "grad_norm": 0.43824092167576956, "learning_rate": 4.972753451112596e-06, "loss": 0.5595, "step": 2828 }, { "epoch": 1.1181526302790812, "grad_norm": 0.4532662779925716, "learning_rate": 4.97273037844598e-06, "loss": 0.5644, "step": 2829 }, { "epoch": 1.1185477895776734, "grad_norm": 0.45748152598351954, "learning_rate": 4.972707296067981e-06, "loss": 0.5677, "step": 2830 }, { "epoch": 1.1189429488762657, "grad_norm": 0.4588021846664917, "learning_rate": 4.972684203978689e-06, "loss": 0.5741, "step": 2831 }, { "epoch": 1.119338108174858, "grad_norm": 0.4571577453972498, "learning_rate": 4.972661102178196e-06, "loss": 0.5521, "step": 2832 }, { "epoch": 1.1197332674734501, "grad_norm": 0.4584124940006824, "learning_rate": 4.97263799066659e-06, "loss": 0.555, "step": 2833 }, { "epoch": 1.1201284267720424, "grad_norm": 0.4768334321839287, "learning_rate": 4.972614869443965e-06, "loss": 0.5554, "step": 2834 }, { "epoch": 1.1205235860706346, "grad_norm": 0.45878358249431184, "learning_rate": 4.972591738510409e-06, "loss": 0.5652, "step": 2835 }, { "epoch": 1.1209187453692269, "grad_norm": 0.4603002756472343, "learning_rate": 4.972568597866014e-06, "loss": 0.5706, "step": 2836 }, { "epoch": 1.121313904667819, "grad_norm": 0.4331713760224906, "learning_rate": 4.9725454475108714e-06, "loss": 0.5544, "step": 2837 }, { "epoch": 1.1217090639664113, "grad_norm": 0.4527006951431627, "learning_rate": 4.972522287445072e-06, "loss": 0.5435, "step": 2838 }, { "epoch": 1.1221042232650038, "grad_norm": 0.4757703333155185, "learning_rate": 4.972499117668707e-06, "loss": 0.5851, "step": 2839 }, { "epoch": 1.122499382563596, "grad_norm": 0.45557041972406903, "learning_rate": 4.972475938181866e-06, "loss": 0.5624, "step": 2840 }, { "epoch": 1.1228945418621883, "grad_norm": 0.4512928810062557, "learning_rate": 4.972452748984641e-06, "loss": 0.5485, "step": 2841 }, { "epoch": 1.1232897011607805, "grad_norm": 0.47660422203668557, "learning_rate": 4.972429550077122e-06, "loss": 0.5785, "step": 2842 }, { "epoch": 1.1236848604593728, "grad_norm": 0.45195553481987427, "learning_rate": 4.972406341459403e-06, "loss": 0.5754, "step": 2843 }, { "epoch": 1.124080019757965, "grad_norm": 0.4692003584527425, "learning_rate": 4.972383123131572e-06, "loss": 0.5713, "step": 2844 }, { "epoch": 1.1244751790565573, "grad_norm": 0.46001788143359484, "learning_rate": 4.9723598950937216e-06, "loss": 0.5527, "step": 2845 }, { "epoch": 1.1248703383551495, "grad_norm": 0.4530700630207127, "learning_rate": 4.972336657345943e-06, "loss": 0.5762, "step": 2846 }, { "epoch": 1.1252654976537417, "grad_norm": 0.4814747648947358, "learning_rate": 4.972313409888327e-06, "loss": 0.5757, "step": 2847 }, { "epoch": 1.125660656952334, "grad_norm": 0.4644316998225435, "learning_rate": 4.972290152720965e-06, "loss": 0.5932, "step": 2848 }, { "epoch": 1.1260558162509262, "grad_norm": 0.43522985761057875, "learning_rate": 4.97226688584395e-06, "loss": 0.548, "step": 2849 }, { "epoch": 1.1264509755495185, "grad_norm": 0.44885828698030505, "learning_rate": 4.97224360925737e-06, "loss": 0.5673, "step": 2850 }, { "epoch": 1.1268461348481107, "grad_norm": 0.4546348191990088, "learning_rate": 4.972220322961318e-06, "loss": 0.5717, "step": 2851 }, { "epoch": 1.127241294146703, "grad_norm": 0.45766425039161684, "learning_rate": 4.972197026955888e-06, "loss": 0.5738, "step": 2852 }, { "epoch": 1.1276364534452952, "grad_norm": 0.48259567077817345, "learning_rate": 4.972173721241168e-06, "loss": 0.5711, "step": 2853 }, { "epoch": 1.1280316127438874, "grad_norm": 0.4465075582537828, "learning_rate": 4.972150405817251e-06, "loss": 0.5608, "step": 2854 }, { "epoch": 1.1284267720424797, "grad_norm": 0.4494060907226681, "learning_rate": 4.972127080684228e-06, "loss": 0.5597, "step": 2855 }, { "epoch": 1.128821931341072, "grad_norm": 0.45776592151810525, "learning_rate": 4.972103745842192e-06, "loss": 0.5696, "step": 2856 }, { "epoch": 1.1292170906396641, "grad_norm": 0.4517897855267896, "learning_rate": 4.9720804012912325e-06, "loss": 0.5597, "step": 2857 }, { "epoch": 1.1296122499382564, "grad_norm": 0.45557106783434753, "learning_rate": 4.9720570470314435e-06, "loss": 0.585, "step": 2858 }, { "epoch": 1.1300074092368486, "grad_norm": 0.44670409565234737, "learning_rate": 4.9720336830629145e-06, "loss": 0.5609, "step": 2859 }, { "epoch": 1.1304025685354409, "grad_norm": 0.4616073635588755, "learning_rate": 4.972010309385739e-06, "loss": 0.5669, "step": 2860 }, { "epoch": 1.130797727834033, "grad_norm": 0.48898771950037234, "learning_rate": 4.971986926000008e-06, "loss": 0.5781, "step": 2861 }, { "epoch": 1.1311928871326253, "grad_norm": 0.47129122376722427, "learning_rate": 4.971963532905812e-06, "loss": 0.5856, "step": 2862 }, { "epoch": 1.1315880464312176, "grad_norm": 0.4367114279377543, "learning_rate": 4.971940130103245e-06, "loss": 0.5498, "step": 2863 }, { "epoch": 1.1319832057298098, "grad_norm": 0.45445984797614775, "learning_rate": 4.9719167175924e-06, "loss": 0.5585, "step": 2864 }, { "epoch": 1.132378365028402, "grad_norm": 0.47288776531263177, "learning_rate": 4.971893295373366e-06, "loss": 0.5699, "step": 2865 }, { "epoch": 1.1327735243269943, "grad_norm": 0.46037768185257905, "learning_rate": 4.971869863446235e-06, "loss": 0.555, "step": 2866 }, { "epoch": 1.1331686836255865, "grad_norm": 0.4409672704352513, "learning_rate": 4.971846421811101e-06, "loss": 0.5741, "step": 2867 }, { "epoch": 1.1335638429241788, "grad_norm": 0.4735947826477942, "learning_rate": 4.971822970468056e-06, "loss": 0.5751, "step": 2868 }, { "epoch": 1.133959002222771, "grad_norm": 0.4985409552375511, "learning_rate": 4.97179950941719e-06, "loss": 0.5831, "step": 2869 }, { "epoch": 1.1343541615213633, "grad_norm": 0.44396729751506975, "learning_rate": 4.971776038658598e-06, "loss": 0.5666, "step": 2870 }, { "epoch": 1.1347493208199555, "grad_norm": 0.46267223426001386, "learning_rate": 4.97175255819237e-06, "loss": 0.5596, "step": 2871 }, { "epoch": 1.1351444801185477, "grad_norm": 0.46568413072052744, "learning_rate": 4.9717290680185985e-06, "loss": 0.5686, "step": 2872 }, { "epoch": 1.13553963941714, "grad_norm": 0.4673886583205963, "learning_rate": 4.971705568137376e-06, "loss": 0.5756, "step": 2873 }, { "epoch": 1.1359347987157322, "grad_norm": 0.45942651811103113, "learning_rate": 4.971682058548795e-06, "loss": 0.5676, "step": 2874 }, { "epoch": 1.1363299580143245, "grad_norm": 0.45192092503324155, "learning_rate": 4.971658539252948e-06, "loss": 0.5729, "step": 2875 }, { "epoch": 1.1367251173129167, "grad_norm": 0.4428557809778483, "learning_rate": 4.971635010249928e-06, "loss": 0.5751, "step": 2876 }, { "epoch": 1.137120276611509, "grad_norm": 0.4506504247352054, "learning_rate": 4.971611471539826e-06, "loss": 0.5628, "step": 2877 }, { "epoch": 1.1375154359101012, "grad_norm": 0.45545331444170223, "learning_rate": 4.971587923122734e-06, "loss": 0.5642, "step": 2878 }, { "epoch": 1.1379105952086934, "grad_norm": 0.479413221980225, "learning_rate": 4.971564364998747e-06, "loss": 0.5749, "step": 2879 }, { "epoch": 1.1383057545072857, "grad_norm": 0.4439935558628392, "learning_rate": 4.971540797167954e-06, "loss": 0.5599, "step": 2880 }, { "epoch": 1.138700913805878, "grad_norm": 0.4444069130578539, "learning_rate": 4.971517219630451e-06, "loss": 0.5862, "step": 2881 }, { "epoch": 1.1390960731044704, "grad_norm": 0.4552609885225358, "learning_rate": 4.971493632386329e-06, "loss": 0.5575, "step": 2882 }, { "epoch": 1.1394912324030626, "grad_norm": 0.4382492830615158, "learning_rate": 4.97147003543568e-06, "loss": 0.5589, "step": 2883 }, { "epoch": 1.1398863917016548, "grad_norm": 0.4437390367384225, "learning_rate": 4.971446428778599e-06, "loss": 0.5714, "step": 2884 }, { "epoch": 1.140281551000247, "grad_norm": 0.4456567659509046, "learning_rate": 4.971422812415176e-06, "loss": 0.5526, "step": 2885 }, { "epoch": 1.1406767102988393, "grad_norm": 0.44730543435927383, "learning_rate": 4.971399186345505e-06, "loss": 0.5579, "step": 2886 }, { "epoch": 1.1410718695974316, "grad_norm": 0.45573671820566486, "learning_rate": 4.97137555056968e-06, "loss": 0.56, "step": 2887 }, { "epoch": 1.1414670288960238, "grad_norm": 0.4505940700048066, "learning_rate": 4.971351905087791e-06, "loss": 0.5677, "step": 2888 }, { "epoch": 1.141862188194616, "grad_norm": 0.44421960402517957, "learning_rate": 4.971328249899932e-06, "loss": 0.5494, "step": 2889 }, { "epoch": 1.1422573474932083, "grad_norm": 0.4494388053299841, "learning_rate": 4.971304585006198e-06, "loss": 0.5808, "step": 2890 }, { "epoch": 1.1426525067918005, "grad_norm": 0.454206814685918, "learning_rate": 4.971280910406679e-06, "loss": 0.5647, "step": 2891 }, { "epoch": 1.1430476660903928, "grad_norm": 0.4570937269415416, "learning_rate": 4.971257226101469e-06, "loss": 0.5645, "step": 2892 }, { "epoch": 1.143442825388985, "grad_norm": 0.4424306427628629, "learning_rate": 4.971233532090662e-06, "loss": 0.5617, "step": 2893 }, { "epoch": 1.1438379846875772, "grad_norm": 0.43897777777159125, "learning_rate": 4.97120982837435e-06, "loss": 0.5649, "step": 2894 }, { "epoch": 1.1442331439861695, "grad_norm": 0.44861248994251357, "learning_rate": 4.971186114952628e-06, "loss": 0.565, "step": 2895 }, { "epoch": 1.1446283032847617, "grad_norm": 0.47001095027879825, "learning_rate": 4.971162391825586e-06, "loss": 0.5803, "step": 2896 }, { "epoch": 1.145023462583354, "grad_norm": 0.44454980270407224, "learning_rate": 4.971138658993318e-06, "loss": 0.561, "step": 2897 }, { "epoch": 1.1454186218819462, "grad_norm": 0.4467127010573676, "learning_rate": 4.971114916455919e-06, "loss": 0.5531, "step": 2898 }, { "epoch": 1.1458137811805384, "grad_norm": 0.46879881901374293, "learning_rate": 4.9710911642134805e-06, "loss": 0.5943, "step": 2899 }, { "epoch": 1.1462089404791307, "grad_norm": 0.46048699268271154, "learning_rate": 4.971067402266097e-06, "loss": 0.5785, "step": 2900 }, { "epoch": 1.146604099777723, "grad_norm": 0.4578268867042688, "learning_rate": 4.971043630613861e-06, "loss": 0.5627, "step": 2901 }, { "epoch": 1.1469992590763152, "grad_norm": 0.44834695982191136, "learning_rate": 4.971019849256866e-06, "loss": 0.5596, "step": 2902 }, { "epoch": 1.1473944183749074, "grad_norm": 0.4447032230484451, "learning_rate": 4.970996058195206e-06, "loss": 0.555, "step": 2903 }, { "epoch": 1.1477895776734997, "grad_norm": 0.4655101994383645, "learning_rate": 4.970972257428973e-06, "loss": 0.5904, "step": 2904 }, { "epoch": 1.148184736972092, "grad_norm": 0.48666828141831653, "learning_rate": 4.970948446958262e-06, "loss": 0.5872, "step": 2905 }, { "epoch": 1.1485798962706841, "grad_norm": 0.45560221158301, "learning_rate": 4.970924626783165e-06, "loss": 0.572, "step": 2906 }, { "epoch": 1.1489750555692764, "grad_norm": 0.4542327270574395, "learning_rate": 4.970900796903778e-06, "loss": 0.5807, "step": 2907 }, { "epoch": 1.1493702148678686, "grad_norm": 0.45302601284820004, "learning_rate": 4.970876957320193e-06, "loss": 0.5556, "step": 2908 }, { "epoch": 1.1497653741664609, "grad_norm": 0.45085641190037123, "learning_rate": 4.970853108032503e-06, "loss": 0.553, "step": 2909 }, { "epoch": 1.150160533465053, "grad_norm": 0.44311699037774255, "learning_rate": 4.970829249040803e-06, "loss": 0.5783, "step": 2910 }, { "epoch": 1.1505556927636453, "grad_norm": 0.4565247742626593, "learning_rate": 4.970805380345186e-06, "loss": 0.5668, "step": 2911 }, { "epoch": 1.1509508520622376, "grad_norm": 0.442670058390068, "learning_rate": 4.970781501945745e-06, "loss": 0.5804, "step": 2912 }, { "epoch": 1.1513460113608298, "grad_norm": 0.4587903036700737, "learning_rate": 4.970757613842575e-06, "loss": 0.5436, "step": 2913 }, { "epoch": 1.151741170659422, "grad_norm": 0.4605719839378815, "learning_rate": 4.970733716035769e-06, "loss": 0.5708, "step": 2914 }, { "epoch": 1.1521363299580143, "grad_norm": 0.4811516114353964, "learning_rate": 4.970709808525423e-06, "loss": 0.5695, "step": 2915 }, { "epoch": 1.1525314892566065, "grad_norm": 0.4465598040548844, "learning_rate": 4.970685891311627e-06, "loss": 0.5585, "step": 2916 }, { "epoch": 1.1529266485551988, "grad_norm": 0.4482126904622925, "learning_rate": 4.970661964394479e-06, "loss": 0.5531, "step": 2917 }, { "epoch": 1.153321807853791, "grad_norm": 0.45465037670607433, "learning_rate": 4.97063802777407e-06, "loss": 0.5552, "step": 2918 }, { "epoch": 1.1537169671523833, "grad_norm": 0.44832182812879506, "learning_rate": 4.970614081450495e-06, "loss": 0.5564, "step": 2919 }, { "epoch": 1.1541121264509755, "grad_norm": 0.45584014338048706, "learning_rate": 4.9705901254238485e-06, "loss": 0.5569, "step": 2920 }, { "epoch": 1.1545072857495677, "grad_norm": 0.44834971508394056, "learning_rate": 4.970566159694224e-06, "loss": 0.5461, "step": 2921 }, { "epoch": 1.15490244504816, "grad_norm": 0.4578941775430072, "learning_rate": 4.970542184261716e-06, "loss": 0.5668, "step": 2922 }, { "epoch": 1.1552976043467522, "grad_norm": 0.4817857268031156, "learning_rate": 4.9705181991264185e-06, "loss": 0.5776, "step": 2923 }, { "epoch": 1.1556927636453445, "grad_norm": 0.4608535112462551, "learning_rate": 4.9704942042884256e-06, "loss": 0.5665, "step": 2924 }, { "epoch": 1.1560879229439367, "grad_norm": 0.45851740053649265, "learning_rate": 4.970470199747831e-06, "loss": 0.5699, "step": 2925 }, { "epoch": 1.156483082242529, "grad_norm": 0.47180228641422883, "learning_rate": 4.97044618550473e-06, "loss": 0.572, "step": 2926 }, { "epoch": 1.1568782415411212, "grad_norm": 0.47156814207993075, "learning_rate": 4.970422161559217e-06, "loss": 0.5437, "step": 2927 }, { "epoch": 1.1572734008397134, "grad_norm": 0.4543173767246186, "learning_rate": 4.970398127911386e-06, "loss": 0.5796, "step": 2928 }, { "epoch": 1.1576685601383057, "grad_norm": 0.4467556140355343, "learning_rate": 4.97037408456133e-06, "loss": 0.5739, "step": 2929 }, { "epoch": 1.158063719436898, "grad_norm": 0.4572687150991262, "learning_rate": 4.970350031509146e-06, "loss": 0.5789, "step": 2930 }, { "epoch": 1.1584588787354901, "grad_norm": 0.46752603817680133, "learning_rate": 4.970325968754926e-06, "loss": 0.5902, "step": 2931 }, { "epoch": 1.1588540380340824, "grad_norm": 0.44878914889504196, "learning_rate": 4.970301896298767e-06, "loss": 0.5721, "step": 2932 }, { "epoch": 1.1592491973326746, "grad_norm": 0.4485973988983018, "learning_rate": 4.9702778141407615e-06, "loss": 0.5553, "step": 2933 }, { "epoch": 1.1596443566312669, "grad_norm": 0.46028448862594296, "learning_rate": 4.970253722281006e-06, "loss": 0.5714, "step": 2934 }, { "epoch": 1.160039515929859, "grad_norm": 0.43839159073806405, "learning_rate": 4.970229620719592e-06, "loss": 0.5682, "step": 2935 }, { "epoch": 1.1604346752284516, "grad_norm": 0.4502203330745019, "learning_rate": 4.970205509456617e-06, "loss": 0.5633, "step": 2936 }, { "epoch": 1.1608298345270438, "grad_norm": 0.4577454214170737, "learning_rate": 4.970181388492174e-06, "loss": 0.56, "step": 2937 }, { "epoch": 1.161224993825636, "grad_norm": 0.5137258014666423, "learning_rate": 4.970157257826359e-06, "loss": 0.5807, "step": 2938 }, { "epoch": 1.1616201531242283, "grad_norm": 0.456240539508233, "learning_rate": 4.970133117459266e-06, "loss": 0.5698, "step": 2939 }, { "epoch": 1.1620153124228205, "grad_norm": 0.4571385214359614, "learning_rate": 4.9701089673909905e-06, "loss": 0.5712, "step": 2940 }, { "epoch": 1.1624104717214128, "grad_norm": 0.43698412892461164, "learning_rate": 4.970084807621627e-06, "loss": 0.5518, "step": 2941 }, { "epoch": 1.162805631020005, "grad_norm": 0.4508512464344563, "learning_rate": 4.97006063815127e-06, "loss": 0.5543, "step": 2942 }, { "epoch": 1.1632007903185972, "grad_norm": 0.444206994150076, "learning_rate": 4.970036458980014e-06, "loss": 0.5581, "step": 2943 }, { "epoch": 1.1635959496171895, "grad_norm": 0.4524722456542353, "learning_rate": 4.9700122701079566e-06, "loss": 0.5542, "step": 2944 }, { "epoch": 1.1639911089157817, "grad_norm": 0.44578046719303255, "learning_rate": 4.969988071535189e-06, "loss": 0.5725, "step": 2945 }, { "epoch": 1.164386268214374, "grad_norm": 0.4549968477995691, "learning_rate": 4.969963863261808e-06, "loss": 0.5848, "step": 2946 }, { "epoch": 1.1647814275129662, "grad_norm": 0.4552555332588111, "learning_rate": 4.969939645287911e-06, "loss": 0.557, "step": 2947 }, { "epoch": 1.1651765868115584, "grad_norm": 0.4750098537013762, "learning_rate": 4.969915417613589e-06, "loss": 0.5864, "step": 2948 }, { "epoch": 1.1655717461101507, "grad_norm": 0.45082337345793494, "learning_rate": 4.96989118023894e-06, "loss": 0.5866, "step": 2949 }, { "epoch": 1.165966905408743, "grad_norm": 0.4455097203171396, "learning_rate": 4.969866933164057e-06, "loss": 0.5607, "step": 2950 }, { "epoch": 1.1663620647073352, "grad_norm": 0.4489253199947083, "learning_rate": 4.969842676389038e-06, "loss": 0.5662, "step": 2951 }, { "epoch": 1.1667572240059274, "grad_norm": 0.46293264922861316, "learning_rate": 4.969818409913976e-06, "loss": 0.5551, "step": 2952 }, { "epoch": 1.1671523833045196, "grad_norm": 0.45718754430330005, "learning_rate": 4.969794133738967e-06, "loss": 0.5541, "step": 2953 }, { "epoch": 1.1675475426031119, "grad_norm": 0.45394398829100463, "learning_rate": 4.9697698478641056e-06, "loss": 0.5543, "step": 2954 }, { "epoch": 1.1679427019017041, "grad_norm": 0.4580575806358461, "learning_rate": 4.969745552289489e-06, "loss": 0.5648, "step": 2955 }, { "epoch": 1.1683378612002964, "grad_norm": 0.45929415970435655, "learning_rate": 4.969721247015212e-06, "loss": 0.5802, "step": 2956 }, { "epoch": 1.1687330204988886, "grad_norm": 0.43706127656723726, "learning_rate": 4.969696932041369e-06, "loss": 0.5658, "step": 2957 }, { "epoch": 1.1691281797974808, "grad_norm": 0.45589757018205523, "learning_rate": 4.969672607368056e-06, "loss": 0.5451, "step": 2958 }, { "epoch": 1.169523339096073, "grad_norm": 0.4453274113893662, "learning_rate": 4.969648272995368e-06, "loss": 0.5651, "step": 2959 }, { "epoch": 1.1699184983946653, "grad_norm": 0.4422585180450584, "learning_rate": 4.9696239289234025e-06, "loss": 0.5513, "step": 2960 }, { "epoch": 1.1703136576932576, "grad_norm": 0.4609612248644218, "learning_rate": 4.969599575152253e-06, "loss": 0.579, "step": 2961 }, { "epoch": 1.1707088169918498, "grad_norm": 0.46053331483814797, "learning_rate": 4.969575211682016e-06, "loss": 0.5456, "step": 2962 }, { "epoch": 1.171103976290442, "grad_norm": 0.46253809484884606, "learning_rate": 4.969550838512787e-06, "loss": 0.5738, "step": 2963 }, { "epoch": 1.1714991355890343, "grad_norm": 0.45757469040052795, "learning_rate": 4.969526455644664e-06, "loss": 0.5835, "step": 2964 }, { "epoch": 1.1718942948876265, "grad_norm": 0.4649235194612874, "learning_rate": 4.969502063077738e-06, "loss": 0.5656, "step": 2965 }, { "epoch": 1.1722894541862188, "grad_norm": 0.4529442270814776, "learning_rate": 4.969477660812108e-06, "loss": 0.5475, "step": 2966 }, { "epoch": 1.172684613484811, "grad_norm": 0.46009271161789567, "learning_rate": 4.969453248847871e-06, "loss": 0.5638, "step": 2967 }, { "epoch": 1.1730797727834033, "grad_norm": 0.4543848817490557, "learning_rate": 4.96942882718512e-06, "loss": 0.556, "step": 2968 }, { "epoch": 1.1734749320819955, "grad_norm": 0.4486250704854233, "learning_rate": 4.9694043958239515e-06, "loss": 0.5475, "step": 2969 }, { "epoch": 1.1738700913805877, "grad_norm": 0.475212385037125, "learning_rate": 4.969379954764463e-06, "loss": 0.5694, "step": 2970 }, { "epoch": 1.17426525067918, "grad_norm": 0.46354530802940175, "learning_rate": 4.969355504006749e-06, "loss": 0.5693, "step": 2971 }, { "epoch": 1.1746604099777722, "grad_norm": 0.4654693704390786, "learning_rate": 4.969331043550907e-06, "loss": 0.5821, "step": 2972 }, { "epoch": 1.1750555692763645, "grad_norm": 0.45975428248741407, "learning_rate": 4.969306573397031e-06, "loss": 0.5763, "step": 2973 }, { "epoch": 1.1754507285749567, "grad_norm": 0.4567317408231308, "learning_rate": 4.969282093545218e-06, "loss": 0.5785, "step": 2974 }, { "epoch": 1.175845887873549, "grad_norm": 0.44978056274129696, "learning_rate": 4.969257603995566e-06, "loss": 0.5624, "step": 2975 }, { "epoch": 1.1762410471721412, "grad_norm": 0.4535109810307646, "learning_rate": 4.969233104748168e-06, "loss": 0.5568, "step": 2976 }, { "epoch": 1.1766362064707336, "grad_norm": 0.512694154610389, "learning_rate": 4.9692085958031225e-06, "loss": 0.5537, "step": 2977 }, { "epoch": 1.1770313657693259, "grad_norm": 0.4657562385746197, "learning_rate": 4.969184077160524e-06, "loss": 0.5754, "step": 2978 }, { "epoch": 1.1774265250679181, "grad_norm": 0.4676379011730835, "learning_rate": 4.96915954882047e-06, "loss": 0.5763, "step": 2979 }, { "epoch": 1.1778216843665104, "grad_norm": 0.44433392586627835, "learning_rate": 4.9691350107830575e-06, "loss": 0.5544, "step": 2980 }, { "epoch": 1.1782168436651026, "grad_norm": 0.4655458760369557, "learning_rate": 4.9691104630483825e-06, "loss": 0.5511, "step": 2981 }, { "epoch": 1.1786120029636948, "grad_norm": 0.4685666784171711, "learning_rate": 4.96908590561654e-06, "loss": 0.5601, "step": 2982 }, { "epoch": 1.179007162262287, "grad_norm": 0.46307465210699217, "learning_rate": 4.969061338487627e-06, "loss": 0.5804, "step": 2983 }, { "epoch": 1.1794023215608793, "grad_norm": 0.4751945587922622, "learning_rate": 4.969036761661741e-06, "loss": 0.5761, "step": 2984 }, { "epoch": 1.1797974808594716, "grad_norm": 0.4354064786650696, "learning_rate": 4.969012175138978e-06, "loss": 0.5496, "step": 2985 }, { "epoch": 1.1801926401580638, "grad_norm": 0.4410526844854051, "learning_rate": 4.968987578919434e-06, "loss": 0.574, "step": 2986 }, { "epoch": 1.180587799456656, "grad_norm": 0.5073421248174037, "learning_rate": 4.9689629730032065e-06, "loss": 0.5713, "step": 2987 }, { "epoch": 1.1809829587552483, "grad_norm": 0.45094193815320693, "learning_rate": 4.968938357390391e-06, "loss": 0.5618, "step": 2988 }, { "epoch": 1.1813781180538405, "grad_norm": 0.45984337128911706, "learning_rate": 4.968913732081085e-06, "loss": 0.5518, "step": 2989 }, { "epoch": 1.1817732773524328, "grad_norm": 0.44962428315636743, "learning_rate": 4.968889097075386e-06, "loss": 0.5603, "step": 2990 }, { "epoch": 1.182168436651025, "grad_norm": 0.4791783985253456, "learning_rate": 4.968864452373388e-06, "loss": 0.5695, "step": 2991 }, { "epoch": 1.1825635959496172, "grad_norm": 0.46504234897652474, "learning_rate": 4.968839797975192e-06, "loss": 0.57, "step": 2992 }, { "epoch": 1.1829587552482095, "grad_norm": 0.46532115508012195, "learning_rate": 4.96881513388089e-06, "loss": 0.5848, "step": 2993 }, { "epoch": 1.1833539145468017, "grad_norm": 0.4736857697301262, "learning_rate": 4.968790460090584e-06, "loss": 0.5536, "step": 2994 }, { "epoch": 1.183749073845394, "grad_norm": 0.47327160279398156, "learning_rate": 4.968765776604366e-06, "loss": 0.59, "step": 2995 }, { "epoch": 1.1841442331439862, "grad_norm": 0.44968273792335417, "learning_rate": 4.968741083422335e-06, "loss": 0.5602, "step": 2996 }, { "epoch": 1.1845393924425784, "grad_norm": 0.47160397918194813, "learning_rate": 4.968716380544589e-06, "loss": 0.578, "step": 2997 }, { "epoch": 1.1849345517411707, "grad_norm": 0.4674565580152445, "learning_rate": 4.968691667971224e-06, "loss": 0.565, "step": 2998 }, { "epoch": 1.185329711039763, "grad_norm": 0.45935108094182536, "learning_rate": 4.968666945702338e-06, "loss": 0.5511, "step": 2999 }, { "epoch": 1.1857248703383552, "grad_norm": 0.4551811442826113, "learning_rate": 4.9686422137380265e-06, "loss": 0.5646, "step": 3000 }, { "epoch": 1.1861200296369474, "grad_norm": 0.45211507022800734, "learning_rate": 4.968617472078388e-06, "loss": 0.5573, "step": 3001 }, { "epoch": 1.1865151889355396, "grad_norm": 0.451017086480957, "learning_rate": 4.968592720723518e-06, "loss": 0.5851, "step": 3002 }, { "epoch": 1.1869103482341319, "grad_norm": 0.45071015068421694, "learning_rate": 4.968567959673515e-06, "loss": 0.5484, "step": 3003 }, { "epoch": 1.1873055075327241, "grad_norm": 0.47688770951933446, "learning_rate": 4.968543188928476e-06, "loss": 0.577, "step": 3004 }, { "epoch": 1.1877006668313164, "grad_norm": 0.43827997699657956, "learning_rate": 4.9685184084885e-06, "loss": 0.5566, "step": 3005 }, { "epoch": 1.1880958261299086, "grad_norm": 0.4566633468170921, "learning_rate": 4.968493618353681e-06, "loss": 0.5539, "step": 3006 }, { "epoch": 1.1884909854285008, "grad_norm": 0.46120055132005444, "learning_rate": 4.968468818524118e-06, "loss": 0.5704, "step": 3007 }, { "epoch": 1.188886144727093, "grad_norm": 0.44778171241028825, "learning_rate": 4.968444008999909e-06, "loss": 0.5855, "step": 3008 }, { "epoch": 1.1892813040256853, "grad_norm": 0.4750224432409252, "learning_rate": 4.9684191897811505e-06, "loss": 0.5565, "step": 3009 }, { "epoch": 1.1896764633242776, "grad_norm": 0.4458948426804506, "learning_rate": 4.968394360867941e-06, "loss": 0.5585, "step": 3010 }, { "epoch": 1.1900716226228698, "grad_norm": 0.46227629169335344, "learning_rate": 4.968369522260377e-06, "loss": 0.5754, "step": 3011 }, { "epoch": 1.190466781921462, "grad_norm": 0.46386108301772255, "learning_rate": 4.968344673958556e-06, "loss": 0.5743, "step": 3012 }, { "epoch": 1.1908619412200543, "grad_norm": 0.45605726481124903, "learning_rate": 4.968319815962577e-06, "loss": 0.5698, "step": 3013 }, { "epoch": 1.1912571005186465, "grad_norm": 0.4430878703628007, "learning_rate": 4.968294948272535e-06, "loss": 0.5432, "step": 3014 }, { "epoch": 1.1916522598172388, "grad_norm": 0.45206251975568795, "learning_rate": 4.96827007088853e-06, "loss": 0.571, "step": 3015 }, { "epoch": 1.192047419115831, "grad_norm": 0.4552471461255095, "learning_rate": 4.968245183810659e-06, "loss": 0.5691, "step": 3016 }, { "epoch": 1.1924425784144232, "grad_norm": 0.4432473462755384, "learning_rate": 4.968220287039021e-06, "loss": 0.5401, "step": 3017 }, { "epoch": 1.1928377377130155, "grad_norm": 0.44587530242670237, "learning_rate": 4.9681953805737106e-06, "loss": 0.5461, "step": 3018 }, { "epoch": 1.1932328970116077, "grad_norm": 0.47050677473839214, "learning_rate": 4.968170464414828e-06, "loss": 0.563, "step": 3019 }, { "epoch": 1.1936280563102, "grad_norm": 0.44130303947137217, "learning_rate": 4.968145538562471e-06, "loss": 0.5693, "step": 3020 }, { "epoch": 1.1940232156087922, "grad_norm": 0.4544886703368673, "learning_rate": 4.968120603016737e-06, "loss": 0.5653, "step": 3021 }, { "epoch": 1.1944183749073845, "grad_norm": 0.4896624605231788, "learning_rate": 4.968095657777724e-06, "loss": 0.5822, "step": 3022 }, { "epoch": 1.1948135342059767, "grad_norm": 0.4703893538726537, "learning_rate": 4.968070702845529e-06, "loss": 0.5384, "step": 3023 }, { "epoch": 1.195208693504569, "grad_norm": 0.5189597430778528, "learning_rate": 4.968045738220252e-06, "loss": 0.578, "step": 3024 }, { "epoch": 1.1956038528031612, "grad_norm": 0.44433555063115776, "learning_rate": 4.96802076390199e-06, "loss": 0.5672, "step": 3025 }, { "epoch": 1.1959990121017534, "grad_norm": 0.44690653875268826, "learning_rate": 4.96799577989084e-06, "loss": 0.5483, "step": 3026 }, { "epoch": 1.1963941714003457, "grad_norm": 0.4456693441690437, "learning_rate": 4.967970786186903e-06, "loss": 0.5917, "step": 3027 }, { "epoch": 1.196789330698938, "grad_norm": 0.4656615654209235, "learning_rate": 4.967945782790275e-06, "loss": 0.6074, "step": 3028 }, { "epoch": 1.1971844899975301, "grad_norm": 0.4679283421472829, "learning_rate": 4.967920769701053e-06, "loss": 0.5562, "step": 3029 }, { "epoch": 1.1975796492961226, "grad_norm": 0.4599878221931696, "learning_rate": 4.967895746919339e-06, "loss": 0.5898, "step": 3030 }, { "epoch": 1.1979748085947148, "grad_norm": 0.44364183466084445, "learning_rate": 4.967870714445227e-06, "loss": 0.5518, "step": 3031 }, { "epoch": 1.198369967893307, "grad_norm": 0.45964094956857143, "learning_rate": 4.967845672278819e-06, "loss": 0.5554, "step": 3032 }, { "epoch": 1.1987651271918993, "grad_norm": 0.45133637183308356, "learning_rate": 4.967820620420211e-06, "loss": 0.5651, "step": 3033 }, { "epoch": 1.1991602864904916, "grad_norm": 0.4427547623086265, "learning_rate": 4.9677955588695025e-06, "loss": 0.5748, "step": 3034 }, { "epoch": 1.1995554457890838, "grad_norm": 0.6682331551539357, "learning_rate": 4.967770487626791e-06, "loss": 0.5567, "step": 3035 }, { "epoch": 1.199950605087676, "grad_norm": 0.47306363628338366, "learning_rate": 4.967745406692176e-06, "loss": 0.5675, "step": 3036 }, { "epoch": 1.2003457643862683, "grad_norm": 0.48270419923550345, "learning_rate": 4.967720316065756e-06, "loss": 0.5661, "step": 3037 }, { "epoch": 1.2007409236848605, "grad_norm": 0.4423873238428253, "learning_rate": 4.9676952157476285e-06, "loss": 0.5681, "step": 3038 }, { "epoch": 1.2011360829834528, "grad_norm": 0.4487466875408833, "learning_rate": 4.967670105737892e-06, "loss": 0.5489, "step": 3039 }, { "epoch": 1.201531242282045, "grad_norm": 0.5042106639999401, "learning_rate": 4.967644986036647e-06, "loss": 0.5611, "step": 3040 }, { "epoch": 1.2019264015806372, "grad_norm": 0.4759632066357272, "learning_rate": 4.96761985664399e-06, "loss": 0.5622, "step": 3041 }, { "epoch": 1.2023215608792295, "grad_norm": 0.47451217869994206, "learning_rate": 4.967594717560022e-06, "loss": 0.5804, "step": 3042 }, { "epoch": 1.2027167201778217, "grad_norm": 0.4593740341418387, "learning_rate": 4.967569568784839e-06, "loss": 0.5563, "step": 3043 }, { "epoch": 1.203111879476414, "grad_norm": 0.4790716838554254, "learning_rate": 4.967544410318541e-06, "loss": 0.5562, "step": 3044 }, { "epoch": 1.2035070387750062, "grad_norm": 0.44309498656578405, "learning_rate": 4.967519242161227e-06, "loss": 0.5516, "step": 3045 }, { "epoch": 1.2039021980735984, "grad_norm": 0.4895189026135667, "learning_rate": 4.967494064312996e-06, "loss": 0.5667, "step": 3046 }, { "epoch": 1.2042973573721907, "grad_norm": 0.4561602202621074, "learning_rate": 4.967468876773948e-06, "loss": 0.5629, "step": 3047 }, { "epoch": 1.204692516670783, "grad_norm": 0.4697256795259107, "learning_rate": 4.9674436795441795e-06, "loss": 0.5786, "step": 3048 }, { "epoch": 1.2050876759693752, "grad_norm": 0.5259340703235517, "learning_rate": 4.96741847262379e-06, "loss": 0.5583, "step": 3049 }, { "epoch": 1.2054828352679674, "grad_norm": 0.4894487391041935, "learning_rate": 4.967393256012879e-06, "loss": 0.5904, "step": 3050 }, { "epoch": 1.2058779945665596, "grad_norm": 0.46711138204567687, "learning_rate": 4.967368029711547e-06, "loss": 0.5702, "step": 3051 }, { "epoch": 1.2062731538651519, "grad_norm": 0.4888902860202162, "learning_rate": 4.96734279371989e-06, "loss": 0.5771, "step": 3052 }, { "epoch": 1.2066683131637441, "grad_norm": 0.5333072923347787, "learning_rate": 4.96731754803801e-06, "loss": 0.561, "step": 3053 }, { "epoch": 1.2070634724623364, "grad_norm": 0.46654604201036526, "learning_rate": 4.967292292666004e-06, "loss": 0.5799, "step": 3054 }, { "epoch": 1.2074586317609286, "grad_norm": 0.45887016609605846, "learning_rate": 4.967267027603972e-06, "loss": 0.5672, "step": 3055 }, { "epoch": 1.2078537910595208, "grad_norm": 0.477396686518973, "learning_rate": 4.967241752852015e-06, "loss": 0.5846, "step": 3056 }, { "epoch": 1.208248950358113, "grad_norm": 0.4613218129856247, "learning_rate": 4.967216468410229e-06, "loss": 0.5661, "step": 3057 }, { "epoch": 1.2086441096567053, "grad_norm": 0.5240029995870954, "learning_rate": 4.9671911742787145e-06, "loss": 0.5709, "step": 3058 }, { "epoch": 1.2090392689552976, "grad_norm": 0.4363976879978063, "learning_rate": 4.967165870457573e-06, "loss": 0.5458, "step": 3059 }, { "epoch": 1.2094344282538898, "grad_norm": 0.43183191736087323, "learning_rate": 4.9671405569469e-06, "loss": 0.5615, "step": 3060 }, { "epoch": 1.209829587552482, "grad_norm": 0.48194750159483546, "learning_rate": 4.967115233746798e-06, "loss": 0.5692, "step": 3061 }, { "epoch": 1.2102247468510743, "grad_norm": 0.4403230569615395, "learning_rate": 4.967089900857366e-06, "loss": 0.5541, "step": 3062 }, { "epoch": 1.2106199061496665, "grad_norm": 0.4521141643968015, "learning_rate": 4.9670645582787025e-06, "loss": 0.5683, "step": 3063 }, { "epoch": 1.2110150654482588, "grad_norm": 0.4436203962884348, "learning_rate": 4.967039206010908e-06, "loss": 0.5293, "step": 3064 }, { "epoch": 1.211410224746851, "grad_norm": 0.4412702021651393, "learning_rate": 4.967013844054081e-06, "loss": 0.5575, "step": 3065 }, { "epoch": 1.2118053840454432, "grad_norm": 0.44313069086594686, "learning_rate": 4.966988472408322e-06, "loss": 0.5701, "step": 3066 }, { "epoch": 1.2122005433440355, "grad_norm": 0.45845037915893444, "learning_rate": 4.96696309107373e-06, "loss": 0.5817, "step": 3067 }, { "epoch": 1.2125957026426277, "grad_norm": 0.4574367758060889, "learning_rate": 4.966937700050405e-06, "loss": 0.5705, "step": 3068 }, { "epoch": 1.21299086194122, "grad_norm": 0.4471000171164327, "learning_rate": 4.966912299338447e-06, "loss": 0.5413, "step": 3069 }, { "epoch": 1.2133860212398122, "grad_norm": 0.45685994308151356, "learning_rate": 4.966886888937955e-06, "loss": 0.5688, "step": 3070 }, { "epoch": 1.2137811805384044, "grad_norm": 0.4397052448052631, "learning_rate": 4.96686146884903e-06, "loss": 0.5555, "step": 3071 }, { "epoch": 1.214176339836997, "grad_norm": 0.4522674726127247, "learning_rate": 4.96683603907177e-06, "loss": 0.5723, "step": 3072 }, { "epoch": 1.2145714991355891, "grad_norm": 0.46257064891670174, "learning_rate": 4.966810599606277e-06, "loss": 0.5795, "step": 3073 }, { "epoch": 1.2149666584341814, "grad_norm": 0.4638428914792885, "learning_rate": 4.9667851504526495e-06, "loss": 0.5765, "step": 3074 }, { "epoch": 1.2153618177327736, "grad_norm": 0.4633324633840189, "learning_rate": 4.966759691610989e-06, "loss": 0.5901, "step": 3075 }, { "epoch": 1.2157569770313659, "grad_norm": 0.4694538016404579, "learning_rate": 4.966734223081392e-06, "loss": 0.5755, "step": 3076 }, { "epoch": 1.2161521363299581, "grad_norm": 0.44870540089281646, "learning_rate": 4.966708744863962e-06, "loss": 0.5664, "step": 3077 }, { "epoch": 1.2165472956285504, "grad_norm": 0.4633771864468751, "learning_rate": 4.966683256958799e-06, "loss": 0.5683, "step": 3078 }, { "epoch": 1.2169424549271426, "grad_norm": 0.4746476377257439, "learning_rate": 4.966657759366e-06, "loss": 0.5647, "step": 3079 }, { "epoch": 1.2173376142257348, "grad_norm": 0.45147878211938924, "learning_rate": 4.966632252085669e-06, "loss": 0.5752, "step": 3080 }, { "epoch": 1.217732773524327, "grad_norm": 0.44156118894699065, "learning_rate": 4.966606735117902e-06, "loss": 0.5563, "step": 3081 }, { "epoch": 1.2181279328229193, "grad_norm": 0.45196789781220703, "learning_rate": 4.966581208462804e-06, "loss": 0.5475, "step": 3082 }, { "epoch": 1.2185230921215116, "grad_norm": 0.4442256161242522, "learning_rate": 4.966555672120472e-06, "loss": 0.5695, "step": 3083 }, { "epoch": 1.2189182514201038, "grad_norm": 0.44730065552547726, "learning_rate": 4.966530126091007e-06, "loss": 0.5599, "step": 3084 }, { "epoch": 1.219313410718696, "grad_norm": 0.4544579855898216, "learning_rate": 4.966504570374509e-06, "loss": 0.5451, "step": 3085 }, { "epoch": 1.2197085700172883, "grad_norm": 0.45542641771326065, "learning_rate": 4.9664790049710795e-06, "loss": 0.5691, "step": 3086 }, { "epoch": 1.2201037293158805, "grad_norm": 0.46855478341533124, "learning_rate": 4.966453429880818e-06, "loss": 0.5757, "step": 3087 }, { "epoch": 1.2204988886144728, "grad_norm": 0.43240914194877017, "learning_rate": 4.966427845103825e-06, "loss": 0.5719, "step": 3088 }, { "epoch": 1.220894047913065, "grad_norm": 0.4182707219417738, "learning_rate": 4.966402250640201e-06, "loss": 0.5522, "step": 3089 }, { "epoch": 1.2212892072116572, "grad_norm": 0.44846470069043676, "learning_rate": 4.9663766464900465e-06, "loss": 0.5701, "step": 3090 }, { "epoch": 1.2216843665102495, "grad_norm": 0.48379385795172264, "learning_rate": 4.966351032653463e-06, "loss": 0.5632, "step": 3091 }, { "epoch": 1.2220795258088417, "grad_norm": 0.4608900432943671, "learning_rate": 4.966325409130549e-06, "loss": 0.5672, "step": 3092 }, { "epoch": 1.222474685107434, "grad_norm": 0.465833480879931, "learning_rate": 4.9662997759214074e-06, "loss": 0.5651, "step": 3093 }, { "epoch": 1.2228698444060262, "grad_norm": 0.45118044139514435, "learning_rate": 4.966274133026138e-06, "loss": 0.573, "step": 3094 }, { "epoch": 1.2232650037046184, "grad_norm": 0.4421531493404346, "learning_rate": 4.966248480444841e-06, "loss": 0.5657, "step": 3095 }, { "epoch": 1.2236601630032107, "grad_norm": 0.44492466262445596, "learning_rate": 4.966222818177617e-06, "loss": 0.5542, "step": 3096 }, { "epoch": 1.224055322301803, "grad_norm": 0.4389512642580722, "learning_rate": 4.966197146224568e-06, "loss": 0.5646, "step": 3097 }, { "epoch": 1.2244504816003952, "grad_norm": 0.44609105957073, "learning_rate": 4.966171464585794e-06, "loss": 0.5512, "step": 3098 }, { "epoch": 1.2248456408989874, "grad_norm": 0.45028716091193877, "learning_rate": 4.966145773261396e-06, "loss": 0.5528, "step": 3099 }, { "epoch": 1.2252408001975796, "grad_norm": 0.4430745962507206, "learning_rate": 4.966120072251475e-06, "loss": 0.5486, "step": 3100 }, { "epoch": 1.2256359594961719, "grad_norm": 0.4565725474643139, "learning_rate": 4.966094361556132e-06, "loss": 0.5637, "step": 3101 }, { "epoch": 1.2260311187947641, "grad_norm": 0.4603270298388148, "learning_rate": 4.966068641175469e-06, "loss": 0.5742, "step": 3102 }, { "epoch": 1.2264262780933564, "grad_norm": 0.4413064560746586, "learning_rate": 4.966042911109584e-06, "loss": 0.5513, "step": 3103 }, { "epoch": 1.2268214373919486, "grad_norm": 0.45702241651020653, "learning_rate": 4.9660171713585805e-06, "loss": 0.5678, "step": 3104 }, { "epoch": 1.2272165966905408, "grad_norm": 0.47067105579936214, "learning_rate": 4.965991421922559e-06, "loss": 0.5632, "step": 3105 }, { "epoch": 1.227611755989133, "grad_norm": 0.5372601558221561, "learning_rate": 4.965965662801621e-06, "loss": 0.5575, "step": 3106 }, { "epoch": 1.2280069152877253, "grad_norm": 0.48016720590726975, "learning_rate": 4.965939893995867e-06, "loss": 0.574, "step": 3107 }, { "epoch": 1.2284020745863176, "grad_norm": 0.43658120984806364, "learning_rate": 4.965914115505398e-06, "loss": 0.5547, "step": 3108 }, { "epoch": 1.2287972338849098, "grad_norm": 0.4464718194752851, "learning_rate": 4.965888327330316e-06, "loss": 0.5738, "step": 3109 }, { "epoch": 1.229192393183502, "grad_norm": 0.44348094873374344, "learning_rate": 4.9658625294707226e-06, "loss": 0.5558, "step": 3110 }, { "epoch": 1.2295875524820943, "grad_norm": 0.45414022219810424, "learning_rate": 4.965836721926718e-06, "loss": 0.5591, "step": 3111 }, { "epoch": 1.2299827117806865, "grad_norm": 0.45465803398220034, "learning_rate": 4.965810904698404e-06, "loss": 0.5784, "step": 3112 }, { "epoch": 1.2303778710792788, "grad_norm": 0.42507296664604205, "learning_rate": 4.965785077785882e-06, "loss": 0.5589, "step": 3113 }, { "epoch": 1.230773030377871, "grad_norm": 0.4451171220895154, "learning_rate": 4.965759241189254e-06, "loss": 0.5653, "step": 3114 }, { "epoch": 1.2311681896764632, "grad_norm": 0.44320611707236784, "learning_rate": 4.965733394908621e-06, "loss": 0.5764, "step": 3115 }, { "epoch": 1.2315633489750555, "grad_norm": 0.4467513021648378, "learning_rate": 4.965707538944085e-06, "loss": 0.5681, "step": 3116 }, { "epoch": 1.2319585082736477, "grad_norm": 0.4962560415068218, "learning_rate": 4.9656816732957454e-06, "loss": 0.5772, "step": 3117 }, { "epoch": 1.23235366757224, "grad_norm": 0.4393887859931017, "learning_rate": 4.965655797963707e-06, "loss": 0.5647, "step": 3118 }, { "epoch": 1.2327488268708322, "grad_norm": 0.464778893222507, "learning_rate": 4.965629912948069e-06, "loss": 0.5851, "step": 3119 }, { "epoch": 1.2331439861694244, "grad_norm": 0.4525476192977244, "learning_rate": 4.965604018248934e-06, "loss": 0.578, "step": 3120 }, { "epoch": 1.2335391454680167, "grad_norm": 0.461394081784411, "learning_rate": 4.965578113866404e-06, "loss": 0.561, "step": 3121 }, { "epoch": 1.233934304766609, "grad_norm": 0.43936664461165786, "learning_rate": 4.96555219980058e-06, "loss": 0.5476, "step": 3122 }, { "epoch": 1.2343294640652012, "grad_norm": 0.4509074107094845, "learning_rate": 4.965526276051564e-06, "loss": 0.567, "step": 3123 }, { "epoch": 1.2347246233637934, "grad_norm": 0.45459696038357283, "learning_rate": 4.965500342619458e-06, "loss": 0.5763, "step": 3124 }, { "epoch": 1.2351197826623859, "grad_norm": 0.44821748870137185, "learning_rate": 4.965474399504364e-06, "loss": 0.5668, "step": 3125 }, { "epoch": 1.235514941960978, "grad_norm": 0.46197320609039444, "learning_rate": 4.965448446706384e-06, "loss": 0.5682, "step": 3126 }, { "epoch": 1.2359101012595703, "grad_norm": 0.4396093700838116, "learning_rate": 4.96542248422562e-06, "loss": 0.5632, "step": 3127 }, { "epoch": 1.2363052605581626, "grad_norm": 0.4308392401888905, "learning_rate": 4.965396512062171e-06, "loss": 0.5433, "step": 3128 }, { "epoch": 1.2367004198567548, "grad_norm": 0.462787889383493, "learning_rate": 4.9653705302161446e-06, "loss": 0.5841, "step": 3129 }, { "epoch": 1.237095579155347, "grad_norm": 0.43840660693999445, "learning_rate": 4.965344538687638e-06, "loss": 0.5452, "step": 3130 }, { "epoch": 1.2374907384539393, "grad_norm": 0.4602319352251958, "learning_rate": 4.965318537476756e-06, "loss": 0.5811, "step": 3131 }, { "epoch": 1.2378858977525315, "grad_norm": 0.45813906029186197, "learning_rate": 4.9652925265836e-06, "loss": 0.5694, "step": 3132 }, { "epoch": 1.2382810570511238, "grad_norm": 0.4492594870763079, "learning_rate": 4.965266506008271e-06, "loss": 0.5884, "step": 3133 }, { "epoch": 1.238676216349716, "grad_norm": 0.4627556622877327, "learning_rate": 4.9652404757508726e-06, "loss": 0.571, "step": 3134 }, { "epoch": 1.2390713756483083, "grad_norm": 0.4629993337469515, "learning_rate": 4.965214435811506e-06, "loss": 0.5669, "step": 3135 }, { "epoch": 1.2394665349469005, "grad_norm": 0.46398715244786704, "learning_rate": 4.965188386190275e-06, "loss": 0.5735, "step": 3136 }, { "epoch": 1.2398616942454928, "grad_norm": 0.44440876198440876, "learning_rate": 4.965162326887281e-06, "loss": 0.5669, "step": 3137 }, { "epoch": 1.240256853544085, "grad_norm": 0.49597914064913107, "learning_rate": 4.965136257902626e-06, "loss": 0.5807, "step": 3138 }, { "epoch": 1.2406520128426772, "grad_norm": 0.44555439287267584, "learning_rate": 4.965110179236412e-06, "loss": 0.5439, "step": 3139 }, { "epoch": 1.2410471721412695, "grad_norm": 0.45626121734971314, "learning_rate": 4.965084090888743e-06, "loss": 0.5689, "step": 3140 }, { "epoch": 1.2414423314398617, "grad_norm": 0.4363299527581059, "learning_rate": 4.96505799285972e-06, "loss": 0.5476, "step": 3141 }, { "epoch": 1.241837490738454, "grad_norm": 0.4671748329287142, "learning_rate": 4.9650318851494465e-06, "loss": 0.5817, "step": 3142 }, { "epoch": 1.2422326500370462, "grad_norm": 0.472754493128498, "learning_rate": 4.965005767758024e-06, "loss": 0.5701, "step": 3143 }, { "epoch": 1.2426278093356384, "grad_norm": 0.4760983983168848, "learning_rate": 4.964979640685557e-06, "loss": 0.5804, "step": 3144 }, { "epoch": 1.2430229686342307, "grad_norm": 0.4717372783720608, "learning_rate": 4.964953503932146e-06, "loss": 0.5867, "step": 3145 }, { "epoch": 1.243418127932823, "grad_norm": 0.45391804628270654, "learning_rate": 4.964927357497894e-06, "loss": 0.5214, "step": 3146 }, { "epoch": 1.2438132872314152, "grad_norm": 0.4407103558661319, "learning_rate": 4.964901201382905e-06, "loss": 0.572, "step": 3147 }, { "epoch": 1.2442084465300074, "grad_norm": 0.45045690316166226, "learning_rate": 4.96487503558728e-06, "loss": 0.5663, "step": 3148 }, { "epoch": 1.2446036058285996, "grad_norm": 0.4434691882497208, "learning_rate": 4.964848860111122e-06, "loss": 0.5473, "step": 3149 }, { "epoch": 1.2449987651271919, "grad_norm": 0.4460639321688961, "learning_rate": 4.964822674954536e-06, "loss": 0.5719, "step": 3150 }, { "epoch": 1.2453939244257841, "grad_norm": 0.4709745072765176, "learning_rate": 4.964796480117623e-06, "loss": 0.5756, "step": 3151 }, { "epoch": 1.2457890837243764, "grad_norm": 0.47049229358105993, "learning_rate": 4.9647702756004855e-06, "loss": 0.5714, "step": 3152 }, { "epoch": 1.2461842430229686, "grad_norm": 0.4649418473083078, "learning_rate": 4.964744061403227e-06, "loss": 0.5749, "step": 3153 }, { "epoch": 1.2465794023215608, "grad_norm": 0.47164232942914297, "learning_rate": 4.964717837525951e-06, "loss": 0.5735, "step": 3154 }, { "epoch": 1.246974561620153, "grad_norm": 0.4529213434049233, "learning_rate": 4.9646916039687594e-06, "loss": 0.5836, "step": 3155 }, { "epoch": 1.2473697209187453, "grad_norm": 0.45289976023026995, "learning_rate": 4.964665360731757e-06, "loss": 0.5695, "step": 3156 }, { "epoch": 1.2477648802173376, "grad_norm": 0.4780170631528699, "learning_rate": 4.964639107815044e-06, "loss": 0.5602, "step": 3157 }, { "epoch": 1.2481600395159298, "grad_norm": 0.44393148384313114, "learning_rate": 4.964612845218726e-06, "loss": 0.5618, "step": 3158 }, { "epoch": 1.248555198814522, "grad_norm": 0.4486878703274451, "learning_rate": 4.964586572942905e-06, "loss": 0.5715, "step": 3159 }, { "epoch": 1.2489503581131143, "grad_norm": 0.45262089196133376, "learning_rate": 4.964560290987686e-06, "loss": 0.5538, "step": 3160 }, { "epoch": 1.2493455174117065, "grad_norm": 0.46646019654372667, "learning_rate": 4.964533999353169e-06, "loss": 0.5561, "step": 3161 }, { "epoch": 1.2497406767102988, "grad_norm": 0.46395577187095943, "learning_rate": 4.96450769803946e-06, "loss": 0.5703, "step": 3162 }, { "epoch": 1.2501358360088912, "grad_norm": 0.43225782469002116, "learning_rate": 4.9644813870466605e-06, "loss": 0.5475, "step": 3163 }, { "epoch": 1.2505309953074835, "grad_norm": 0.4477412714097664, "learning_rate": 4.9644550663748755e-06, "loss": 0.588, "step": 3164 }, { "epoch": 1.2509261546060757, "grad_norm": 0.4488220491257586, "learning_rate": 4.964428736024207e-06, "loss": 0.5456, "step": 3165 }, { "epoch": 1.251321313904668, "grad_norm": 0.4591216638115739, "learning_rate": 4.964402395994759e-06, "loss": 0.5962, "step": 3166 }, { "epoch": 1.2517164732032602, "grad_norm": 0.4496651980462876, "learning_rate": 4.964376046286635e-06, "loss": 0.5773, "step": 3167 }, { "epoch": 1.2521116325018524, "grad_norm": 0.43068834285244056, "learning_rate": 4.964349686899938e-06, "loss": 0.5649, "step": 3168 }, { "epoch": 1.2525067918004447, "grad_norm": 0.43656858666444814, "learning_rate": 4.964323317834772e-06, "loss": 0.5606, "step": 3169 }, { "epoch": 1.252901951099037, "grad_norm": 0.4465521206805361, "learning_rate": 4.96429693909124e-06, "loss": 0.5581, "step": 3170 }, { "epoch": 1.2532971103976291, "grad_norm": 0.4356583772476069, "learning_rate": 4.964270550669447e-06, "loss": 0.554, "step": 3171 }, { "epoch": 1.2536922696962214, "grad_norm": 0.4538626705056506, "learning_rate": 4.964244152569495e-06, "loss": 0.5724, "step": 3172 }, { "epoch": 1.2540874289948136, "grad_norm": 0.44499801851560644, "learning_rate": 4.964217744791489e-06, "loss": 0.5675, "step": 3173 }, { "epoch": 1.2544825882934059, "grad_norm": 0.44634942356882007, "learning_rate": 4.964191327335531e-06, "loss": 0.5507, "step": 3174 }, { "epoch": 1.254877747591998, "grad_norm": 0.47867361712053463, "learning_rate": 4.964164900201726e-06, "loss": 0.5587, "step": 3175 }, { "epoch": 1.2552729068905903, "grad_norm": 0.4555734443969186, "learning_rate": 4.964138463390178e-06, "loss": 0.5646, "step": 3176 }, { "epoch": 1.2556680661891826, "grad_norm": 0.4584704800532935, "learning_rate": 4.964112016900991e-06, "loss": 0.5517, "step": 3177 }, { "epoch": 1.2560632254877748, "grad_norm": 0.45549434221447166, "learning_rate": 4.964085560734267e-06, "loss": 0.5771, "step": 3178 }, { "epoch": 1.256458384786367, "grad_norm": 0.44914572871190456, "learning_rate": 4.964059094890112e-06, "loss": 0.5617, "step": 3179 }, { "epoch": 1.2568535440849593, "grad_norm": 0.4512882813462357, "learning_rate": 4.964032619368629e-06, "loss": 0.5723, "step": 3180 }, { "epoch": 1.2572487033835515, "grad_norm": 0.4509607204943829, "learning_rate": 4.964006134169922e-06, "loss": 0.5643, "step": 3181 }, { "epoch": 1.2576438626821438, "grad_norm": 0.45921297434875, "learning_rate": 4.9639796392940955e-06, "loss": 0.5844, "step": 3182 }, { "epoch": 1.258039021980736, "grad_norm": 0.48942904209316174, "learning_rate": 4.963953134741253e-06, "loss": 0.5802, "step": 3183 }, { "epoch": 1.2584341812793283, "grad_norm": 0.45854271793023527, "learning_rate": 4.963926620511497e-06, "loss": 0.5815, "step": 3184 }, { "epoch": 1.2588293405779205, "grad_norm": 0.4560649261852496, "learning_rate": 4.963900096604936e-06, "loss": 0.5534, "step": 3185 }, { "epoch": 1.2592244998765127, "grad_norm": 0.4518649540196938, "learning_rate": 4.9638735630216704e-06, "loss": 0.5754, "step": 3186 }, { "epoch": 1.259619659175105, "grad_norm": 0.6754753158823785, "learning_rate": 4.963847019761806e-06, "loss": 0.5704, "step": 3187 }, { "epoch": 1.2600148184736972, "grad_norm": 0.452944137497009, "learning_rate": 4.9638204668254465e-06, "loss": 0.5644, "step": 3188 }, { "epoch": 1.2604099777722895, "grad_norm": 0.44138818835118054, "learning_rate": 4.9637939042126965e-06, "loss": 0.5624, "step": 3189 }, { "epoch": 1.2608051370708817, "grad_norm": 0.467795887447353, "learning_rate": 4.96376733192366e-06, "loss": 0.5909, "step": 3190 }, { "epoch": 1.261200296369474, "grad_norm": 0.4567278109168832, "learning_rate": 4.963740749958441e-06, "loss": 0.5645, "step": 3191 }, { "epoch": 1.2615954556680662, "grad_norm": 0.603665506544785, "learning_rate": 4.9637141583171456e-06, "loss": 0.5607, "step": 3192 }, { "epoch": 1.2619906149666584, "grad_norm": 0.44076800996835624, "learning_rate": 4.9636875569998756e-06, "loss": 0.5479, "step": 3193 }, { "epoch": 1.2623857742652507, "grad_norm": 0.4597963136042489, "learning_rate": 4.963660946006737e-06, "loss": 0.5677, "step": 3194 }, { "epoch": 1.262780933563843, "grad_norm": 0.45342024851568213, "learning_rate": 4.963634325337836e-06, "loss": 0.5452, "step": 3195 }, { "epoch": 1.2631760928624352, "grad_norm": 0.44313171394307665, "learning_rate": 4.9636076949932736e-06, "loss": 0.5699, "step": 3196 }, { "epoch": 1.2635712521610274, "grad_norm": 0.45404430511750704, "learning_rate": 4.9635810549731565e-06, "loss": 0.579, "step": 3197 }, { "epoch": 1.2639664114596196, "grad_norm": 0.4739171320560697, "learning_rate": 4.9635544052775895e-06, "loss": 0.5475, "step": 3198 }, { "epoch": 1.2643615707582119, "grad_norm": 0.4528216117045259, "learning_rate": 4.963527745906677e-06, "loss": 0.5431, "step": 3199 }, { "epoch": 1.2647567300568041, "grad_norm": 0.44237602152913297, "learning_rate": 4.963501076860522e-06, "loss": 0.5514, "step": 3200 }, { "epoch": 1.2651518893553964, "grad_norm": 0.4472333931200183, "learning_rate": 4.9634743981392316e-06, "loss": 0.5597, "step": 3201 }, { "epoch": 1.2655470486539886, "grad_norm": 0.4951585973064483, "learning_rate": 4.9634477097429105e-06, "loss": 0.5993, "step": 3202 }, { "epoch": 1.2659422079525808, "grad_norm": 0.4588298365010358, "learning_rate": 4.9634210116716606e-06, "loss": 0.5612, "step": 3203 }, { "epoch": 1.266337367251173, "grad_norm": 0.43949401153597856, "learning_rate": 4.96339430392559e-06, "loss": 0.5758, "step": 3204 }, { "epoch": 1.2667325265497653, "grad_norm": 0.4429353608767374, "learning_rate": 4.963367586504803e-06, "loss": 0.5581, "step": 3205 }, { "epoch": 1.2671276858483576, "grad_norm": 0.44990618359944873, "learning_rate": 4.963340859409404e-06, "loss": 0.554, "step": 3206 }, { "epoch": 1.2675228451469498, "grad_norm": 0.46874642934363553, "learning_rate": 4.963314122639497e-06, "loss": 0.5549, "step": 3207 }, { "epoch": 1.267918004445542, "grad_norm": 0.4366561437308232, "learning_rate": 4.963287376195188e-06, "loss": 0.568, "step": 3208 }, { "epoch": 1.2683131637441343, "grad_norm": 0.43922781969850494, "learning_rate": 4.963260620076582e-06, "loss": 0.5612, "step": 3209 }, { "epoch": 1.2687083230427265, "grad_norm": 0.46732316449861777, "learning_rate": 4.963233854283785e-06, "loss": 0.5675, "step": 3210 }, { "epoch": 1.2691034823413188, "grad_norm": 0.4499324082172506, "learning_rate": 4.9632070788169e-06, "loss": 0.572, "step": 3211 }, { "epoch": 1.269498641639911, "grad_norm": 0.4448079688254288, "learning_rate": 4.9631802936760345e-06, "loss": 0.5777, "step": 3212 }, { "epoch": 1.2698938009385032, "grad_norm": 0.4681934528119531, "learning_rate": 4.963153498861292e-06, "loss": 0.5642, "step": 3213 }, { "epoch": 1.2702889602370955, "grad_norm": 0.47693235289926394, "learning_rate": 4.963126694372777e-06, "loss": 0.5895, "step": 3214 }, { "epoch": 1.2706841195356877, "grad_norm": 0.448800942766577, "learning_rate": 4.963099880210598e-06, "loss": 0.5754, "step": 3215 }, { "epoch": 1.27107927883428, "grad_norm": 0.44749924388535084, "learning_rate": 4.9630730563748575e-06, "loss": 0.5618, "step": 3216 }, { "epoch": 1.2714744381328722, "grad_norm": 0.45578491730563103, "learning_rate": 4.963046222865662e-06, "loss": 0.5689, "step": 3217 }, { "epoch": 1.2718695974314644, "grad_norm": 0.5195796262214436, "learning_rate": 4.963019379683116e-06, "loss": 0.5939, "step": 3218 }, { "epoch": 1.2722647567300567, "grad_norm": 0.47649433003286457, "learning_rate": 4.962992526827326e-06, "loss": 0.5771, "step": 3219 }, { "epoch": 1.272659916028649, "grad_norm": 0.4410004496915249, "learning_rate": 4.962965664298396e-06, "loss": 0.5511, "step": 3220 }, { "epoch": 1.2730550753272412, "grad_norm": 0.4636086332760471, "learning_rate": 4.9629387920964335e-06, "loss": 0.5684, "step": 3221 }, { "epoch": 1.2734502346258336, "grad_norm": 0.46246815925142704, "learning_rate": 4.962911910221543e-06, "loss": 0.559, "step": 3222 }, { "epoch": 1.2738453939244259, "grad_norm": 0.46339823849118905, "learning_rate": 4.96288501867383e-06, "loss": 0.5571, "step": 3223 }, { "epoch": 1.274240553223018, "grad_norm": 0.4677333897938672, "learning_rate": 4.9628581174534e-06, "loss": 0.5793, "step": 3224 }, { "epoch": 1.2746357125216103, "grad_norm": 0.4710260149695448, "learning_rate": 4.962831206560358e-06, "loss": 0.5927, "step": 3225 }, { "epoch": 1.2750308718202026, "grad_norm": 0.45906156067530524, "learning_rate": 4.962804285994811e-06, "loss": 0.5508, "step": 3226 }, { "epoch": 1.2754260311187948, "grad_norm": 0.4389981450527275, "learning_rate": 4.962777355756865e-06, "loss": 0.5544, "step": 3227 }, { "epoch": 1.275821190417387, "grad_norm": 0.4566293309775638, "learning_rate": 4.962750415846624e-06, "loss": 0.5698, "step": 3228 }, { "epoch": 1.2762163497159793, "grad_norm": 0.4565239233936326, "learning_rate": 4.9627234662641965e-06, "loss": 0.5843, "step": 3229 }, { "epoch": 1.2766115090145715, "grad_norm": 0.4402634375950171, "learning_rate": 4.962696507009686e-06, "loss": 0.5716, "step": 3230 }, { "epoch": 1.2770066683131638, "grad_norm": 0.43986945562125607, "learning_rate": 4.962669538083198e-06, "loss": 0.5645, "step": 3231 }, { "epoch": 1.277401827611756, "grad_norm": 0.44151108436527936, "learning_rate": 4.9626425594848404e-06, "loss": 0.5351, "step": 3232 }, { "epoch": 1.2777969869103483, "grad_norm": 0.46054451936494795, "learning_rate": 4.962615571214718e-06, "loss": 0.575, "step": 3233 }, { "epoch": 1.2781921462089405, "grad_norm": 0.4832601682517206, "learning_rate": 4.9625885732729365e-06, "loss": 0.5724, "step": 3234 }, { "epoch": 1.2785873055075327, "grad_norm": 0.4496650178160313, "learning_rate": 4.962561565659603e-06, "loss": 0.571, "step": 3235 }, { "epoch": 1.278982464806125, "grad_norm": 0.4459537839833863, "learning_rate": 4.962534548374823e-06, "loss": 0.5496, "step": 3236 }, { "epoch": 1.2793776241047172, "grad_norm": 0.4820361417897107, "learning_rate": 4.962507521418703e-06, "loss": 0.5488, "step": 3237 }, { "epoch": 1.2797727834033095, "grad_norm": 0.4471059519027043, "learning_rate": 4.962480484791348e-06, "loss": 0.5557, "step": 3238 }, { "epoch": 1.2801679427019017, "grad_norm": 0.44540448662756593, "learning_rate": 4.962453438492865e-06, "loss": 0.5403, "step": 3239 }, { "epoch": 1.280563102000494, "grad_norm": 0.4706472390621206, "learning_rate": 4.962426382523361e-06, "loss": 0.575, "step": 3240 }, { "epoch": 1.2809582612990862, "grad_norm": 0.531102576848962, "learning_rate": 4.962399316882941e-06, "loss": 0.5533, "step": 3241 }, { "epoch": 1.2813534205976784, "grad_norm": 0.461239203184141, "learning_rate": 4.962372241571711e-06, "loss": 0.5704, "step": 3242 }, { "epoch": 1.2817485798962707, "grad_norm": 0.43407314953672027, "learning_rate": 4.962345156589779e-06, "loss": 0.5503, "step": 3243 }, { "epoch": 1.282143739194863, "grad_norm": 0.4493863750894706, "learning_rate": 4.9623180619372505e-06, "loss": 0.5661, "step": 3244 }, { "epoch": 1.2825388984934551, "grad_norm": 0.4832832516667832, "learning_rate": 4.962290957614231e-06, "loss": 0.572, "step": 3245 }, { "epoch": 1.2829340577920474, "grad_norm": 0.4503676002104791, "learning_rate": 4.962263843620828e-06, "loss": 0.5575, "step": 3246 }, { "epoch": 1.2833292170906396, "grad_norm": 0.44527990189860395, "learning_rate": 4.9622367199571485e-06, "loss": 0.5598, "step": 3247 }, { "epoch": 1.2837243763892319, "grad_norm": 0.4360326535872034, "learning_rate": 4.962209586623298e-06, "loss": 0.5625, "step": 3248 }, { "epoch": 1.284119535687824, "grad_norm": 0.46369867577006174, "learning_rate": 4.962182443619383e-06, "loss": 0.5547, "step": 3249 }, { "epoch": 1.2845146949864163, "grad_norm": 0.47697119598465904, "learning_rate": 4.962155290945511e-06, "loss": 0.5644, "step": 3250 }, { "epoch": 1.2849098542850086, "grad_norm": 0.4572682032044699, "learning_rate": 4.962128128601787e-06, "loss": 0.6004, "step": 3251 }, { "epoch": 1.2853050135836008, "grad_norm": 0.46293963615887146, "learning_rate": 4.96210095658832e-06, "loss": 0.5735, "step": 3252 }, { "epoch": 1.285700172882193, "grad_norm": 0.4353007660494593, "learning_rate": 4.962073774905216e-06, "loss": 0.5515, "step": 3253 }, { "epoch": 1.2860953321807853, "grad_norm": 0.4336239776816601, "learning_rate": 4.96204658355258e-06, "loss": 0.5479, "step": 3254 }, { "epoch": 1.2864904914793776, "grad_norm": 0.4385631160161934, "learning_rate": 4.962019382530521e-06, "loss": 0.5581, "step": 3255 }, { "epoch": 1.2868856507779698, "grad_norm": 0.47634454262632553, "learning_rate": 4.961992171839144e-06, "loss": 0.5794, "step": 3256 }, { "epoch": 1.287280810076562, "grad_norm": 0.4672479339967555, "learning_rate": 4.961964951478557e-06, "loss": 0.5606, "step": 3257 }, { "epoch": 1.2876759693751545, "grad_norm": 0.46957567960558677, "learning_rate": 4.961937721448867e-06, "loss": 0.5643, "step": 3258 }, { "epoch": 1.2880711286737467, "grad_norm": 0.4773737016736467, "learning_rate": 4.961910481750181e-06, "loss": 0.5854, "step": 3259 }, { "epoch": 1.288466287972339, "grad_norm": 0.4587288767028595, "learning_rate": 4.961883232382604e-06, "loss": 0.5533, "step": 3260 }, { "epoch": 1.2888614472709312, "grad_norm": 0.4752043572517741, "learning_rate": 4.961855973346246e-06, "loss": 0.5837, "step": 3261 }, { "epoch": 1.2892566065695235, "grad_norm": 0.4587769372346428, "learning_rate": 4.961828704641212e-06, "loss": 0.5547, "step": 3262 }, { "epoch": 1.2896517658681157, "grad_norm": 0.4660024170483272, "learning_rate": 4.96180142626761e-06, "loss": 0.5511, "step": 3263 }, { "epoch": 1.290046925166708, "grad_norm": 0.483664785494738, "learning_rate": 4.961774138225547e-06, "loss": 0.5766, "step": 3264 }, { "epoch": 1.2904420844653002, "grad_norm": 0.4518446012693695, "learning_rate": 4.96174684051513e-06, "loss": 0.5557, "step": 3265 }, { "epoch": 1.2908372437638924, "grad_norm": 0.44730158472491605, "learning_rate": 4.961719533136466e-06, "loss": 0.5657, "step": 3266 }, { "epoch": 1.2912324030624847, "grad_norm": 0.45919328333593834, "learning_rate": 4.961692216089663e-06, "loss": 0.5659, "step": 3267 }, { "epoch": 1.291627562361077, "grad_norm": 0.4670554649733668, "learning_rate": 4.961664889374827e-06, "loss": 0.5705, "step": 3268 }, { "epoch": 1.2920227216596691, "grad_norm": 0.4470155299015426, "learning_rate": 4.961637552992067e-06, "loss": 0.5422, "step": 3269 }, { "epoch": 1.2924178809582614, "grad_norm": 0.44944938927627665, "learning_rate": 4.961610206941488e-06, "loss": 0.5603, "step": 3270 }, { "epoch": 1.2928130402568536, "grad_norm": 0.45882995864498305, "learning_rate": 4.961582851223201e-06, "loss": 0.5623, "step": 3271 }, { "epoch": 1.2932081995554459, "grad_norm": 0.44864834182329033, "learning_rate": 4.96155548583731e-06, "loss": 0.5536, "step": 3272 }, { "epoch": 1.293603358854038, "grad_norm": 0.4365082513173151, "learning_rate": 4.961528110783924e-06, "loss": 0.5492, "step": 3273 }, { "epoch": 1.2939985181526303, "grad_norm": 0.4637763227170593, "learning_rate": 4.961500726063151e-06, "loss": 0.5704, "step": 3274 }, { "epoch": 1.2943936774512226, "grad_norm": 0.4389304914923759, "learning_rate": 4.961473331675096e-06, "loss": 0.561, "step": 3275 }, { "epoch": 1.2947888367498148, "grad_norm": 0.44926731822340105, "learning_rate": 4.9614459276198705e-06, "loss": 0.582, "step": 3276 }, { "epoch": 1.295183996048407, "grad_norm": 0.4486660860991371, "learning_rate": 4.961418513897579e-06, "loss": 0.5635, "step": 3277 }, { "epoch": 1.2955791553469993, "grad_norm": 0.4527867437707233, "learning_rate": 4.96139109050833e-06, "loss": 0.5733, "step": 3278 }, { "epoch": 1.2959743146455915, "grad_norm": 0.45347608716724236, "learning_rate": 4.961363657452232e-06, "loss": 0.5518, "step": 3279 }, { "epoch": 1.2963694739441838, "grad_norm": 0.4508697870043093, "learning_rate": 4.961336214729392e-06, "loss": 0.5548, "step": 3280 }, { "epoch": 1.296764633242776, "grad_norm": 0.4637754516249654, "learning_rate": 4.961308762339918e-06, "loss": 0.5638, "step": 3281 }, { "epoch": 1.2971597925413683, "grad_norm": 0.4574438583788397, "learning_rate": 4.961281300283918e-06, "loss": 0.5516, "step": 3282 }, { "epoch": 1.2975549518399605, "grad_norm": 0.4476663326378099, "learning_rate": 4.961253828561499e-06, "loss": 0.5494, "step": 3283 }, { "epoch": 1.2979501111385527, "grad_norm": 0.45758420853662424, "learning_rate": 4.96122634717277e-06, "loss": 0.5669, "step": 3284 }, { "epoch": 1.298345270437145, "grad_norm": 0.4730047069561211, "learning_rate": 4.9611988561178385e-06, "loss": 0.5748, "step": 3285 }, { "epoch": 1.2987404297357372, "grad_norm": 0.44003229484144685, "learning_rate": 4.9611713553968125e-06, "loss": 0.5465, "step": 3286 }, { "epoch": 1.2991355890343295, "grad_norm": 0.4411529968194715, "learning_rate": 4.9611438450098e-06, "loss": 0.5559, "step": 3287 }, { "epoch": 1.2995307483329217, "grad_norm": 0.4778237900402621, "learning_rate": 4.9611163249569085e-06, "loss": 0.5639, "step": 3288 }, { "epoch": 1.299925907631514, "grad_norm": 0.44974909893119686, "learning_rate": 4.961088795238247e-06, "loss": 0.5758, "step": 3289 }, { "epoch": 1.3003210669301062, "grad_norm": 0.4378290859300243, "learning_rate": 4.9610612558539214e-06, "loss": 0.5689, "step": 3290 }, { "epoch": 1.3007162262286984, "grad_norm": 0.4429720821425789, "learning_rate": 4.961033706804044e-06, "loss": 0.5606, "step": 3291 }, { "epoch": 1.3011113855272907, "grad_norm": 0.4633926009741077, "learning_rate": 4.961006148088719e-06, "loss": 0.5631, "step": 3292 }, { "epoch": 1.301506544825883, "grad_norm": 0.46706115854462443, "learning_rate": 4.960978579708058e-06, "loss": 0.5813, "step": 3293 }, { "epoch": 1.3019017041244751, "grad_norm": 0.47368413285383676, "learning_rate": 4.9609510016621655e-06, "loss": 0.5883, "step": 3294 }, { "epoch": 1.3022968634230674, "grad_norm": 0.4737507951875103, "learning_rate": 4.960923413951153e-06, "loss": 0.5748, "step": 3295 }, { "epoch": 1.3026920227216596, "grad_norm": 0.4570347719095943, "learning_rate": 4.960895816575127e-06, "loss": 0.5692, "step": 3296 }, { "epoch": 1.3030871820202519, "grad_norm": 0.4577457839859938, "learning_rate": 4.960868209534198e-06, "loss": 0.5562, "step": 3297 }, { "epoch": 1.303482341318844, "grad_norm": 0.47003885252402855, "learning_rate": 4.960840592828472e-06, "loss": 0.5734, "step": 3298 }, { "epoch": 1.3038775006174363, "grad_norm": 0.4624743876597724, "learning_rate": 4.960812966458058e-06, "loss": 0.5603, "step": 3299 }, { "epoch": 1.3042726599160286, "grad_norm": 0.48901567850108524, "learning_rate": 4.960785330423066e-06, "loss": 0.5893, "step": 3300 }, { "epoch": 1.3046678192146208, "grad_norm": 0.4522014166198177, "learning_rate": 4.960757684723603e-06, "loss": 0.5538, "step": 3301 }, { "epoch": 1.305062978513213, "grad_norm": 0.45770794284942456, "learning_rate": 4.9607300293597774e-06, "loss": 0.5656, "step": 3302 }, { "epoch": 1.3054581378118053, "grad_norm": 0.4322173217917462, "learning_rate": 4.960702364331699e-06, "loss": 0.5497, "step": 3303 }, { "epoch": 1.3058532971103975, "grad_norm": 0.4381343619517814, "learning_rate": 4.960674689639477e-06, "loss": 0.5591, "step": 3304 }, { "epoch": 1.3062484564089898, "grad_norm": 0.44151218550571697, "learning_rate": 4.960647005283217e-06, "loss": 0.5694, "step": 3305 }, { "epoch": 1.306643615707582, "grad_norm": 0.43450554135557107, "learning_rate": 4.960619311263031e-06, "loss": 0.5803, "step": 3306 }, { "epoch": 1.3070387750061743, "grad_norm": 0.42935555212955734, "learning_rate": 4.960591607579026e-06, "loss": 0.5367, "step": 3307 }, { "epoch": 1.3074339343047665, "grad_norm": 0.44216935598010304, "learning_rate": 4.960563894231312e-06, "loss": 0.5526, "step": 3308 }, { "epoch": 1.3078290936033587, "grad_norm": 0.4436461836567033, "learning_rate": 4.960536171219997e-06, "loss": 0.5658, "step": 3309 }, { "epoch": 1.308224252901951, "grad_norm": 0.46145987300596963, "learning_rate": 4.96050843854519e-06, "loss": 0.5712, "step": 3310 }, { "epoch": 1.3086194122005432, "grad_norm": 0.4540758807723103, "learning_rate": 4.960480696206999e-06, "loss": 0.5551, "step": 3311 }, { "epoch": 1.3090145714991355, "grad_norm": 0.4371619617022349, "learning_rate": 4.960452944205535e-06, "loss": 0.5526, "step": 3312 }, { "epoch": 1.3094097307977277, "grad_norm": 0.44906152086035417, "learning_rate": 4.960425182540905e-06, "loss": 0.556, "step": 3313 }, { "epoch": 1.30980489009632, "grad_norm": 0.6283881072933288, "learning_rate": 4.96039741121322e-06, "loss": 0.6207, "step": 3314 }, { "epoch": 1.3102000493949122, "grad_norm": 0.43354415802766594, "learning_rate": 4.960369630222588e-06, "loss": 0.5782, "step": 3315 }, { "epoch": 1.3105952086935044, "grad_norm": 0.4573902248334659, "learning_rate": 4.960341839569117e-06, "loss": 0.5657, "step": 3316 }, { "epoch": 1.310990367992097, "grad_norm": 0.4618875741095821, "learning_rate": 4.9603140392529185e-06, "loss": 0.5504, "step": 3317 }, { "epoch": 1.3113855272906891, "grad_norm": 0.4335794680087329, "learning_rate": 4.9602862292740995e-06, "loss": 0.547, "step": 3318 }, { "epoch": 1.3117806865892814, "grad_norm": 0.46798359661776007, "learning_rate": 4.960258409632771e-06, "loss": 0.5565, "step": 3319 }, { "epoch": 1.3121758458878736, "grad_norm": 0.44285226370838654, "learning_rate": 4.960230580329041e-06, "loss": 0.5641, "step": 3320 }, { "epoch": 1.3125710051864659, "grad_norm": 0.4656857860504189, "learning_rate": 4.960202741363018e-06, "loss": 0.5633, "step": 3321 }, { "epoch": 1.312966164485058, "grad_norm": 0.4524731635101246, "learning_rate": 4.960174892734813e-06, "loss": 0.5607, "step": 3322 }, { "epoch": 1.3133613237836503, "grad_norm": 0.456230055946175, "learning_rate": 4.960147034444537e-06, "loss": 0.5584, "step": 3323 }, { "epoch": 1.3137564830822426, "grad_norm": 0.4772423713788212, "learning_rate": 4.960119166492295e-06, "loss": 0.57, "step": 3324 }, { "epoch": 1.3141516423808348, "grad_norm": 0.4586733788184475, "learning_rate": 4.9600912888782e-06, "loss": 0.5758, "step": 3325 }, { "epoch": 1.314546801679427, "grad_norm": 0.5480041169557188, "learning_rate": 4.9600634016023606e-06, "loss": 0.5772, "step": 3326 }, { "epoch": 1.3149419609780193, "grad_norm": 0.4415783584140013, "learning_rate": 4.960035504664885e-06, "loss": 0.5404, "step": 3327 }, { "epoch": 1.3153371202766115, "grad_norm": 0.4291556202863314, "learning_rate": 4.960007598065884e-06, "loss": 0.5612, "step": 3328 }, { "epoch": 1.3157322795752038, "grad_norm": 0.44774827375999054, "learning_rate": 4.959979681805467e-06, "loss": 0.5776, "step": 3329 }, { "epoch": 1.316127438873796, "grad_norm": 0.44973594796897337, "learning_rate": 4.959951755883744e-06, "loss": 0.5768, "step": 3330 }, { "epoch": 1.3165225981723883, "grad_norm": 0.44813772825578874, "learning_rate": 4.959923820300824e-06, "loss": 0.5623, "step": 3331 }, { "epoch": 1.3169177574709805, "grad_norm": 0.44654411240174396, "learning_rate": 4.959895875056816e-06, "loss": 0.5655, "step": 3332 }, { "epoch": 1.3173129167695727, "grad_norm": 0.4373133409477842, "learning_rate": 4.959867920151832e-06, "loss": 0.5592, "step": 3333 }, { "epoch": 1.317708076068165, "grad_norm": 0.4363324235460165, "learning_rate": 4.95983995558598e-06, "loss": 0.5492, "step": 3334 }, { "epoch": 1.3181032353667572, "grad_norm": 0.4346900282961237, "learning_rate": 4.95981198135937e-06, "loss": 0.5444, "step": 3335 }, { "epoch": 1.3184983946653495, "grad_norm": 0.4523110093393848, "learning_rate": 4.959783997472113e-06, "loss": 0.5536, "step": 3336 }, { "epoch": 1.3188935539639417, "grad_norm": 0.46544382830839454, "learning_rate": 4.959756003924317e-06, "loss": 0.5702, "step": 3337 }, { "epoch": 1.319288713262534, "grad_norm": 0.44772145946647923, "learning_rate": 4.959728000716094e-06, "loss": 0.5616, "step": 3338 }, { "epoch": 1.3196838725611262, "grad_norm": 0.4384987036176576, "learning_rate": 4.959699987847554e-06, "loss": 0.5685, "step": 3339 }, { "epoch": 1.3200790318597184, "grad_norm": 0.43847040276558563, "learning_rate": 4.9596719653188045e-06, "loss": 0.5518, "step": 3340 }, { "epoch": 1.3204741911583107, "grad_norm": 0.45117712104049984, "learning_rate": 4.959643933129958e-06, "loss": 0.5734, "step": 3341 }, { "epoch": 1.320869350456903, "grad_norm": 0.45533362827402557, "learning_rate": 4.9596158912811235e-06, "loss": 0.5649, "step": 3342 }, { "epoch": 1.3212645097554951, "grad_norm": 0.44551751419590213, "learning_rate": 4.9595878397724106e-06, "loss": 0.5778, "step": 3343 }, { "epoch": 1.3216596690540874, "grad_norm": 0.4370316849726543, "learning_rate": 4.959559778603931e-06, "loss": 0.5602, "step": 3344 }, { "epoch": 1.3220548283526796, "grad_norm": 0.4599955825604526, "learning_rate": 4.959531707775793e-06, "loss": 0.571, "step": 3345 }, { "epoch": 1.3224499876512719, "grad_norm": 0.4424416975622541, "learning_rate": 4.959503627288109e-06, "loss": 0.561, "step": 3346 }, { "epoch": 1.322845146949864, "grad_norm": 0.43661943086497584, "learning_rate": 4.959475537140989e-06, "loss": 0.5523, "step": 3347 }, { "epoch": 1.3232403062484563, "grad_norm": 0.4373998487954772, "learning_rate": 4.959447437334541e-06, "loss": 0.5613, "step": 3348 }, { "epoch": 1.3236354655470486, "grad_norm": 0.4494818246912831, "learning_rate": 4.959419327868878e-06, "loss": 0.5775, "step": 3349 }, { "epoch": 1.3240306248456408, "grad_norm": 0.43842069386255866, "learning_rate": 4.959391208744108e-06, "loss": 0.5677, "step": 3350 }, { "epoch": 1.324425784144233, "grad_norm": 0.4494362228050278, "learning_rate": 4.959363079960344e-06, "loss": 0.5517, "step": 3351 }, { "epoch": 1.3248209434428255, "grad_norm": 0.45526558767555503, "learning_rate": 4.959334941517695e-06, "loss": 0.572, "step": 3352 }, { "epoch": 1.3252161027414178, "grad_norm": 0.44027079643934225, "learning_rate": 4.959306793416271e-06, "loss": 0.5698, "step": 3353 }, { "epoch": 1.32561126204001, "grad_norm": 0.4405471076038371, "learning_rate": 4.959278635656184e-06, "loss": 0.5653, "step": 3354 }, { "epoch": 1.3260064213386022, "grad_norm": 0.45167721473900385, "learning_rate": 4.959250468237544e-06, "loss": 0.5757, "step": 3355 }, { "epoch": 1.3264015806371945, "grad_norm": 0.4421108044099741, "learning_rate": 4.959222291160461e-06, "loss": 0.5642, "step": 3356 }, { "epoch": 1.3267967399357867, "grad_norm": 0.4431212984296979, "learning_rate": 4.959194104425047e-06, "loss": 0.5505, "step": 3357 }, { "epoch": 1.327191899234379, "grad_norm": 0.4521254913136757, "learning_rate": 4.95916590803141e-06, "loss": 0.5674, "step": 3358 }, { "epoch": 1.3275870585329712, "grad_norm": 0.4738728564671993, "learning_rate": 4.959137701979665e-06, "loss": 0.5737, "step": 3359 }, { "epoch": 1.3279822178315634, "grad_norm": 0.4503578411569048, "learning_rate": 4.9591094862699185e-06, "loss": 0.5767, "step": 3360 }, { "epoch": 1.3283773771301557, "grad_norm": 0.4593996821030863, "learning_rate": 4.959081260902284e-06, "loss": 0.5819, "step": 3361 }, { "epoch": 1.328772536428748, "grad_norm": 0.47458910560702716, "learning_rate": 4.959053025876871e-06, "loss": 0.5751, "step": 3362 }, { "epoch": 1.3291676957273402, "grad_norm": 0.4601597399895584, "learning_rate": 4.959024781193792e-06, "loss": 0.5502, "step": 3363 }, { "epoch": 1.3295628550259324, "grad_norm": 0.43863004190964605, "learning_rate": 4.958996526853156e-06, "loss": 0.552, "step": 3364 }, { "epoch": 1.3299580143245247, "grad_norm": 0.449223468368665, "learning_rate": 4.958968262855075e-06, "loss": 0.5726, "step": 3365 }, { "epoch": 1.330353173623117, "grad_norm": 0.45615386443341344, "learning_rate": 4.958939989199659e-06, "loss": 0.5764, "step": 3366 }, { "epoch": 1.3307483329217091, "grad_norm": 0.44070542771087373, "learning_rate": 4.958911705887022e-06, "loss": 0.5687, "step": 3367 }, { "epoch": 1.3311434922203014, "grad_norm": 0.44414182367934024, "learning_rate": 4.958883412917271e-06, "loss": 0.5658, "step": 3368 }, { "epoch": 1.3315386515188936, "grad_norm": 0.44838583407126426, "learning_rate": 4.9588551102905205e-06, "loss": 0.5716, "step": 3369 }, { "epoch": 1.3319338108174859, "grad_norm": 0.889969275288809, "learning_rate": 4.9588267980068795e-06, "loss": 0.556, "step": 3370 }, { "epoch": 1.332328970116078, "grad_norm": 0.46358754540200775, "learning_rate": 4.95879847606646e-06, "loss": 0.5861, "step": 3371 }, { "epoch": 1.3327241294146703, "grad_norm": 0.4282860491355915, "learning_rate": 4.958770144469372e-06, "loss": 0.5428, "step": 3372 }, { "epoch": 1.3331192887132626, "grad_norm": 0.45729755865769983, "learning_rate": 4.95874180321573e-06, "loss": 0.5856, "step": 3373 }, { "epoch": 1.3335144480118548, "grad_norm": 0.45588398035202177, "learning_rate": 4.958713452305642e-06, "loss": 0.5713, "step": 3374 }, { "epoch": 1.333909607310447, "grad_norm": 0.44858104138575744, "learning_rate": 4.958685091739221e-06, "loss": 0.5657, "step": 3375 }, { "epoch": 1.3343047666090393, "grad_norm": 0.44036953218064384, "learning_rate": 4.958656721516577e-06, "loss": 0.5587, "step": 3376 }, { "epoch": 1.3346999259076315, "grad_norm": 0.4540748920166802, "learning_rate": 4.958628341637823e-06, "loss": 0.5646, "step": 3377 }, { "epoch": 1.3350950852062238, "grad_norm": 0.4396313054654993, "learning_rate": 4.9585999521030704e-06, "loss": 0.5464, "step": 3378 }, { "epoch": 1.335490244504816, "grad_norm": 0.4454412703380863, "learning_rate": 4.958571552912429e-06, "loss": 0.5545, "step": 3379 }, { "epoch": 1.3358854038034083, "grad_norm": 0.45082119473828774, "learning_rate": 4.9585431440660125e-06, "loss": 0.561, "step": 3380 }, { "epoch": 1.3362805631020005, "grad_norm": 0.4374755472502395, "learning_rate": 4.958514725563931e-06, "loss": 0.5571, "step": 3381 }, { "epoch": 1.3366757224005927, "grad_norm": 0.46478260478991495, "learning_rate": 4.958486297406296e-06, "loss": 0.5891, "step": 3382 }, { "epoch": 1.337070881699185, "grad_norm": 0.43814804662705964, "learning_rate": 4.958457859593221e-06, "loss": 0.565, "step": 3383 }, { "epoch": 1.3374660409977772, "grad_norm": 0.44507180809627767, "learning_rate": 4.958429412124816e-06, "loss": 0.5599, "step": 3384 }, { "epoch": 1.3378612002963695, "grad_norm": 0.441421838525371, "learning_rate": 4.958400955001193e-06, "loss": 0.5428, "step": 3385 }, { "epoch": 1.3382563595949617, "grad_norm": 0.44374387122522124, "learning_rate": 4.958372488222463e-06, "loss": 0.5403, "step": 3386 }, { "epoch": 1.338651518893554, "grad_norm": 0.4467964188449642, "learning_rate": 4.958344011788739e-06, "loss": 0.5503, "step": 3387 }, { "epoch": 1.3390466781921462, "grad_norm": 0.4479117153175756, "learning_rate": 4.958315525700134e-06, "loss": 0.5775, "step": 3388 }, { "epoch": 1.3394418374907384, "grad_norm": 0.5332367747737651, "learning_rate": 4.958287029956757e-06, "loss": 0.5715, "step": 3389 }, { "epoch": 1.3398369967893307, "grad_norm": 0.45663522892567066, "learning_rate": 4.958258524558721e-06, "loss": 0.5881, "step": 3390 }, { "epoch": 1.340232156087923, "grad_norm": 0.4521841960229859, "learning_rate": 4.958230009506139e-06, "loss": 0.5603, "step": 3391 }, { "epoch": 1.3406273153865151, "grad_norm": 0.45825530510638524, "learning_rate": 4.958201484799122e-06, "loss": 0.5664, "step": 3392 }, { "epoch": 1.3410224746851074, "grad_norm": 0.45965721161303025, "learning_rate": 4.958172950437782e-06, "loss": 0.5715, "step": 3393 }, { "epoch": 1.3414176339836996, "grad_norm": 0.4412893524397775, "learning_rate": 4.958144406422232e-06, "loss": 0.5709, "step": 3394 }, { "epoch": 1.3418127932822919, "grad_norm": 0.45819164319217937, "learning_rate": 4.958115852752583e-06, "loss": 0.5666, "step": 3395 }, { "epoch": 1.342207952580884, "grad_norm": 0.45080426168635684, "learning_rate": 4.958087289428948e-06, "loss": 0.5452, "step": 3396 }, { "epoch": 1.3426031118794763, "grad_norm": 0.46299421201178154, "learning_rate": 4.958058716451438e-06, "loss": 0.5788, "step": 3397 }, { "epoch": 1.3429982711780686, "grad_norm": 0.44631538749161104, "learning_rate": 4.9580301338201665e-06, "loss": 0.5605, "step": 3398 }, { "epoch": 1.3433934304766608, "grad_norm": 0.4753236947479198, "learning_rate": 4.9580015415352446e-06, "loss": 0.5833, "step": 3399 }, { "epoch": 1.343788589775253, "grad_norm": 0.4543478319902544, "learning_rate": 4.9579729395967855e-06, "loss": 0.5459, "step": 3400 }, { "epoch": 1.3441837490738453, "grad_norm": 0.42397174780842645, "learning_rate": 4.957944328004902e-06, "loss": 0.5408, "step": 3401 }, { "epoch": 1.3445789083724375, "grad_norm": 0.44366683749260666, "learning_rate": 4.957915706759705e-06, "loss": 0.5672, "step": 3402 }, { "epoch": 1.3449740676710298, "grad_norm": 0.4620326978290853, "learning_rate": 4.957887075861308e-06, "loss": 0.5912, "step": 3403 }, { "epoch": 1.345369226969622, "grad_norm": 0.437254776572285, "learning_rate": 4.957858435309822e-06, "loss": 0.5584, "step": 3404 }, { "epoch": 1.3457643862682143, "grad_norm": 0.44334059234747064, "learning_rate": 4.9578297851053626e-06, "loss": 0.5552, "step": 3405 }, { "epoch": 1.3461595455668065, "grad_norm": 0.45277563926558906, "learning_rate": 4.957801125248038e-06, "loss": 0.5796, "step": 3406 }, { "epoch": 1.3465547048653987, "grad_norm": 0.4529953395968256, "learning_rate": 4.957772455737965e-06, "loss": 0.5645, "step": 3407 }, { "epoch": 1.346949864163991, "grad_norm": 0.45953659145635833, "learning_rate": 4.9577437765752535e-06, "loss": 0.5551, "step": 3408 }, { "epoch": 1.3473450234625832, "grad_norm": 0.45243007027630033, "learning_rate": 4.957715087760017e-06, "loss": 0.562, "step": 3409 }, { "epoch": 1.3477401827611755, "grad_norm": 0.44813612053487434, "learning_rate": 4.9576863892923675e-06, "loss": 0.5669, "step": 3410 }, { "epoch": 1.348135342059768, "grad_norm": 0.4414843229708112, "learning_rate": 4.957657681172419e-06, "loss": 0.5564, "step": 3411 }, { "epoch": 1.3485305013583602, "grad_norm": 0.4762734336021678, "learning_rate": 4.957628963400283e-06, "loss": 0.5576, "step": 3412 }, { "epoch": 1.3489256606569524, "grad_norm": 0.4651845435138158, "learning_rate": 4.957600235976072e-06, "loss": 0.5471, "step": 3413 }, { "epoch": 1.3493208199555446, "grad_norm": 0.4394461136350005, "learning_rate": 4.957571498899901e-06, "loss": 0.5542, "step": 3414 }, { "epoch": 1.3497159792541369, "grad_norm": 0.44448501672351937, "learning_rate": 4.95754275217188e-06, "loss": 0.5635, "step": 3415 }, { "epoch": 1.3501111385527291, "grad_norm": 0.4620591465811548, "learning_rate": 4.9575139957921245e-06, "loss": 0.5587, "step": 3416 }, { "epoch": 1.3505062978513214, "grad_norm": 0.45815520871269744, "learning_rate": 4.957485229760747e-06, "loss": 0.5652, "step": 3417 }, { "epoch": 1.3509014571499136, "grad_norm": 0.4493406020218002, "learning_rate": 4.957456454077858e-06, "loss": 0.5441, "step": 3418 }, { "epoch": 1.3512966164485058, "grad_norm": 0.454603776283902, "learning_rate": 4.957427668743573e-06, "loss": 0.5598, "step": 3419 }, { "epoch": 1.351691775747098, "grad_norm": 0.47114620064149954, "learning_rate": 4.9573988737580045e-06, "loss": 0.5714, "step": 3420 }, { "epoch": 1.3520869350456903, "grad_norm": 0.4559096213569245, "learning_rate": 4.957370069121265e-06, "loss": 0.5616, "step": 3421 }, { "epoch": 1.3524820943442826, "grad_norm": 0.42729193122760173, "learning_rate": 4.95734125483347e-06, "loss": 0.5561, "step": 3422 }, { "epoch": 1.3528772536428748, "grad_norm": 0.46360094062631574, "learning_rate": 4.957312430894729e-06, "loss": 0.5475, "step": 3423 }, { "epoch": 1.353272412941467, "grad_norm": 0.44865908216487266, "learning_rate": 4.957283597305157e-06, "loss": 0.5744, "step": 3424 }, { "epoch": 1.3536675722400593, "grad_norm": 0.4527404050788081, "learning_rate": 4.957254754064867e-06, "loss": 0.5691, "step": 3425 }, { "epoch": 1.3540627315386515, "grad_norm": 0.4380566310678888, "learning_rate": 4.957225901173973e-06, "loss": 0.5675, "step": 3426 }, { "epoch": 1.3544578908372438, "grad_norm": 0.4480119306626472, "learning_rate": 4.957197038632587e-06, "loss": 0.5794, "step": 3427 }, { "epoch": 1.354853050135836, "grad_norm": 0.44898242236087227, "learning_rate": 4.957168166440824e-06, "loss": 0.5749, "step": 3428 }, { "epoch": 1.3552482094344283, "grad_norm": 0.4395620002210985, "learning_rate": 4.9571392845987965e-06, "loss": 0.5539, "step": 3429 }, { "epoch": 1.3556433687330205, "grad_norm": 0.4828976972353129, "learning_rate": 4.957110393106618e-06, "loss": 0.5486, "step": 3430 }, { "epoch": 1.3560385280316127, "grad_norm": 0.4340264093323101, "learning_rate": 4.9570814919644015e-06, "loss": 0.5528, "step": 3431 }, { "epoch": 1.356433687330205, "grad_norm": 0.43633467432037276, "learning_rate": 4.9570525811722604e-06, "loss": 0.5823, "step": 3432 }, { "epoch": 1.3568288466287972, "grad_norm": 0.44896667799179996, "learning_rate": 4.957023660730309e-06, "loss": 0.5674, "step": 3433 }, { "epoch": 1.3572240059273895, "grad_norm": 0.4424965400029668, "learning_rate": 4.9569947306386614e-06, "loss": 0.5597, "step": 3434 }, { "epoch": 1.3576191652259817, "grad_norm": 0.4505551734794091, "learning_rate": 4.95696579089743e-06, "loss": 0.5732, "step": 3435 }, { "epoch": 1.358014324524574, "grad_norm": 0.42939268083383797, "learning_rate": 4.956936841506729e-06, "loss": 0.5455, "step": 3436 }, { "epoch": 1.3584094838231662, "grad_norm": 0.4283591881731488, "learning_rate": 4.956907882466672e-06, "loss": 0.5599, "step": 3437 }, { "epoch": 1.3588046431217584, "grad_norm": 0.45404223789984394, "learning_rate": 4.956878913777373e-06, "loss": 0.5664, "step": 3438 }, { "epoch": 1.3591998024203507, "grad_norm": 0.4403257215242389, "learning_rate": 4.956849935438945e-06, "loss": 0.5615, "step": 3439 }, { "epoch": 1.359594961718943, "grad_norm": 0.4557250216642013, "learning_rate": 4.956820947451503e-06, "loss": 0.569, "step": 3440 }, { "epoch": 1.3599901210175351, "grad_norm": 0.45833724590035085, "learning_rate": 4.956791949815159e-06, "loss": 0.5823, "step": 3441 }, { "epoch": 1.3603852803161274, "grad_norm": 0.4530682419897322, "learning_rate": 4.956762942530029e-06, "loss": 0.5707, "step": 3442 }, { "epoch": 1.3607804396147196, "grad_norm": 0.4376158015116781, "learning_rate": 4.956733925596227e-06, "loss": 0.5708, "step": 3443 }, { "epoch": 1.3611755989133119, "grad_norm": 0.4307700914051484, "learning_rate": 4.956704899013864e-06, "loss": 0.5522, "step": 3444 }, { "epoch": 1.361570758211904, "grad_norm": 0.46554414982116393, "learning_rate": 4.956675862783057e-06, "loss": 0.5655, "step": 3445 }, { "epoch": 1.3619659175104963, "grad_norm": 0.4671181738319043, "learning_rate": 4.9566468169039185e-06, "loss": 0.566, "step": 3446 }, { "epoch": 1.3623610768090888, "grad_norm": 0.4297413408882112, "learning_rate": 4.956617761376563e-06, "loss": 0.5542, "step": 3447 }, { "epoch": 1.362756236107681, "grad_norm": 0.44684012819132296, "learning_rate": 4.956588696201105e-06, "loss": 0.5513, "step": 3448 }, { "epoch": 1.3631513954062733, "grad_norm": 0.4693033252542671, "learning_rate": 4.956559621377658e-06, "loss": 0.5557, "step": 3449 }, { "epoch": 1.3635465547048655, "grad_norm": 0.4486147777129349, "learning_rate": 4.956530536906337e-06, "loss": 0.5835, "step": 3450 }, { "epoch": 1.3639417140034578, "grad_norm": 0.4520688255329486, "learning_rate": 4.956501442787256e-06, "loss": 0.5975, "step": 3451 }, { "epoch": 1.36433687330205, "grad_norm": 0.4478220251661161, "learning_rate": 4.956472339020528e-06, "loss": 0.5653, "step": 3452 }, { "epoch": 1.3647320326006422, "grad_norm": 0.49319113298532397, "learning_rate": 4.956443225606269e-06, "loss": 0.5619, "step": 3453 }, { "epoch": 1.3651271918992345, "grad_norm": 0.4472089262359022, "learning_rate": 4.9564141025445924e-06, "loss": 0.5851, "step": 3454 }, { "epoch": 1.3655223511978267, "grad_norm": 0.45279260771258567, "learning_rate": 4.956384969835613e-06, "loss": 0.5825, "step": 3455 }, { "epoch": 1.365917510496419, "grad_norm": 0.44893832422728336, "learning_rate": 4.956355827479445e-06, "loss": 0.54, "step": 3456 }, { "epoch": 1.3663126697950112, "grad_norm": 0.44861167684626757, "learning_rate": 4.9563266754762025e-06, "loss": 0.5798, "step": 3457 }, { "epoch": 1.3667078290936034, "grad_norm": 0.457323231515835, "learning_rate": 4.956297513826e-06, "loss": 0.5578, "step": 3458 }, { "epoch": 1.3671029883921957, "grad_norm": 0.4497319765240072, "learning_rate": 4.9562683425289535e-06, "loss": 0.5738, "step": 3459 }, { "epoch": 1.367498147690788, "grad_norm": 0.44725620193345855, "learning_rate": 4.956239161585176e-06, "loss": 0.5727, "step": 3460 }, { "epoch": 1.3678933069893802, "grad_norm": 0.4464598621025446, "learning_rate": 4.956209970994783e-06, "loss": 0.5661, "step": 3461 }, { "epoch": 1.3682884662879724, "grad_norm": 0.4446893412743254, "learning_rate": 4.956180770757888e-06, "loss": 0.5732, "step": 3462 }, { "epoch": 1.3686836255865646, "grad_norm": 0.43091284408122377, "learning_rate": 4.956151560874607e-06, "loss": 0.5573, "step": 3463 }, { "epoch": 1.3690787848851569, "grad_norm": 0.4503486991715494, "learning_rate": 4.9561223413450544e-06, "loss": 0.5629, "step": 3464 }, { "epoch": 1.3694739441837491, "grad_norm": 0.4661947809210117, "learning_rate": 4.956093112169343e-06, "loss": 0.5912, "step": 3465 }, { "epoch": 1.3698691034823414, "grad_norm": 0.4420942969976414, "learning_rate": 4.956063873347591e-06, "loss": 0.5605, "step": 3466 }, { "epoch": 1.3702642627809336, "grad_norm": 0.4299631865847097, "learning_rate": 4.956034624879911e-06, "loss": 0.5311, "step": 3467 }, { "epoch": 1.3706594220795258, "grad_norm": 0.444327734857436, "learning_rate": 4.956005366766419e-06, "loss": 0.5744, "step": 3468 }, { "epoch": 1.371054581378118, "grad_norm": 0.42540391618117634, "learning_rate": 4.955976099007228e-06, "loss": 0.572, "step": 3469 }, { "epoch": 1.3714497406767103, "grad_norm": 0.4460903620872563, "learning_rate": 4.955946821602455e-06, "loss": 0.5715, "step": 3470 }, { "epoch": 1.3718448999753026, "grad_norm": 0.4464339414945937, "learning_rate": 4.9559175345522135e-06, "loss": 0.5477, "step": 3471 }, { "epoch": 1.3722400592738948, "grad_norm": 0.4496737708805918, "learning_rate": 4.95588823785662e-06, "loss": 0.566, "step": 3472 }, { "epoch": 1.372635218572487, "grad_norm": 0.46530073162081453, "learning_rate": 4.955858931515789e-06, "loss": 0.5863, "step": 3473 }, { "epoch": 1.3730303778710793, "grad_norm": 0.4304612284801493, "learning_rate": 4.955829615529835e-06, "loss": 0.5608, "step": 3474 }, { "epoch": 1.3734255371696715, "grad_norm": 0.43919382720802014, "learning_rate": 4.955800289898874e-06, "loss": 0.5455, "step": 3475 }, { "epoch": 1.3738206964682638, "grad_norm": 0.4570983105615984, "learning_rate": 4.95577095462302e-06, "loss": 0.5623, "step": 3476 }, { "epoch": 1.374215855766856, "grad_norm": 0.4377489629257734, "learning_rate": 4.955741609702389e-06, "loss": 0.5599, "step": 3477 }, { "epoch": 1.3746110150654482, "grad_norm": 0.457732827339079, "learning_rate": 4.9557122551370964e-06, "loss": 0.5607, "step": 3478 }, { "epoch": 1.3750061743640405, "grad_norm": 0.44043665757163497, "learning_rate": 4.955682890927257e-06, "loss": 0.5563, "step": 3479 }, { "epoch": 1.3754013336626327, "grad_norm": 0.4583584687679824, "learning_rate": 4.955653517072986e-06, "loss": 0.5743, "step": 3480 }, { "epoch": 1.375796492961225, "grad_norm": 0.45153202964049305, "learning_rate": 4.955624133574401e-06, "loss": 0.5677, "step": 3481 }, { "epoch": 1.3761916522598172, "grad_norm": 0.4374756911240904, "learning_rate": 4.955594740431613e-06, "loss": 0.5654, "step": 3482 }, { "epoch": 1.3765868115584095, "grad_norm": 0.4556124603164875, "learning_rate": 4.9555653376447416e-06, "loss": 0.5649, "step": 3483 }, { "epoch": 1.3769819708570017, "grad_norm": 0.4757752596484646, "learning_rate": 4.9555359252139e-06, "loss": 0.5888, "step": 3484 }, { "epoch": 1.377377130155594, "grad_norm": 0.4465560956225096, "learning_rate": 4.955506503139205e-06, "loss": 0.5591, "step": 3485 }, { "epoch": 1.3777722894541862, "grad_norm": 0.46143614783960507, "learning_rate": 4.955477071420771e-06, "loss": 0.5767, "step": 3486 }, { "epoch": 1.3781674487527784, "grad_norm": 0.4625557795823603, "learning_rate": 4.955447630058714e-06, "loss": 0.5471, "step": 3487 }, { "epoch": 1.3785626080513707, "grad_norm": 0.45122028220004684, "learning_rate": 4.95541817905315e-06, "loss": 0.5763, "step": 3488 }, { "epoch": 1.378957767349963, "grad_norm": 0.4442976727237174, "learning_rate": 4.955388718404194e-06, "loss": 0.5665, "step": 3489 }, { "epoch": 1.3793529266485551, "grad_norm": 0.46811037604603395, "learning_rate": 4.955359248111963e-06, "loss": 0.5561, "step": 3490 }, { "epoch": 1.3797480859471474, "grad_norm": 0.45401020607684417, "learning_rate": 4.955329768176571e-06, "loss": 0.5673, "step": 3491 }, { "epoch": 1.3801432452457396, "grad_norm": 0.44325397259906013, "learning_rate": 4.955300278598135e-06, "loss": 0.5571, "step": 3492 }, { "epoch": 1.3805384045443319, "grad_norm": 0.4477474281264573, "learning_rate": 4.955270779376771e-06, "loss": 0.5621, "step": 3493 }, { "epoch": 1.380933563842924, "grad_norm": 0.4768670398130126, "learning_rate": 4.955241270512593e-06, "loss": 0.5839, "step": 3494 }, { "epoch": 1.3813287231415163, "grad_norm": 0.4460401555017405, "learning_rate": 4.955211752005719e-06, "loss": 0.5539, "step": 3495 }, { "epoch": 1.3817238824401086, "grad_norm": 0.4366552836817992, "learning_rate": 4.955182223856264e-06, "loss": 0.5556, "step": 3496 }, { "epoch": 1.3821190417387008, "grad_norm": 0.4493868061509665, "learning_rate": 4.955152686064344e-06, "loss": 0.5756, "step": 3497 }, { "epoch": 1.382514201037293, "grad_norm": 0.42939645657453296, "learning_rate": 4.955123138630075e-06, "loss": 0.5705, "step": 3498 }, { "epoch": 1.3829093603358853, "grad_norm": 0.4338686438980797, "learning_rate": 4.955093581553574e-06, "loss": 0.5505, "step": 3499 }, { "epoch": 1.3833045196344775, "grad_norm": 0.44361106507257514, "learning_rate": 4.955064014834955e-06, "loss": 0.5488, "step": 3500 }, { "epoch": 1.3836996789330698, "grad_norm": 0.4420112874538524, "learning_rate": 4.9550344384743365e-06, "loss": 0.5624, "step": 3501 }, { "epoch": 1.384094838231662, "grad_norm": 0.44997332812869023, "learning_rate": 4.955004852471832e-06, "loss": 0.5697, "step": 3502 }, { "epoch": 1.3844899975302543, "grad_norm": 0.4591341499895522, "learning_rate": 4.9549752568275605e-06, "loss": 0.5587, "step": 3503 }, { "epoch": 1.3848851568288465, "grad_norm": 0.460676095154433, "learning_rate": 4.954945651541636e-06, "loss": 0.565, "step": 3504 }, { "epoch": 1.3852803161274387, "grad_norm": 0.4299069716032761, "learning_rate": 4.954916036614177e-06, "loss": 0.5545, "step": 3505 }, { "epoch": 1.3856754754260312, "grad_norm": 0.43887083866057874, "learning_rate": 4.954886412045298e-06, "loss": 0.5921, "step": 3506 }, { "epoch": 1.3860706347246234, "grad_norm": 0.42648077434777515, "learning_rate": 4.954856777835115e-06, "loss": 0.5456, "step": 3507 }, { "epoch": 1.3864657940232157, "grad_norm": 0.4290139512004522, "learning_rate": 4.954827133983746e-06, "loss": 0.5867, "step": 3508 }, { "epoch": 1.386860953321808, "grad_norm": 0.44830270070683675, "learning_rate": 4.954797480491307e-06, "loss": 0.5784, "step": 3509 }, { "epoch": 1.3872561126204002, "grad_norm": 0.4477655067908414, "learning_rate": 4.954767817357913e-06, "loss": 0.5638, "step": 3510 }, { "epoch": 1.3876512719189924, "grad_norm": 0.43658218747964356, "learning_rate": 4.954738144583683e-06, "loss": 0.5844, "step": 3511 }, { "epoch": 1.3880464312175846, "grad_norm": 0.4393922211507673, "learning_rate": 4.954708462168731e-06, "loss": 0.5658, "step": 3512 }, { "epoch": 1.3884415905161769, "grad_norm": 0.44104621414355816, "learning_rate": 4.954678770113175e-06, "loss": 0.5566, "step": 3513 }, { "epoch": 1.3888367498147691, "grad_norm": 0.4578661431463306, "learning_rate": 4.954649068417132e-06, "loss": 0.581, "step": 3514 }, { "epoch": 1.3892319091133614, "grad_norm": 0.44817762228122565, "learning_rate": 4.954619357080717e-06, "loss": 0.5633, "step": 3515 }, { "epoch": 1.3896270684119536, "grad_norm": 0.4817436014812288, "learning_rate": 4.954589636104049e-06, "loss": 0.5447, "step": 3516 }, { "epoch": 1.3900222277105458, "grad_norm": 0.46133823688460374, "learning_rate": 4.954559905487242e-06, "loss": 0.5746, "step": 3517 }, { "epoch": 1.390417387009138, "grad_norm": 0.44329286540723245, "learning_rate": 4.954530165230415e-06, "loss": 0.5564, "step": 3518 }, { "epoch": 1.3908125463077303, "grad_norm": 0.43383834817026123, "learning_rate": 4.954500415333684e-06, "loss": 0.5645, "step": 3519 }, { "epoch": 1.3912077056063226, "grad_norm": 0.4378169397587514, "learning_rate": 4.954470655797165e-06, "loss": 0.5463, "step": 3520 }, { "epoch": 1.3916028649049148, "grad_norm": 0.4550593061487435, "learning_rate": 4.954440886620977e-06, "loss": 0.5516, "step": 3521 }, { "epoch": 1.391998024203507, "grad_norm": 0.4400286853367591, "learning_rate": 4.9544111078052345e-06, "loss": 0.5699, "step": 3522 }, { "epoch": 1.3923931835020993, "grad_norm": 0.45981608819949793, "learning_rate": 4.954381319350056e-06, "loss": 0.569, "step": 3523 }, { "epoch": 1.3927883428006915, "grad_norm": 0.44060684429684627, "learning_rate": 4.9543515212555585e-06, "loss": 0.5732, "step": 3524 }, { "epoch": 1.3931835020992838, "grad_norm": 0.4516861113642003, "learning_rate": 4.954321713521858e-06, "loss": 0.5519, "step": 3525 }, { "epoch": 1.393578661397876, "grad_norm": 0.4616718149420385, "learning_rate": 4.954291896149072e-06, "loss": 0.5516, "step": 3526 }, { "epoch": 1.3939738206964682, "grad_norm": 0.4452993405905123, "learning_rate": 4.954262069137318e-06, "loss": 0.5572, "step": 3527 }, { "epoch": 1.3943689799950605, "grad_norm": 0.434383788130944, "learning_rate": 4.9542322324867136e-06, "loss": 0.5472, "step": 3528 }, { "epoch": 1.3947641392936527, "grad_norm": 0.44451550141787594, "learning_rate": 4.954202386197375e-06, "loss": 0.5587, "step": 3529 }, { "epoch": 1.395159298592245, "grad_norm": 0.4481545332604534, "learning_rate": 4.954172530269419e-06, "loss": 0.5513, "step": 3530 }, { "epoch": 1.3955544578908372, "grad_norm": 0.4396843141846436, "learning_rate": 4.954142664702963e-06, "loss": 0.5759, "step": 3531 }, { "epoch": 1.3959496171894294, "grad_norm": 0.44315777199892625, "learning_rate": 4.954112789498126e-06, "loss": 0.5745, "step": 3532 }, { "epoch": 1.3963447764880217, "grad_norm": 0.463836596007923, "learning_rate": 4.9540829046550245e-06, "loss": 0.5844, "step": 3533 }, { "epoch": 1.396739935786614, "grad_norm": 0.527126180105154, "learning_rate": 4.954053010173774e-06, "loss": 0.5632, "step": 3534 }, { "epoch": 1.3971350950852062, "grad_norm": 0.471207695771745, "learning_rate": 4.954023106054495e-06, "loss": 0.5727, "step": 3535 }, { "epoch": 1.3975302543837984, "grad_norm": 0.4547374119776966, "learning_rate": 4.953993192297303e-06, "loss": 0.5639, "step": 3536 }, { "epoch": 1.3979254136823906, "grad_norm": 0.4567625911659694, "learning_rate": 4.953963268902315e-06, "loss": 0.5772, "step": 3537 }, { "epoch": 1.398320572980983, "grad_norm": 0.4241364653534491, "learning_rate": 4.953933335869651e-06, "loss": 0.5501, "step": 3538 }, { "epoch": 1.3987157322795751, "grad_norm": 0.45962400820648397, "learning_rate": 4.9539033931994255e-06, "loss": 0.5597, "step": 3539 }, { "epoch": 1.3991108915781674, "grad_norm": 0.44479747715613505, "learning_rate": 4.953873440891758e-06, "loss": 0.567, "step": 3540 }, { "epoch": 1.3995060508767596, "grad_norm": 0.44139630414780884, "learning_rate": 4.953843478946766e-06, "loss": 0.5611, "step": 3541 }, { "epoch": 1.399901210175352, "grad_norm": 0.44038065639815877, "learning_rate": 4.953813507364566e-06, "loss": 0.5572, "step": 3542 }, { "epoch": 1.4002963694739443, "grad_norm": 0.44652707139959935, "learning_rate": 4.9537835261452785e-06, "loss": 0.552, "step": 3543 }, { "epoch": 1.4006915287725366, "grad_norm": 0.447649590870608, "learning_rate": 4.953753535289017e-06, "loss": 0.55, "step": 3544 }, { "epoch": 1.4010866880711288, "grad_norm": 0.4544049896666161, "learning_rate": 4.953723534795903e-06, "loss": 0.5504, "step": 3545 }, { "epoch": 1.401481847369721, "grad_norm": 0.466964841617532, "learning_rate": 4.953693524666054e-06, "loss": 0.5722, "step": 3546 }, { "epoch": 1.4018770066683133, "grad_norm": 0.527230905472802, "learning_rate": 4.953663504899585e-06, "loss": 0.5524, "step": 3547 }, { "epoch": 1.4022721659669055, "grad_norm": 0.440070467466554, "learning_rate": 4.953633475496615e-06, "loss": 0.5412, "step": 3548 }, { "epoch": 1.4026673252654978, "grad_norm": 0.4364642690771747, "learning_rate": 4.9536034364572645e-06, "loss": 0.5551, "step": 3549 }, { "epoch": 1.40306248456409, "grad_norm": 0.45824115876308913, "learning_rate": 4.953573387781649e-06, "loss": 0.5731, "step": 3550 }, { "epoch": 1.4034576438626822, "grad_norm": 0.4559809654130625, "learning_rate": 4.9535433294698865e-06, "loss": 0.5735, "step": 3551 }, { "epoch": 1.4038528031612745, "grad_norm": 0.4437519963912747, "learning_rate": 4.9535132615220965e-06, "loss": 0.5715, "step": 3552 }, { "epoch": 1.4042479624598667, "grad_norm": 0.45338834415060414, "learning_rate": 4.953483183938395e-06, "loss": 0.5724, "step": 3553 }, { "epoch": 1.404643121758459, "grad_norm": 0.4568209368624167, "learning_rate": 4.953453096718903e-06, "loss": 0.5581, "step": 3554 }, { "epoch": 1.4050382810570512, "grad_norm": 0.43314731805578655, "learning_rate": 4.953422999863736e-06, "loss": 0.5379, "step": 3555 }, { "epoch": 1.4054334403556434, "grad_norm": 0.45449692655904694, "learning_rate": 4.953392893373015e-06, "loss": 0.5739, "step": 3556 }, { "epoch": 1.4058285996542357, "grad_norm": 0.4562200435387982, "learning_rate": 4.953362777246855e-06, "loss": 0.5643, "step": 3557 }, { "epoch": 1.406223758952828, "grad_norm": 0.4440850256076502, "learning_rate": 4.953332651485375e-06, "loss": 0.5689, "step": 3558 }, { "epoch": 1.4066189182514202, "grad_norm": 0.43280961705826465, "learning_rate": 4.953302516088695e-06, "loss": 0.5452, "step": 3559 }, { "epoch": 1.4070140775500124, "grad_norm": 0.43383955128353996, "learning_rate": 4.953272371056933e-06, "loss": 0.5437, "step": 3560 }, { "epoch": 1.4074092368486046, "grad_norm": 0.46587228167336536, "learning_rate": 4.953242216390206e-06, "loss": 0.5626, "step": 3561 }, { "epoch": 1.4078043961471969, "grad_norm": 0.4627748388388923, "learning_rate": 4.953212052088634e-06, "loss": 0.5578, "step": 3562 }, { "epoch": 1.4081995554457891, "grad_norm": 0.4494606350210151, "learning_rate": 4.953181878152334e-06, "loss": 0.559, "step": 3563 }, { "epoch": 1.4085947147443814, "grad_norm": 0.4689503889787178, "learning_rate": 4.953151694581425e-06, "loss": 0.5853, "step": 3564 }, { "epoch": 1.4089898740429736, "grad_norm": 0.425074031613252, "learning_rate": 4.953121501376027e-06, "loss": 0.5469, "step": 3565 }, { "epoch": 1.4093850333415658, "grad_norm": 0.43908518140110436, "learning_rate": 4.953091298536256e-06, "loss": 0.5573, "step": 3566 }, { "epoch": 1.409780192640158, "grad_norm": 0.456588536080327, "learning_rate": 4.953061086062233e-06, "loss": 0.5738, "step": 3567 }, { "epoch": 1.4101753519387503, "grad_norm": 0.4407899689038523, "learning_rate": 4.953030863954075e-06, "loss": 0.5485, "step": 3568 }, { "epoch": 1.4105705112373426, "grad_norm": 0.44983477733657745, "learning_rate": 4.953000632211902e-06, "loss": 0.5587, "step": 3569 }, { "epoch": 1.4109656705359348, "grad_norm": 0.4444516608223304, "learning_rate": 4.952970390835831e-06, "loss": 0.56, "step": 3570 }, { "epoch": 1.411360829834527, "grad_norm": 0.44234362530280924, "learning_rate": 4.952940139825982e-06, "loss": 0.58, "step": 3571 }, { "epoch": 1.4117559891331193, "grad_norm": 0.4303479209786135, "learning_rate": 4.952909879182475e-06, "loss": 0.5487, "step": 3572 }, { "epoch": 1.4121511484317115, "grad_norm": 0.4395260223716139, "learning_rate": 4.952879608905427e-06, "loss": 0.5697, "step": 3573 }, { "epoch": 1.4125463077303038, "grad_norm": 0.43025433618329334, "learning_rate": 4.952849328994957e-06, "loss": 0.5622, "step": 3574 }, { "epoch": 1.412941467028896, "grad_norm": 0.4425864856013525, "learning_rate": 4.9528190394511835e-06, "loss": 0.5472, "step": 3575 }, { "epoch": 1.4133366263274882, "grad_norm": 0.4515220954726901, "learning_rate": 4.9527887402742266e-06, "loss": 0.5593, "step": 3576 }, { "epoch": 1.4137317856260805, "grad_norm": 0.4435809550125933, "learning_rate": 4.952758431464206e-06, "loss": 0.5604, "step": 3577 }, { "epoch": 1.4141269449246727, "grad_norm": 0.45887561975872704, "learning_rate": 4.952728113021239e-06, "loss": 0.5573, "step": 3578 }, { "epoch": 1.414522104223265, "grad_norm": 0.4382166236259494, "learning_rate": 4.952697784945445e-06, "loss": 0.5767, "step": 3579 }, { "epoch": 1.4149172635218572, "grad_norm": 0.43576829414699486, "learning_rate": 4.952667447236944e-06, "loss": 0.5485, "step": 3580 }, { "epoch": 1.4153124228204494, "grad_norm": 0.6295771726774199, "learning_rate": 4.952637099895854e-06, "loss": 0.5594, "step": 3581 }, { "epoch": 1.4157075821190417, "grad_norm": 0.44289940633979036, "learning_rate": 4.952606742922296e-06, "loss": 0.5492, "step": 3582 }, { "epoch": 1.416102741417634, "grad_norm": 0.44005534309853256, "learning_rate": 4.952576376316387e-06, "loss": 0.5488, "step": 3583 }, { "epoch": 1.4164979007162262, "grad_norm": 0.5156052999824663, "learning_rate": 4.952546000078247e-06, "loss": 0.5623, "step": 3584 }, { "epoch": 1.4168930600148184, "grad_norm": 0.4720203506988176, "learning_rate": 4.952515614207996e-06, "loss": 0.5709, "step": 3585 }, { "epoch": 1.4172882193134106, "grad_norm": 0.4403789400135003, "learning_rate": 4.952485218705753e-06, "loss": 0.5612, "step": 3586 }, { "epoch": 1.4176833786120029, "grad_norm": 0.4346034283155917, "learning_rate": 4.952454813571638e-06, "loss": 0.5532, "step": 3587 }, { "epoch": 1.4180785379105951, "grad_norm": 0.4914958621897474, "learning_rate": 4.952424398805769e-06, "loss": 0.5874, "step": 3588 }, { "epoch": 1.4184736972091874, "grad_norm": 0.44158849976736936, "learning_rate": 4.952393974408265e-06, "loss": 0.558, "step": 3589 }, { "epoch": 1.4188688565077796, "grad_norm": 0.47059542127549936, "learning_rate": 4.952363540379248e-06, "loss": 0.56, "step": 3590 }, { "epoch": 1.4192640158063718, "grad_norm": 0.4428834340575035, "learning_rate": 4.952333096718837e-06, "loss": 0.5779, "step": 3591 }, { "epoch": 1.419659175104964, "grad_norm": 0.4318718915548664, "learning_rate": 4.952302643427149e-06, "loss": 0.5545, "step": 3592 }, { "epoch": 1.4200543344035563, "grad_norm": 0.4399415267342125, "learning_rate": 4.952272180504306e-06, "loss": 0.5605, "step": 3593 }, { "epoch": 1.4204494937021486, "grad_norm": 0.44720706398972543, "learning_rate": 4.952241707950427e-06, "loss": 0.5722, "step": 3594 }, { "epoch": 1.4208446530007408, "grad_norm": 0.43958222609485287, "learning_rate": 4.9522112257656315e-06, "loss": 0.5474, "step": 3595 }, { "epoch": 1.421239812299333, "grad_norm": 0.4335981477530248, "learning_rate": 4.952180733950039e-06, "loss": 0.5406, "step": 3596 }, { "epoch": 1.4216349715979253, "grad_norm": 0.4452745792193868, "learning_rate": 4.952150232503771e-06, "loss": 0.5774, "step": 3597 }, { "epoch": 1.4220301308965175, "grad_norm": 0.450111748575881, "learning_rate": 4.952119721426945e-06, "loss": 0.564, "step": 3598 }, { "epoch": 1.4224252901951098, "grad_norm": 0.4514178883673883, "learning_rate": 4.952089200719682e-06, "loss": 0.5702, "step": 3599 }, { "epoch": 1.422820449493702, "grad_norm": 0.44869890712620886, "learning_rate": 4.9520586703821006e-06, "loss": 0.5695, "step": 3600 }, { "epoch": 1.4232156087922945, "grad_norm": 0.47920918782737604, "learning_rate": 4.952028130414322e-06, "loss": 0.5538, "step": 3601 }, { "epoch": 1.4236107680908867, "grad_norm": 0.4359999152882843, "learning_rate": 4.951997580816466e-06, "loss": 0.5561, "step": 3602 }, { "epoch": 1.424005927389479, "grad_norm": 0.4603603266157431, "learning_rate": 4.951967021588654e-06, "loss": 0.5539, "step": 3603 }, { "epoch": 1.4244010866880712, "grad_norm": 0.4470887413164905, "learning_rate": 4.9519364527310035e-06, "loss": 0.557, "step": 3604 }, { "epoch": 1.4247962459866634, "grad_norm": 0.45851610322617087, "learning_rate": 4.9519058742436345e-06, "loss": 0.5501, "step": 3605 }, { "epoch": 1.4251914052852557, "grad_norm": 0.47419186727157, "learning_rate": 4.951875286126669e-06, "loss": 0.577, "step": 3606 }, { "epoch": 1.425586564583848, "grad_norm": 0.4495038740005354, "learning_rate": 4.951844688380226e-06, "loss": 0.5761, "step": 3607 }, { "epoch": 1.4259817238824402, "grad_norm": 0.42780708612897866, "learning_rate": 4.951814081004426e-06, "loss": 0.5507, "step": 3608 }, { "epoch": 1.4263768831810324, "grad_norm": 0.4420018249267614, "learning_rate": 4.951783463999389e-06, "loss": 0.5659, "step": 3609 }, { "epoch": 1.4267720424796246, "grad_norm": 0.4321949197983691, "learning_rate": 4.951752837365236e-06, "loss": 0.5578, "step": 3610 }, { "epoch": 1.4271672017782169, "grad_norm": 0.45648209967593256, "learning_rate": 4.951722201102085e-06, "loss": 0.5829, "step": 3611 }, { "epoch": 1.4275623610768091, "grad_norm": 0.4448948600493821, "learning_rate": 4.9516915552100594e-06, "loss": 0.5672, "step": 3612 }, { "epoch": 1.4279575203754014, "grad_norm": 0.4357660265947906, "learning_rate": 4.951660899689278e-06, "loss": 0.5473, "step": 3613 }, { "epoch": 1.4283526796739936, "grad_norm": 0.4697567901251499, "learning_rate": 4.951630234539861e-06, "loss": 0.5793, "step": 3614 }, { "epoch": 1.4287478389725858, "grad_norm": 0.4474239868294111, "learning_rate": 4.951599559761929e-06, "loss": 0.5559, "step": 3615 }, { "epoch": 1.429142998271178, "grad_norm": 0.46638942204948125, "learning_rate": 4.951568875355603e-06, "loss": 0.5674, "step": 3616 }, { "epoch": 1.4295381575697703, "grad_norm": 0.46201733877960605, "learning_rate": 4.951538181321003e-06, "loss": 0.565, "step": 3617 }, { "epoch": 1.4299333168683626, "grad_norm": 0.4468591448162063, "learning_rate": 4.9515074776582495e-06, "loss": 0.5644, "step": 3618 }, { "epoch": 1.4303284761669548, "grad_norm": 0.4563343700830998, "learning_rate": 4.951476764367463e-06, "loss": 0.5749, "step": 3619 }, { "epoch": 1.430723635465547, "grad_norm": 0.43658851767260776, "learning_rate": 4.951446041448765e-06, "loss": 0.5736, "step": 3620 }, { "epoch": 1.4311187947641393, "grad_norm": 0.4349415610360004, "learning_rate": 4.951415308902275e-06, "loss": 0.5596, "step": 3621 }, { "epoch": 1.4315139540627315, "grad_norm": 0.462135397048311, "learning_rate": 4.951384566728115e-06, "loss": 0.5595, "step": 3622 }, { "epoch": 1.4319091133613238, "grad_norm": 0.4543875910238101, "learning_rate": 4.951353814926405e-06, "loss": 0.5489, "step": 3623 }, { "epoch": 1.432304272659916, "grad_norm": 0.4297719147230641, "learning_rate": 4.951323053497265e-06, "loss": 0.565, "step": 3624 }, { "epoch": 1.4326994319585082, "grad_norm": 0.45442855434813045, "learning_rate": 4.951292282440817e-06, "loss": 0.5698, "step": 3625 }, { "epoch": 1.4330945912571005, "grad_norm": 0.5225354098317048, "learning_rate": 4.951261501757182e-06, "loss": 0.5531, "step": 3626 }, { "epoch": 1.4334897505556927, "grad_norm": 0.4420068105595844, "learning_rate": 4.951230711446479e-06, "loss": 0.5766, "step": 3627 }, { "epoch": 1.433884909854285, "grad_norm": 0.44501142719182346, "learning_rate": 4.951199911508831e-06, "loss": 0.5568, "step": 3628 }, { "epoch": 1.4342800691528772, "grad_norm": 0.5349036907902572, "learning_rate": 4.951169101944358e-06, "loss": 0.5508, "step": 3629 }, { "epoch": 1.4346752284514694, "grad_norm": 0.46471714809625664, "learning_rate": 4.951138282753181e-06, "loss": 0.579, "step": 3630 }, { "epoch": 1.4350703877500617, "grad_norm": 0.4425488778078077, "learning_rate": 4.951107453935421e-06, "loss": 0.5469, "step": 3631 }, { "epoch": 1.435465547048654, "grad_norm": 0.4395384121942282, "learning_rate": 4.951076615491201e-06, "loss": 0.5469, "step": 3632 }, { "epoch": 1.4358607063472462, "grad_norm": 0.45264206492150943, "learning_rate": 4.9510457674206385e-06, "loss": 0.5669, "step": 3633 }, { "epoch": 1.4362558656458384, "grad_norm": 0.4359952505534556, "learning_rate": 4.951014909723858e-06, "loss": 0.5581, "step": 3634 }, { "epoch": 1.4366510249444306, "grad_norm": 0.43031272641598334, "learning_rate": 4.950984042400978e-06, "loss": 0.5614, "step": 3635 }, { "epoch": 1.437046184243023, "grad_norm": 0.44952139574949684, "learning_rate": 4.9509531654521216e-06, "loss": 0.564, "step": 3636 }, { "epoch": 1.4374413435416153, "grad_norm": 0.4894458991545237, "learning_rate": 4.950922278877409e-06, "loss": 0.5373, "step": 3637 }, { "epoch": 1.4378365028402076, "grad_norm": 0.4435614177631397, "learning_rate": 4.950891382676963e-06, "loss": 0.561, "step": 3638 }, { "epoch": 1.4382316621387998, "grad_norm": 0.4556594196014496, "learning_rate": 4.950860476850903e-06, "loss": 0.5612, "step": 3639 }, { "epoch": 1.438626821437392, "grad_norm": 0.43556778440010696, "learning_rate": 4.9508295613993515e-06, "loss": 0.5494, "step": 3640 }, { "epoch": 1.4390219807359843, "grad_norm": 0.4521754341667638, "learning_rate": 4.9507986363224305e-06, "loss": 0.5526, "step": 3641 }, { "epoch": 1.4394171400345765, "grad_norm": 0.439981945538117, "learning_rate": 4.950767701620259e-06, "loss": 0.548, "step": 3642 }, { "epoch": 1.4398122993331688, "grad_norm": 0.4351939027647017, "learning_rate": 4.950736757292962e-06, "loss": 0.5698, "step": 3643 }, { "epoch": 1.440207458631761, "grad_norm": 0.43466458525745466, "learning_rate": 4.950705803340657e-06, "loss": 0.5595, "step": 3644 }, { "epoch": 1.4406026179303533, "grad_norm": 0.4482718750704879, "learning_rate": 4.9506748397634695e-06, "loss": 0.5714, "step": 3645 }, { "epoch": 1.4409977772289455, "grad_norm": 0.4424752556047753, "learning_rate": 4.9506438665615195e-06, "loss": 0.5542, "step": 3646 }, { "epoch": 1.4413929365275377, "grad_norm": 0.4455607912735244, "learning_rate": 4.950612883734928e-06, "loss": 0.5591, "step": 3647 }, { "epoch": 1.44178809582613, "grad_norm": 0.44170634526996, "learning_rate": 4.950581891283816e-06, "loss": 0.5546, "step": 3648 }, { "epoch": 1.4421832551247222, "grad_norm": 0.45300602248436533, "learning_rate": 4.950550889208308e-06, "loss": 0.5494, "step": 3649 }, { "epoch": 1.4425784144233145, "grad_norm": 0.45683270516581864, "learning_rate": 4.950519877508524e-06, "loss": 0.5805, "step": 3650 }, { "epoch": 1.4429735737219067, "grad_norm": 0.4527318497547736, "learning_rate": 4.950488856184585e-06, "loss": 0.5625, "step": 3651 }, { "epoch": 1.443368733020499, "grad_norm": 0.4384333802721848, "learning_rate": 4.950457825236615e-06, "loss": 0.5624, "step": 3652 }, { "epoch": 1.4437638923190912, "grad_norm": 0.48213960406408873, "learning_rate": 4.950426784664734e-06, "loss": 0.5796, "step": 3653 }, { "epoch": 1.4441590516176834, "grad_norm": 0.4656284274871252, "learning_rate": 4.950395734469065e-06, "loss": 0.5488, "step": 3654 }, { "epoch": 1.4445542109162757, "grad_norm": 0.4342682580148919, "learning_rate": 4.950364674649729e-06, "loss": 0.5465, "step": 3655 }, { "epoch": 1.444949370214868, "grad_norm": 0.4371079121432642, "learning_rate": 4.9503336052068485e-06, "loss": 0.5622, "step": 3656 }, { "epoch": 1.4453445295134602, "grad_norm": 0.43992242202525345, "learning_rate": 4.9503025261405455e-06, "loss": 0.5514, "step": 3657 }, { "epoch": 1.4457396888120524, "grad_norm": 0.4414816507834958, "learning_rate": 4.950271437450943e-06, "loss": 0.5563, "step": 3658 }, { "epoch": 1.4461348481106446, "grad_norm": 0.44005325021141295, "learning_rate": 4.950240339138161e-06, "loss": 0.5747, "step": 3659 }, { "epoch": 1.4465300074092369, "grad_norm": 0.4421664700998617, "learning_rate": 4.950209231202323e-06, "loss": 0.5498, "step": 3660 }, { "epoch": 1.4469251667078291, "grad_norm": 0.43113979255459345, "learning_rate": 4.950178113643551e-06, "loss": 0.5798, "step": 3661 }, { "epoch": 1.4473203260064214, "grad_norm": 0.4708087923129291, "learning_rate": 4.950146986461968e-06, "loss": 0.5421, "step": 3662 }, { "epoch": 1.4477154853050136, "grad_norm": 0.45141956435396696, "learning_rate": 4.9501158496576945e-06, "loss": 0.5613, "step": 3663 }, { "epoch": 1.4481106446036058, "grad_norm": 0.45661303582282825, "learning_rate": 4.950084703230854e-06, "loss": 0.5821, "step": 3664 }, { "epoch": 1.448505803902198, "grad_norm": 0.4467554956414753, "learning_rate": 4.9500535471815696e-06, "loss": 0.5614, "step": 3665 }, { "epoch": 1.4489009632007903, "grad_norm": 0.4375691215444136, "learning_rate": 4.950022381509961e-06, "loss": 0.5546, "step": 3666 }, { "epoch": 1.4492961224993826, "grad_norm": 0.44417059941621506, "learning_rate": 4.949991206216152e-06, "loss": 0.5474, "step": 3667 }, { "epoch": 1.4496912817979748, "grad_norm": 0.5239384393901027, "learning_rate": 4.949960021300267e-06, "loss": 0.5724, "step": 3668 }, { "epoch": 1.450086441096567, "grad_norm": 0.4600101751343761, "learning_rate": 4.949928826762425e-06, "loss": 0.5643, "step": 3669 }, { "epoch": 1.4504816003951593, "grad_norm": 0.4398883196726322, "learning_rate": 4.949897622602752e-06, "loss": 0.5645, "step": 3670 }, { "epoch": 1.4508767596937515, "grad_norm": 0.4243916986024868, "learning_rate": 4.949866408821368e-06, "loss": 0.5414, "step": 3671 }, { "epoch": 1.4512719189923438, "grad_norm": 0.46429052461409404, "learning_rate": 4.949835185418397e-06, "loss": 0.5675, "step": 3672 }, { "epoch": 1.451667078290936, "grad_norm": 0.43727146868324845, "learning_rate": 4.94980395239396e-06, "loss": 0.5411, "step": 3673 }, { "epoch": 1.4520622375895282, "grad_norm": 0.44296006305796626, "learning_rate": 4.94977270974818e-06, "loss": 0.5706, "step": 3674 }, { "epoch": 1.4524573968881205, "grad_norm": 0.43581736362129647, "learning_rate": 4.949741457481182e-06, "loss": 0.5579, "step": 3675 }, { "epoch": 1.4528525561867127, "grad_norm": 0.43935390623902243, "learning_rate": 4.949710195593087e-06, "loss": 0.5522, "step": 3676 }, { "epoch": 1.453247715485305, "grad_norm": 0.4604206853638542, "learning_rate": 4.949678924084017e-06, "loss": 0.5861, "step": 3677 }, { "epoch": 1.4536428747838972, "grad_norm": 0.4666269167795469, "learning_rate": 4.949647642954096e-06, "loss": 0.5733, "step": 3678 }, { "epoch": 1.4540380340824894, "grad_norm": 0.4425711605975332, "learning_rate": 4.949616352203447e-06, "loss": 0.5599, "step": 3679 }, { "epoch": 1.4544331933810817, "grad_norm": 0.438259767331312, "learning_rate": 4.949585051832192e-06, "loss": 0.5512, "step": 3680 }, { "epoch": 1.454828352679674, "grad_norm": 0.47194140077374513, "learning_rate": 4.949553741840455e-06, "loss": 0.5684, "step": 3681 }, { "epoch": 1.4552235119782662, "grad_norm": 0.4581698913385663, "learning_rate": 4.9495224222283576e-06, "loss": 0.5731, "step": 3682 }, { "epoch": 1.4556186712768584, "grad_norm": 0.4729218162827143, "learning_rate": 4.949491092996024e-06, "loss": 0.5581, "step": 3683 }, { "epoch": 1.4560138305754506, "grad_norm": 0.4631393154498066, "learning_rate": 4.9494597541435764e-06, "loss": 0.5891, "step": 3684 }, { "epoch": 1.4564089898740429, "grad_norm": 0.44412962722285376, "learning_rate": 4.949428405671138e-06, "loss": 0.5669, "step": 3685 }, { "epoch": 1.4568041491726351, "grad_norm": 0.44251521812090155, "learning_rate": 4.949397047578833e-06, "loss": 0.5767, "step": 3686 }, { "epoch": 1.4571993084712274, "grad_norm": 0.5568404678574649, "learning_rate": 4.949365679866783e-06, "loss": 0.5695, "step": 3687 }, { "epoch": 1.4575944677698196, "grad_norm": 0.4597237274269838, "learning_rate": 4.9493343025351125e-06, "loss": 0.5841, "step": 3688 }, { "epoch": 1.4579896270684118, "grad_norm": 0.433320088836893, "learning_rate": 4.9493029155839435e-06, "loss": 0.5549, "step": 3689 }, { "epoch": 1.458384786367004, "grad_norm": 0.4408848451488535, "learning_rate": 4.949271519013401e-06, "loss": 0.5628, "step": 3690 }, { "epoch": 1.4587799456655963, "grad_norm": 0.45052941581991246, "learning_rate": 4.949240112823606e-06, "loss": 0.5777, "step": 3691 }, { "epoch": 1.4591751049641886, "grad_norm": 0.5775749617611324, "learning_rate": 4.949208697014685e-06, "loss": 0.5599, "step": 3692 }, { "epoch": 1.4595702642627808, "grad_norm": 0.44576099623969545, "learning_rate": 4.949177271586758e-06, "loss": 0.5525, "step": 3693 }, { "epoch": 1.459965423561373, "grad_norm": 0.44043843064525395, "learning_rate": 4.94914583653995e-06, "loss": 0.5702, "step": 3694 }, { "epoch": 1.4603605828599655, "grad_norm": 0.47947522923316277, "learning_rate": 4.9491143918743845e-06, "loss": 0.5558, "step": 3695 }, { "epoch": 1.4607557421585577, "grad_norm": 0.4592839139494569, "learning_rate": 4.949082937590185e-06, "loss": 0.5702, "step": 3696 }, { "epoch": 1.46115090145715, "grad_norm": 0.45188516970792914, "learning_rate": 4.949051473687475e-06, "loss": 0.5528, "step": 3697 }, { "epoch": 1.4615460607557422, "grad_norm": 2.2730337039449893, "learning_rate": 4.949020000166378e-06, "loss": 0.5541, "step": 3698 }, { "epoch": 1.4619412200543345, "grad_norm": 0.4339995210953989, "learning_rate": 4.948988517027017e-06, "loss": 0.5729, "step": 3699 }, { "epoch": 1.4623363793529267, "grad_norm": 0.4500818289037516, "learning_rate": 4.948957024269516e-06, "loss": 0.5613, "step": 3700 }, { "epoch": 1.462731538651519, "grad_norm": 0.44742467459872376, "learning_rate": 4.948925521894e-06, "loss": 0.5351, "step": 3701 }, { "epoch": 1.4631266979501112, "grad_norm": 0.4499910805435301, "learning_rate": 4.948894009900591e-06, "loss": 0.5574, "step": 3702 }, { "epoch": 1.4635218572487034, "grad_norm": 0.4357059717410755, "learning_rate": 4.948862488289413e-06, "loss": 0.552, "step": 3703 }, { "epoch": 1.4639170165472957, "grad_norm": 0.6496169252590506, "learning_rate": 4.948830957060591e-06, "loss": 0.5701, "step": 3704 }, { "epoch": 1.464312175845888, "grad_norm": 0.6005901899539847, "learning_rate": 4.948799416214247e-06, "loss": 0.5604, "step": 3705 }, { "epoch": 1.4647073351444801, "grad_norm": 0.45141656213054204, "learning_rate": 4.9487678657505065e-06, "loss": 0.573, "step": 3706 }, { "epoch": 1.4651024944430724, "grad_norm": 0.46166246230338953, "learning_rate": 4.948736305669494e-06, "loss": 0.5826, "step": 3707 }, { "epoch": 1.4654976537416646, "grad_norm": 0.4485874822386171, "learning_rate": 4.9487047359713304e-06, "loss": 0.5683, "step": 3708 }, { "epoch": 1.4658928130402569, "grad_norm": 0.4504494694062807, "learning_rate": 4.9486731566561416e-06, "loss": 0.5597, "step": 3709 }, { "epoch": 1.466287972338849, "grad_norm": 0.46237624966282265, "learning_rate": 4.948641567724053e-06, "loss": 0.5587, "step": 3710 }, { "epoch": 1.4666831316374414, "grad_norm": 0.4412338330627608, "learning_rate": 4.948609969175186e-06, "loss": 0.58, "step": 3711 }, { "epoch": 1.4670782909360336, "grad_norm": 0.46261210643628975, "learning_rate": 4.9485783610096664e-06, "loss": 0.5919, "step": 3712 }, { "epoch": 1.4674734502346258, "grad_norm": 0.4570667260557455, "learning_rate": 4.948546743227617e-06, "loss": 0.5623, "step": 3713 }, { "epoch": 1.467868609533218, "grad_norm": 0.43799780214279865, "learning_rate": 4.948515115829164e-06, "loss": 0.5526, "step": 3714 }, { "epoch": 1.4682637688318103, "grad_norm": 0.4867222574376945, "learning_rate": 4.9484834788144295e-06, "loss": 0.5734, "step": 3715 }, { "epoch": 1.4686589281304026, "grad_norm": 0.46745491220808105, "learning_rate": 4.948451832183539e-06, "loss": 0.5864, "step": 3716 }, { "epoch": 1.4690540874289948, "grad_norm": 0.45356535226498157, "learning_rate": 4.948420175936618e-06, "loss": 0.563, "step": 3717 }, { "epoch": 1.469449246727587, "grad_norm": 0.46464161309085605, "learning_rate": 4.9483885100737875e-06, "loss": 0.5494, "step": 3718 }, { "epoch": 1.4698444060261793, "grad_norm": 0.4296514326477871, "learning_rate": 4.9483568345951735e-06, "loss": 0.561, "step": 3719 }, { "epoch": 1.4702395653247715, "grad_norm": 0.4449958968332145, "learning_rate": 4.948325149500902e-06, "loss": 0.5767, "step": 3720 }, { "epoch": 1.4706347246233638, "grad_norm": 0.44304606813408115, "learning_rate": 4.948293454791095e-06, "loss": 0.5768, "step": 3721 }, { "epoch": 1.471029883921956, "grad_norm": 0.4438521427352953, "learning_rate": 4.948261750465878e-06, "loss": 0.5467, "step": 3722 }, { "epoch": 1.4714250432205482, "grad_norm": 0.4377012072556724, "learning_rate": 4.948230036525375e-06, "loss": 0.5613, "step": 3723 }, { "epoch": 1.4718202025191405, "grad_norm": 0.43727189711397746, "learning_rate": 4.948198312969712e-06, "loss": 0.5428, "step": 3724 }, { "epoch": 1.4722153618177327, "grad_norm": 0.48964762021273595, "learning_rate": 4.948166579799013e-06, "loss": 0.5568, "step": 3725 }, { "epoch": 1.472610521116325, "grad_norm": 0.5323132325510923, "learning_rate": 4.948134837013402e-06, "loss": 0.5578, "step": 3726 }, { "epoch": 1.4730056804149172, "grad_norm": 0.4398364066065741, "learning_rate": 4.948103084613003e-06, "loss": 0.5609, "step": 3727 }, { "epoch": 1.4734008397135094, "grad_norm": 0.4310398347472198, "learning_rate": 4.948071322597943e-06, "loss": 0.5651, "step": 3728 }, { "epoch": 1.4737959990121017, "grad_norm": 0.539562325199801, "learning_rate": 4.948039550968345e-06, "loss": 0.5635, "step": 3729 }, { "epoch": 1.474191158310694, "grad_norm": 0.4463656077676281, "learning_rate": 4.948007769724333e-06, "loss": 0.55, "step": 3730 }, { "epoch": 1.4745863176092864, "grad_norm": 0.44864795312044087, "learning_rate": 4.947975978866034e-06, "loss": 0.5385, "step": 3731 }, { "epoch": 1.4749814769078786, "grad_norm": 3.1516964419751026, "learning_rate": 4.947944178393572e-06, "loss": 0.582, "step": 3732 }, { "epoch": 1.4753766362064709, "grad_norm": 0.46934564073210333, "learning_rate": 4.947912368307071e-06, "loss": 0.555, "step": 3733 }, { "epoch": 1.475771795505063, "grad_norm": 0.46254224035417973, "learning_rate": 4.9478805486066575e-06, "loss": 0.5879, "step": 3734 }, { "epoch": 1.4761669548036553, "grad_norm": 0.4469674187179735, "learning_rate": 4.947848719292455e-06, "loss": 0.554, "step": 3735 }, { "epoch": 1.4765621141022476, "grad_norm": 0.45010349935865934, "learning_rate": 4.947816880364589e-06, "loss": 0.5465, "step": 3736 }, { "epoch": 1.4769572734008398, "grad_norm": 0.4671526430772705, "learning_rate": 4.9477850318231855e-06, "loss": 0.5768, "step": 3737 }, { "epoch": 1.477352432699432, "grad_norm": 0.44222712097266215, "learning_rate": 4.947753173668368e-06, "loss": 0.5647, "step": 3738 }, { "epoch": 1.4777475919980243, "grad_norm": 0.4346426270801945, "learning_rate": 4.947721305900263e-06, "loss": 0.5883, "step": 3739 }, { "epoch": 1.4781427512966165, "grad_norm": 0.4428949633974912, "learning_rate": 4.947689428518994e-06, "loss": 0.5585, "step": 3740 }, { "epoch": 1.4785379105952088, "grad_norm": 0.45290050689849504, "learning_rate": 4.947657541524689e-06, "loss": 0.5698, "step": 3741 }, { "epoch": 1.478933069893801, "grad_norm": 0.46047272522893107, "learning_rate": 4.947625644917471e-06, "loss": 0.5853, "step": 3742 }, { "epoch": 1.4793282291923933, "grad_norm": 0.436358562052887, "learning_rate": 4.9475937386974645e-06, "loss": 0.5556, "step": 3743 }, { "epoch": 1.4797233884909855, "grad_norm": 0.44605536186917205, "learning_rate": 4.947561822864797e-06, "loss": 0.5691, "step": 3744 }, { "epoch": 1.4801185477895777, "grad_norm": 0.45340872193909043, "learning_rate": 4.947529897419593e-06, "loss": 0.5595, "step": 3745 }, { "epoch": 1.48051370708817, "grad_norm": 0.44732431526653443, "learning_rate": 4.947497962361977e-06, "loss": 0.5469, "step": 3746 }, { "epoch": 1.4809088663867622, "grad_norm": 0.43950554191375485, "learning_rate": 4.947466017692075e-06, "loss": 0.5493, "step": 3747 }, { "epoch": 1.4813040256853545, "grad_norm": 0.451698847161343, "learning_rate": 4.947434063410014e-06, "loss": 0.5763, "step": 3748 }, { "epoch": 1.4816991849839467, "grad_norm": 0.4383967173667518, "learning_rate": 4.947402099515918e-06, "loss": 0.5574, "step": 3749 }, { "epoch": 1.482094344282539, "grad_norm": 0.46921309981868, "learning_rate": 4.947370126009912e-06, "loss": 0.5698, "step": 3750 }, { "epoch": 1.4824895035811312, "grad_norm": 0.4576340583848198, "learning_rate": 4.947338142892123e-06, "loss": 0.5555, "step": 3751 }, { "epoch": 1.4828846628797234, "grad_norm": 0.45431167387415017, "learning_rate": 4.947306150162675e-06, "loss": 0.5584, "step": 3752 }, { "epoch": 1.4832798221783157, "grad_norm": 0.44151975322514186, "learning_rate": 4.947274147821694e-06, "loss": 0.5395, "step": 3753 }, { "epoch": 1.483674981476908, "grad_norm": 0.4508179501579237, "learning_rate": 4.947242135869308e-06, "loss": 0.5629, "step": 3754 }, { "epoch": 1.4840701407755001, "grad_norm": 0.44449630571041165, "learning_rate": 4.94721011430564e-06, "loss": 0.5543, "step": 3755 }, { "epoch": 1.4844653000740924, "grad_norm": 0.44088055023127704, "learning_rate": 4.947178083130817e-06, "loss": 0.5525, "step": 3756 }, { "epoch": 1.4848604593726846, "grad_norm": 0.4349780866471166, "learning_rate": 4.947146042344964e-06, "loss": 0.5431, "step": 3757 }, { "epoch": 1.4852556186712769, "grad_norm": 0.4690057952385603, "learning_rate": 4.947113991948207e-06, "loss": 0.609, "step": 3758 }, { "epoch": 1.485650777969869, "grad_norm": 0.4570722839955001, "learning_rate": 4.947081931940673e-06, "loss": 0.5589, "step": 3759 }, { "epoch": 1.4860459372684613, "grad_norm": 0.44584022143653507, "learning_rate": 4.9470498623224875e-06, "loss": 0.5509, "step": 3760 }, { "epoch": 1.4864410965670536, "grad_norm": 0.44483253459798316, "learning_rate": 4.947017783093775e-06, "loss": 0.5671, "step": 3761 }, { "epoch": 1.4868362558656458, "grad_norm": 0.4637492181059857, "learning_rate": 4.946985694254662e-06, "loss": 0.5649, "step": 3762 }, { "epoch": 1.487231415164238, "grad_norm": 0.4443774539885124, "learning_rate": 4.946953595805277e-06, "loss": 0.5664, "step": 3763 }, { "epoch": 1.4876265744628303, "grad_norm": 0.4371331634851273, "learning_rate": 4.946921487745743e-06, "loss": 0.5585, "step": 3764 }, { "epoch": 1.4880217337614225, "grad_norm": 0.44138169702671043, "learning_rate": 4.9468893700761874e-06, "loss": 0.5576, "step": 3765 }, { "epoch": 1.4884168930600148, "grad_norm": 0.4525359288431999, "learning_rate": 4.946857242796737e-06, "loss": 0.5688, "step": 3766 }, { "epoch": 1.488812052358607, "grad_norm": 0.4488955169238718, "learning_rate": 4.946825105907516e-06, "loss": 0.5708, "step": 3767 }, { "epoch": 1.4892072116571993, "grad_norm": 0.453722892192494, "learning_rate": 4.946792959408652e-06, "loss": 0.574, "step": 3768 }, { "epoch": 1.4896023709557915, "grad_norm": 0.4438539782138186, "learning_rate": 4.9467608033002715e-06, "loss": 0.5736, "step": 3769 }, { "epoch": 1.4899975302543838, "grad_norm": 0.4734446349651656, "learning_rate": 4.9467286375824995e-06, "loss": 0.5587, "step": 3770 }, { "epoch": 1.490392689552976, "grad_norm": 0.45483078234475843, "learning_rate": 4.946696462255464e-06, "loss": 0.5843, "step": 3771 }, { "epoch": 1.4907878488515682, "grad_norm": 0.43491380893057063, "learning_rate": 4.94666427731929e-06, "loss": 0.5568, "step": 3772 }, { "epoch": 1.4911830081501605, "grad_norm": 0.4593799609237185, "learning_rate": 4.946632082774105e-06, "loss": 0.568, "step": 3773 }, { "epoch": 1.4915781674487527, "grad_norm": 0.4756685082546809, "learning_rate": 4.946599878620034e-06, "loss": 0.5525, "step": 3774 }, { "epoch": 1.491973326747345, "grad_norm": 0.4375243311510868, "learning_rate": 4.946567664857205e-06, "loss": 0.5688, "step": 3775 }, { "epoch": 1.4923684860459372, "grad_norm": 0.44023853517164957, "learning_rate": 4.946535441485744e-06, "loss": 0.5461, "step": 3776 }, { "epoch": 1.4927636453445294, "grad_norm": 0.4417756181030543, "learning_rate": 4.946503208505776e-06, "loss": 0.5483, "step": 3777 }, { "epoch": 1.4931588046431217, "grad_norm": 0.4459801582543887, "learning_rate": 4.94647096591743e-06, "loss": 0.5494, "step": 3778 }, { "epoch": 1.493553963941714, "grad_norm": 0.44654308211555016, "learning_rate": 4.9464387137208326e-06, "loss": 0.5711, "step": 3779 }, { "epoch": 1.4939491232403062, "grad_norm": 0.44171711692198834, "learning_rate": 4.946406451916108e-06, "loss": 0.5607, "step": 3780 }, { "epoch": 1.4943442825388984, "grad_norm": 0.4390434383203796, "learning_rate": 4.946374180503385e-06, "loss": 0.5462, "step": 3781 }, { "epoch": 1.4947394418374906, "grad_norm": 0.4548707333166944, "learning_rate": 4.94634189948279e-06, "loss": 0.554, "step": 3782 }, { "epoch": 1.4951346011360829, "grad_norm": 0.4499388348643594, "learning_rate": 4.946309608854449e-06, "loss": 0.5705, "step": 3783 }, { "epoch": 1.4955297604346751, "grad_norm": 0.4451819872634511, "learning_rate": 4.94627730861849e-06, "loss": 0.5653, "step": 3784 }, { "epoch": 1.4959249197332674, "grad_norm": 0.4429157766074593, "learning_rate": 4.946244998775039e-06, "loss": 0.5591, "step": 3785 }, { "epoch": 1.4963200790318596, "grad_norm": 0.45149504764423787, "learning_rate": 4.946212679324222e-06, "loss": 0.5531, "step": 3786 }, { "epoch": 1.4967152383304518, "grad_norm": 0.45311480641045254, "learning_rate": 4.946180350266168e-06, "loss": 0.5738, "step": 3787 }, { "epoch": 1.497110397629044, "grad_norm": 0.5577932771269037, "learning_rate": 4.946148011601003e-06, "loss": 0.5918, "step": 3788 }, { "epoch": 1.4975055569276363, "grad_norm": 0.4442006706613438, "learning_rate": 4.9461156633288535e-06, "loss": 0.5573, "step": 3789 }, { "epoch": 1.4979007162262288, "grad_norm": 0.448842435666703, "learning_rate": 4.946083305449847e-06, "loss": 0.5694, "step": 3790 }, { "epoch": 1.498295875524821, "grad_norm": 0.47063189404977285, "learning_rate": 4.946050937964112e-06, "loss": 0.5663, "step": 3791 }, { "epoch": 1.4986910348234133, "grad_norm": 0.4222521912580929, "learning_rate": 4.946018560871772e-06, "loss": 0.5333, "step": 3792 }, { "epoch": 1.4990861941220055, "grad_norm": 0.708119310733299, "learning_rate": 4.945986174172958e-06, "loss": 0.5816, "step": 3793 }, { "epoch": 1.4994813534205977, "grad_norm": 0.44863172461117107, "learning_rate": 4.9459537778677955e-06, "loss": 0.5848, "step": 3794 }, { "epoch": 1.49987651271919, "grad_norm": 0.4411602360525524, "learning_rate": 4.945921371956411e-06, "loss": 0.5733, "step": 3795 }, { "epoch": 1.5002716720177822, "grad_norm": 0.433024812052623, "learning_rate": 4.945888956438933e-06, "loss": 0.5644, "step": 3796 }, { "epoch": 1.5006668313163745, "grad_norm": 0.5014185664609002, "learning_rate": 4.945856531315489e-06, "loss": 0.564, "step": 3797 }, { "epoch": 1.5010619906149667, "grad_norm": 0.46228942218632185, "learning_rate": 4.945824096586205e-06, "loss": 0.569, "step": 3798 }, { "epoch": 1.501457149913559, "grad_norm": 0.4623049423084713, "learning_rate": 4.94579165225121e-06, "loss": 0.5614, "step": 3799 }, { "epoch": 1.5018523092121512, "grad_norm": 0.4294050033616448, "learning_rate": 4.945759198310629e-06, "loss": 0.5661, "step": 3800 }, { "epoch": 1.5022474685107434, "grad_norm": 0.4383701048448765, "learning_rate": 4.945726734764592e-06, "loss": 0.5541, "step": 3801 }, { "epoch": 1.5026426278093357, "grad_norm": 0.43965998205136625, "learning_rate": 4.945694261613225e-06, "loss": 0.5503, "step": 3802 }, { "epoch": 1.503037787107928, "grad_norm": 0.43862817298801543, "learning_rate": 4.945661778856658e-06, "loss": 0.5793, "step": 3803 }, { "epoch": 1.5034329464065201, "grad_norm": 0.45968905394437315, "learning_rate": 4.945629286495014e-06, "loss": 0.5836, "step": 3804 }, { "epoch": 1.5038281057051124, "grad_norm": 0.43146289605158555, "learning_rate": 4.945596784528425e-06, "loss": 0.5553, "step": 3805 }, { "epoch": 1.5042232650037046, "grad_norm": 0.4227325494043722, "learning_rate": 4.945564272957016e-06, "loss": 0.5467, "step": 3806 }, { "epoch": 1.5046184243022969, "grad_norm": 0.45665440959944387, "learning_rate": 4.945531751780915e-06, "loss": 0.5825, "step": 3807 }, { "epoch": 1.505013583600889, "grad_norm": 0.4370355896617275, "learning_rate": 4.9454992210002515e-06, "loss": 0.5757, "step": 3808 }, { "epoch": 1.5054087428994813, "grad_norm": 0.44183906572444237, "learning_rate": 4.9454666806151515e-06, "loss": 0.5619, "step": 3809 }, { "epoch": 1.5058039021980736, "grad_norm": 0.4214971459805865, "learning_rate": 4.945434130625744e-06, "loss": 0.5638, "step": 3810 }, { "epoch": 1.5061990614966658, "grad_norm": 0.43677389141426, "learning_rate": 4.945401571032156e-06, "loss": 0.5626, "step": 3811 }, { "epoch": 1.506594220795258, "grad_norm": 0.43865734507834114, "learning_rate": 4.9453690018345144e-06, "loss": 0.5353, "step": 3812 }, { "epoch": 1.5069893800938503, "grad_norm": 0.5342126887725891, "learning_rate": 4.945336423032949e-06, "loss": 0.5695, "step": 3813 }, { "epoch": 1.5073845393924425, "grad_norm": 0.4349734124143589, "learning_rate": 4.945303834627587e-06, "loss": 0.5418, "step": 3814 }, { "epoch": 1.5077796986910348, "grad_norm": 0.4385647900898778, "learning_rate": 4.945271236618557e-06, "loss": 0.5658, "step": 3815 }, { "epoch": 1.508174857989627, "grad_norm": 0.42971941105827866, "learning_rate": 4.945238629005986e-06, "loss": 0.5727, "step": 3816 }, { "epoch": 1.5085700172882193, "grad_norm": 0.4454445834803687, "learning_rate": 4.945206011790002e-06, "loss": 0.582, "step": 3817 }, { "epoch": 1.5089651765868115, "grad_norm": 0.4534020504734814, "learning_rate": 4.945173384970734e-06, "loss": 0.5659, "step": 3818 }, { "epoch": 1.509360335885404, "grad_norm": 0.4615252795337526, "learning_rate": 4.945140748548309e-06, "loss": 0.5762, "step": 3819 }, { "epoch": 1.5097554951839962, "grad_norm": 0.43867764806826415, "learning_rate": 4.945108102522858e-06, "loss": 0.5667, "step": 3820 }, { "epoch": 1.5101506544825885, "grad_norm": 0.43752103633853423, "learning_rate": 4.945075446894505e-06, "loss": 0.5648, "step": 3821 }, { "epoch": 1.5105458137811807, "grad_norm": 0.48724137180843774, "learning_rate": 4.945042781663381e-06, "loss": 0.5496, "step": 3822 }, { "epoch": 1.510940973079773, "grad_norm": 0.4386933082638948, "learning_rate": 4.945010106829614e-06, "loss": 0.5613, "step": 3823 }, { "epoch": 1.5113361323783652, "grad_norm": 0.5117191506598214, "learning_rate": 4.944977422393332e-06, "loss": 0.5537, "step": 3824 }, { "epoch": 1.5117312916769574, "grad_norm": 0.43364355513059377, "learning_rate": 4.944944728354663e-06, "loss": 0.5655, "step": 3825 }, { "epoch": 1.5121264509755497, "grad_norm": 0.4467784125266709, "learning_rate": 4.9449120247137365e-06, "loss": 0.5664, "step": 3826 }, { "epoch": 1.512521610274142, "grad_norm": 0.5300397203257904, "learning_rate": 4.944879311470679e-06, "loss": 0.5769, "step": 3827 }, { "epoch": 1.5129167695727341, "grad_norm": 0.4257934452475102, "learning_rate": 4.944846588625621e-06, "loss": 0.5436, "step": 3828 }, { "epoch": 1.5133119288713264, "grad_norm": 0.4455795318956715, "learning_rate": 4.94481385617869e-06, "loss": 0.5385, "step": 3829 }, { "epoch": 1.5137070881699186, "grad_norm": 0.45355241250172695, "learning_rate": 4.944781114130015e-06, "loss": 0.5721, "step": 3830 }, { "epoch": 1.5141022474685109, "grad_norm": 0.44014963072857977, "learning_rate": 4.944748362479723e-06, "loss": 0.562, "step": 3831 }, { "epoch": 1.514497406767103, "grad_norm": 0.42555153523264766, "learning_rate": 4.9447156012279455e-06, "loss": 0.5624, "step": 3832 }, { "epoch": 1.5148925660656953, "grad_norm": 0.48013397168835886, "learning_rate": 4.944682830374809e-06, "loss": 0.5513, "step": 3833 }, { "epoch": 1.5152877253642876, "grad_norm": 0.45697256603832853, "learning_rate": 4.944650049920443e-06, "loss": 0.5629, "step": 3834 }, { "epoch": 1.5156828846628798, "grad_norm": 0.44411668965768414, "learning_rate": 4.944617259864976e-06, "loss": 0.5506, "step": 3835 }, { "epoch": 1.516078043961472, "grad_norm": 0.4416796017067383, "learning_rate": 4.944584460208537e-06, "loss": 0.5536, "step": 3836 }, { "epoch": 1.5164732032600643, "grad_norm": 0.4349355230140983, "learning_rate": 4.944551650951255e-06, "loss": 0.5599, "step": 3837 }, { "epoch": 1.5168683625586565, "grad_norm": 0.42051333332637836, "learning_rate": 4.944518832093258e-06, "loss": 0.549, "step": 3838 }, { "epoch": 1.5172635218572488, "grad_norm": 0.43470424455730705, "learning_rate": 4.944486003634675e-06, "loss": 0.5705, "step": 3839 }, { "epoch": 1.517658681155841, "grad_norm": 0.4484568492506127, "learning_rate": 4.944453165575635e-06, "loss": 0.5527, "step": 3840 }, { "epoch": 1.5180538404544333, "grad_norm": 0.43871369779576824, "learning_rate": 4.944420317916269e-06, "loss": 0.5572, "step": 3841 }, { "epoch": 1.5184489997530255, "grad_norm": 0.4267428186392819, "learning_rate": 4.944387460656703e-06, "loss": 0.5602, "step": 3842 }, { "epoch": 1.5188441590516177, "grad_norm": 0.43416355081762653, "learning_rate": 4.9443545937970686e-06, "loss": 0.567, "step": 3843 }, { "epoch": 1.51923931835021, "grad_norm": 0.49590515295094667, "learning_rate": 4.944321717337493e-06, "loss": 0.5598, "step": 3844 }, { "epoch": 1.5196344776488022, "grad_norm": 0.4626680777777951, "learning_rate": 4.9442888312781056e-06, "loss": 0.5583, "step": 3845 }, { "epoch": 1.5200296369473945, "grad_norm": 0.4449293851147631, "learning_rate": 4.944255935619036e-06, "loss": 0.562, "step": 3846 }, { "epoch": 1.5204247962459867, "grad_norm": 0.43950123990367407, "learning_rate": 4.944223030360414e-06, "loss": 0.5642, "step": 3847 }, { "epoch": 1.520819955544579, "grad_norm": 0.44149298105890766, "learning_rate": 4.9441901155023675e-06, "loss": 0.5796, "step": 3848 }, { "epoch": 1.5212151148431712, "grad_norm": 0.4482301026578816, "learning_rate": 4.944157191045027e-06, "loss": 0.5688, "step": 3849 }, { "epoch": 1.5216102741417634, "grad_norm": 0.5488343962637722, "learning_rate": 4.94412425698852e-06, "loss": 0.5546, "step": 3850 }, { "epoch": 1.5220054334403557, "grad_norm": 0.44100420516713945, "learning_rate": 4.944091313332978e-06, "loss": 0.5624, "step": 3851 }, { "epoch": 1.522400592738948, "grad_norm": 0.42706076298730056, "learning_rate": 4.94405836007853e-06, "loss": 0.5365, "step": 3852 }, { "epoch": 1.5227957520375401, "grad_norm": 0.4359259767312831, "learning_rate": 4.944025397225304e-06, "loss": 0.5411, "step": 3853 }, { "epoch": 1.5231909113361324, "grad_norm": 0.4388004674269903, "learning_rate": 4.943992424773431e-06, "loss": 0.5655, "step": 3854 }, { "epoch": 1.5235860706347246, "grad_norm": 0.4747569771286888, "learning_rate": 4.943959442723039e-06, "loss": 0.5583, "step": 3855 }, { "epoch": 1.5239812299333169, "grad_norm": 0.4673031379896023, "learning_rate": 4.943926451074258e-06, "loss": 0.5663, "step": 3856 }, { "epoch": 1.524376389231909, "grad_norm": 0.45989463226172245, "learning_rate": 4.943893449827219e-06, "loss": 0.554, "step": 3857 }, { "epoch": 1.5247715485305013, "grad_norm": 0.4488593649799981, "learning_rate": 4.94386043898205e-06, "loss": 0.5733, "step": 3858 }, { "epoch": 1.5251667078290936, "grad_norm": 0.4485345293642178, "learning_rate": 4.943827418538882e-06, "loss": 0.5603, "step": 3859 }, { "epoch": 1.5255618671276858, "grad_norm": 0.4535620010441292, "learning_rate": 4.943794388497842e-06, "loss": 0.5548, "step": 3860 }, { "epoch": 1.525957026426278, "grad_norm": 0.4735064699244355, "learning_rate": 4.943761348859063e-06, "loss": 0.5524, "step": 3861 }, { "epoch": 1.5263521857248703, "grad_norm": 0.4513665077962669, "learning_rate": 4.9437282996226734e-06, "loss": 0.5677, "step": 3862 }, { "epoch": 1.5267473450234625, "grad_norm": 0.4556372954566632, "learning_rate": 4.943695240788803e-06, "loss": 0.559, "step": 3863 }, { "epoch": 1.5271425043220548, "grad_norm": 0.4597655079944917, "learning_rate": 4.943662172357582e-06, "loss": 0.5529, "step": 3864 }, { "epoch": 1.527537663620647, "grad_norm": 0.45852506910423174, "learning_rate": 4.943629094329139e-06, "loss": 0.556, "step": 3865 }, { "epoch": 1.5279328229192393, "grad_norm": 0.4475865214143378, "learning_rate": 4.9435960067036045e-06, "loss": 0.5638, "step": 3866 }, { "epoch": 1.5283279822178315, "grad_norm": 0.44719416313388277, "learning_rate": 4.943562909481109e-06, "loss": 0.5652, "step": 3867 }, { "epoch": 1.5287231415164237, "grad_norm": 0.4288620192017668, "learning_rate": 4.943529802661783e-06, "loss": 0.5436, "step": 3868 }, { "epoch": 1.529118300815016, "grad_norm": 0.45692475861207277, "learning_rate": 4.943496686245754e-06, "loss": 0.5723, "step": 3869 }, { "epoch": 1.5295134601136082, "grad_norm": 0.43936779754731264, "learning_rate": 4.943463560233155e-06, "loss": 0.5631, "step": 3870 }, { "epoch": 1.5299086194122005, "grad_norm": 0.45634250616291433, "learning_rate": 4.943430424624115e-06, "loss": 0.5876, "step": 3871 }, { "epoch": 1.5303037787107927, "grad_norm": 0.46250472654805214, "learning_rate": 4.943397279418764e-06, "loss": 0.5704, "step": 3872 }, { "epoch": 1.530698938009385, "grad_norm": 0.4445910904786816, "learning_rate": 4.943364124617232e-06, "loss": 0.5762, "step": 3873 }, { "epoch": 1.5310940973079772, "grad_norm": 0.4492442500667238, "learning_rate": 4.9433309602196494e-06, "loss": 0.5812, "step": 3874 }, { "epoch": 1.5314892566065694, "grad_norm": 0.4375970310778139, "learning_rate": 4.943297786226147e-06, "loss": 0.5673, "step": 3875 }, { "epoch": 1.5318844159051617, "grad_norm": 0.43961087035282753, "learning_rate": 4.9432646026368535e-06, "loss": 0.5671, "step": 3876 }, { "epoch": 1.532279575203754, "grad_norm": 0.4415868685793641, "learning_rate": 4.943231409451901e-06, "loss": 0.5559, "step": 3877 }, { "epoch": 1.5326747345023461, "grad_norm": 0.43726516852584973, "learning_rate": 4.943198206671419e-06, "loss": 0.5684, "step": 3878 }, { "epoch": 1.5330698938009384, "grad_norm": 0.5804283001744497, "learning_rate": 4.943164994295538e-06, "loss": 0.5731, "step": 3879 }, { "epoch": 1.5334650530995306, "grad_norm": 0.45583485277725394, "learning_rate": 4.943131772324388e-06, "loss": 0.5572, "step": 3880 }, { "epoch": 1.5338602123981229, "grad_norm": 0.4399328351020496, "learning_rate": 4.9430985407581e-06, "loss": 0.5427, "step": 3881 }, { "epoch": 1.534255371696715, "grad_norm": 0.42834809808516533, "learning_rate": 4.943065299596806e-06, "loss": 0.5681, "step": 3882 }, { "epoch": 1.5346505309953073, "grad_norm": 0.5826505399905085, "learning_rate": 4.943032048840633e-06, "loss": 0.5551, "step": 3883 }, { "epoch": 1.5350456902938996, "grad_norm": 0.4471917315958813, "learning_rate": 4.942998788489715e-06, "loss": 0.5762, "step": 3884 }, { "epoch": 1.5354408495924918, "grad_norm": 0.44668514989336283, "learning_rate": 4.94296551854418e-06, "loss": 0.5721, "step": 3885 }, { "epoch": 1.535836008891084, "grad_norm": 0.4631760060913264, "learning_rate": 4.942932239004161e-06, "loss": 0.5684, "step": 3886 }, { "epoch": 1.5362311681896763, "grad_norm": 0.45324498503225535, "learning_rate": 4.942898949869787e-06, "loss": 0.5749, "step": 3887 }, { "epoch": 1.5366263274882686, "grad_norm": 0.43838680281308623, "learning_rate": 4.942865651141189e-06, "loss": 0.5723, "step": 3888 }, { "epoch": 1.5370214867868608, "grad_norm": 0.44695717165338794, "learning_rate": 4.942832342818499e-06, "loss": 0.58, "step": 3889 }, { "epoch": 1.5374166460854533, "grad_norm": 0.42767213191242315, "learning_rate": 4.942799024901846e-06, "loss": 0.5508, "step": 3890 }, { "epoch": 1.5378118053840455, "grad_norm": 0.440495140617439, "learning_rate": 4.942765697391363e-06, "loss": 0.5735, "step": 3891 }, { "epoch": 1.5382069646826377, "grad_norm": 0.4607800617717655, "learning_rate": 4.942732360287179e-06, "loss": 0.5874, "step": 3892 }, { "epoch": 1.53860212398123, "grad_norm": 0.44999899303277263, "learning_rate": 4.942699013589425e-06, "loss": 0.5793, "step": 3893 }, { "epoch": 1.5389972832798222, "grad_norm": 0.4558760646566193, "learning_rate": 4.942665657298233e-06, "loss": 0.5714, "step": 3894 }, { "epoch": 1.5393924425784145, "grad_norm": 0.43776240719293746, "learning_rate": 4.9426322914137335e-06, "loss": 0.5676, "step": 3895 }, { "epoch": 1.5397876018770067, "grad_norm": 0.4437159939338884, "learning_rate": 4.942598915936058e-06, "loss": 0.5893, "step": 3896 }, { "epoch": 1.540182761175599, "grad_norm": 0.4374941926171507, "learning_rate": 4.942565530865337e-06, "loss": 0.555, "step": 3897 }, { "epoch": 1.5405779204741912, "grad_norm": 0.4208573300497547, "learning_rate": 4.942532136201702e-06, "loss": 0.5565, "step": 3898 }, { "epoch": 1.5409730797727834, "grad_norm": 0.4337013789375931, "learning_rate": 4.942498731945283e-06, "loss": 0.5616, "step": 3899 }, { "epoch": 1.5413682390713757, "grad_norm": 0.4527938862232506, "learning_rate": 4.942465318096213e-06, "loss": 0.556, "step": 3900 }, { "epoch": 1.541763398369968, "grad_norm": 0.4427938210330589, "learning_rate": 4.942431894654622e-06, "loss": 0.5716, "step": 3901 }, { "epoch": 1.5421585576685601, "grad_norm": 0.439043234576677, "learning_rate": 4.942398461620642e-06, "loss": 0.5627, "step": 3902 }, { "epoch": 1.5425537169671524, "grad_norm": 0.4556850317164177, "learning_rate": 4.942365018994404e-06, "loss": 0.564, "step": 3903 }, { "epoch": 1.5429488762657446, "grad_norm": 0.5109127133225726, "learning_rate": 4.942331566776039e-06, "loss": 0.5574, "step": 3904 }, { "epoch": 1.5433440355643369, "grad_norm": 0.4363940727408752, "learning_rate": 4.942298104965679e-06, "loss": 0.575, "step": 3905 }, { "epoch": 1.543739194862929, "grad_norm": 0.42938184413018377, "learning_rate": 4.942264633563455e-06, "loss": 0.5562, "step": 3906 }, { "epoch": 1.5441343541615213, "grad_norm": 0.44370460437623127, "learning_rate": 4.942231152569499e-06, "loss": 0.5753, "step": 3907 }, { "epoch": 1.5445295134601136, "grad_norm": 0.4367799811992645, "learning_rate": 4.94219766198394e-06, "loss": 0.5531, "step": 3908 }, { "epoch": 1.5449246727587058, "grad_norm": 0.436026557215798, "learning_rate": 4.942164161806914e-06, "loss": 0.5641, "step": 3909 }, { "epoch": 1.545319832057298, "grad_norm": 0.43686930660689016, "learning_rate": 4.94213065203855e-06, "loss": 0.5514, "step": 3910 }, { "epoch": 1.5457149913558903, "grad_norm": 0.459912181031538, "learning_rate": 4.942097132678978e-06, "loss": 0.5717, "step": 3911 }, { "epoch": 1.5461101506544825, "grad_norm": 0.432296005037305, "learning_rate": 4.942063603728332e-06, "loss": 0.5598, "step": 3912 }, { "epoch": 1.5465053099530748, "grad_norm": 0.44580566374151687, "learning_rate": 4.942030065186744e-06, "loss": 0.5649, "step": 3913 }, { "epoch": 1.5469004692516672, "grad_norm": 0.4362823236216351, "learning_rate": 4.941996517054344e-06, "loss": 0.5658, "step": 3914 }, { "epoch": 1.5472956285502595, "grad_norm": 0.42956539131805577, "learning_rate": 4.941962959331265e-06, "loss": 0.564, "step": 3915 }, { "epoch": 1.5476907878488517, "grad_norm": 0.445691619032847, "learning_rate": 4.941929392017637e-06, "loss": 0.5615, "step": 3916 }, { "epoch": 1.548085947147444, "grad_norm": 0.43714883646638897, "learning_rate": 4.9418958151135946e-06, "loss": 0.5746, "step": 3917 }, { "epoch": 1.5484811064460362, "grad_norm": 0.4574932956702348, "learning_rate": 4.941862228619267e-06, "loss": 0.5527, "step": 3918 }, { "epoch": 1.5488762657446284, "grad_norm": 0.4530888175518143, "learning_rate": 4.941828632534789e-06, "loss": 0.5569, "step": 3919 }, { "epoch": 1.5492714250432207, "grad_norm": 0.44308964244366394, "learning_rate": 4.94179502686029e-06, "loss": 0.5578, "step": 3920 }, { "epoch": 1.549666584341813, "grad_norm": 0.4395901315241771, "learning_rate": 4.941761411595903e-06, "loss": 0.5559, "step": 3921 }, { "epoch": 1.5500617436404052, "grad_norm": 0.4506494793506838, "learning_rate": 4.94172778674176e-06, "loss": 0.5751, "step": 3922 }, { "epoch": 1.5504569029389974, "grad_norm": 0.43273605402011445, "learning_rate": 4.9416941522979926e-06, "loss": 0.5476, "step": 3923 }, { "epoch": 1.5508520622375896, "grad_norm": 0.43415805767715054, "learning_rate": 4.9416605082647325e-06, "loss": 0.5693, "step": 3924 }, { "epoch": 1.5512472215361819, "grad_norm": 0.4434283494921018, "learning_rate": 4.941626854642114e-06, "loss": 0.546, "step": 3925 }, { "epoch": 1.5516423808347741, "grad_norm": 0.443351882260677, "learning_rate": 4.941593191430267e-06, "loss": 0.5511, "step": 3926 }, { "epoch": 1.5520375401333664, "grad_norm": 0.4371115950925369, "learning_rate": 4.941559518629325e-06, "loss": 0.5655, "step": 3927 }, { "epoch": 1.5524326994319586, "grad_norm": 0.4500093849359018, "learning_rate": 4.94152583623942e-06, "loss": 0.5556, "step": 3928 }, { "epoch": 1.5528278587305508, "grad_norm": 0.4353898954337321, "learning_rate": 4.941492144260683e-06, "loss": 0.5571, "step": 3929 }, { "epoch": 1.553223018029143, "grad_norm": 0.45540296825899435, "learning_rate": 4.941458442693249e-06, "loss": 0.5649, "step": 3930 }, { "epoch": 1.5536181773277353, "grad_norm": 0.43590100396307097, "learning_rate": 4.9414247315372474e-06, "loss": 0.5592, "step": 3931 }, { "epoch": 1.5540133366263276, "grad_norm": 0.4273362760763734, "learning_rate": 4.9413910107928136e-06, "loss": 0.5559, "step": 3932 }, { "epoch": 1.5544084959249198, "grad_norm": 0.43873328194495376, "learning_rate": 4.941357280460076e-06, "loss": 0.5707, "step": 3933 }, { "epoch": 1.554803655223512, "grad_norm": 0.44105964043710244, "learning_rate": 4.941323540539171e-06, "loss": 0.5683, "step": 3934 }, { "epoch": 1.5551988145221043, "grad_norm": 0.436934179465898, "learning_rate": 4.941289791030229e-06, "loss": 0.5757, "step": 3935 }, { "epoch": 1.5555939738206965, "grad_norm": 0.45737730913945995, "learning_rate": 4.9412560319333844e-06, "loss": 0.5711, "step": 3936 }, { "epoch": 1.5559891331192888, "grad_norm": 0.42468924058156193, "learning_rate": 4.941222263248767e-06, "loss": 0.5478, "step": 3937 }, { "epoch": 1.556384292417881, "grad_norm": 0.43195983842147717, "learning_rate": 4.941188484976512e-06, "loss": 0.5659, "step": 3938 }, { "epoch": 1.5567794517164733, "grad_norm": 0.44375257055854056, "learning_rate": 4.9411546971167505e-06, "loss": 0.5867, "step": 3939 }, { "epoch": 1.5571746110150655, "grad_norm": 0.43710025950059056, "learning_rate": 4.941120899669616e-06, "loss": 0.5735, "step": 3940 }, { "epoch": 1.5575697703136577, "grad_norm": 0.4474954937975031, "learning_rate": 4.94108709263524e-06, "loss": 0.602, "step": 3941 }, { "epoch": 1.55796492961225, "grad_norm": 0.4305607613140482, "learning_rate": 4.941053276013758e-06, "loss": 0.5548, "step": 3942 }, { "epoch": 1.5583600889108422, "grad_norm": 0.4398028265526488, "learning_rate": 4.941019449805299e-06, "loss": 0.5495, "step": 3943 }, { "epoch": 1.5587552482094345, "grad_norm": 0.427010931690335, "learning_rate": 4.940985614009999e-06, "loss": 0.561, "step": 3944 }, { "epoch": 1.5591504075080267, "grad_norm": 0.43014008416807015, "learning_rate": 4.9409517686279895e-06, "loss": 0.5796, "step": 3945 }, { "epoch": 1.559545566806619, "grad_norm": 0.4473068429218742, "learning_rate": 4.940917913659404e-06, "loss": 0.5594, "step": 3946 }, { "epoch": 1.5599407261052112, "grad_norm": 0.44348060278334916, "learning_rate": 4.940884049104374e-06, "loss": 0.5603, "step": 3947 }, { "epoch": 1.5603358854038034, "grad_norm": 0.4254990006538986, "learning_rate": 4.940850174963035e-06, "loss": 0.5496, "step": 3948 }, { "epoch": 1.5607310447023957, "grad_norm": 0.4599229925834786, "learning_rate": 4.9408162912355185e-06, "loss": 0.569, "step": 3949 }, { "epoch": 1.561126204000988, "grad_norm": 0.4608183190992476, "learning_rate": 4.940782397921957e-06, "loss": 0.5475, "step": 3950 }, { "epoch": 1.5615213632995801, "grad_norm": 0.4334203860785032, "learning_rate": 4.940748495022485e-06, "loss": 0.5617, "step": 3951 }, { "epoch": 1.5619165225981724, "grad_norm": 0.4286064511326075, "learning_rate": 4.940714582537235e-06, "loss": 0.5508, "step": 3952 }, { "epoch": 1.5623116818967646, "grad_norm": 0.4451771504215982, "learning_rate": 4.94068066046634e-06, "loss": 0.5679, "step": 3953 }, { "epoch": 1.5627068411953569, "grad_norm": 0.44950544874820486, "learning_rate": 4.940646728809933e-06, "loss": 0.5571, "step": 3954 }, { "epoch": 1.563102000493949, "grad_norm": 0.43332984605668107, "learning_rate": 4.940612787568148e-06, "loss": 0.5412, "step": 3955 }, { "epoch": 1.5634971597925413, "grad_norm": 0.4537080751951872, "learning_rate": 4.940578836741119e-06, "loss": 0.5922, "step": 3956 }, { "epoch": 1.5638923190911336, "grad_norm": 0.45065137229775004, "learning_rate": 4.940544876328977e-06, "loss": 0.5761, "step": 3957 }, { "epoch": 1.5642874783897258, "grad_norm": 0.468459520127438, "learning_rate": 4.940510906331856e-06, "loss": 0.5766, "step": 3958 }, { "epoch": 1.564682637688318, "grad_norm": 0.4841953450723878, "learning_rate": 4.940476926749892e-06, "loss": 0.5509, "step": 3959 }, { "epoch": 1.5650777969869103, "grad_norm": 0.43421269689782305, "learning_rate": 4.940442937583216e-06, "loss": 0.5428, "step": 3960 }, { "epoch": 1.5654729562855025, "grad_norm": 0.44740726473549824, "learning_rate": 4.940408938831962e-06, "loss": 0.5543, "step": 3961 }, { "epoch": 1.5658681155840948, "grad_norm": 0.43972490377414003, "learning_rate": 4.9403749304962635e-06, "loss": 0.5711, "step": 3962 }, { "epoch": 1.566263274882687, "grad_norm": 0.42877178820545214, "learning_rate": 4.940340912576254e-06, "loss": 0.5674, "step": 3963 }, { "epoch": 1.5666584341812793, "grad_norm": 0.4180015219456009, "learning_rate": 4.940306885072067e-06, "loss": 0.5465, "step": 3964 }, { "epoch": 1.5670535934798715, "grad_norm": 0.44880071394905674, "learning_rate": 4.940272847983837e-06, "loss": 0.5638, "step": 3965 }, { "epoch": 1.5674487527784637, "grad_norm": 0.4470580270494271, "learning_rate": 4.9402388013116965e-06, "loss": 0.5577, "step": 3966 }, { "epoch": 1.567843912077056, "grad_norm": 0.46986742626918304, "learning_rate": 4.94020474505578e-06, "loss": 0.5754, "step": 3967 }, { "epoch": 1.5682390713756482, "grad_norm": 0.4719511276473468, "learning_rate": 4.940170679216222e-06, "loss": 0.5465, "step": 3968 }, { "epoch": 1.5686342306742405, "grad_norm": 0.4819185669692573, "learning_rate": 4.940136603793154e-06, "loss": 0.5903, "step": 3969 }, { "epoch": 1.5690293899728327, "grad_norm": 0.4596392737276118, "learning_rate": 4.940102518786711e-06, "loss": 0.5827, "step": 3970 }, { "epoch": 1.569424549271425, "grad_norm": 0.4350274926677015, "learning_rate": 4.9400684241970285e-06, "loss": 0.5732, "step": 3971 }, { "epoch": 1.5698197085700172, "grad_norm": 0.43547065854272937, "learning_rate": 4.940034320024237e-06, "loss": 0.5587, "step": 3972 }, { "epoch": 1.5702148678686094, "grad_norm": 0.44965772836569196, "learning_rate": 4.940000206268474e-06, "loss": 0.5757, "step": 3973 }, { "epoch": 1.5706100271672017, "grad_norm": 0.42960226035038346, "learning_rate": 4.939966082929872e-06, "loss": 0.5411, "step": 3974 }, { "epoch": 1.571005186465794, "grad_norm": 0.43654243807808146, "learning_rate": 4.939931950008563e-06, "loss": 0.5681, "step": 3975 }, { "epoch": 1.5714003457643861, "grad_norm": 0.46158348779701347, "learning_rate": 4.939897807504684e-06, "loss": 0.5637, "step": 3976 }, { "epoch": 1.5717955050629784, "grad_norm": 0.4506664040924429, "learning_rate": 4.939863655418368e-06, "loss": 0.5716, "step": 3977 }, { "epoch": 1.5721906643615706, "grad_norm": 0.43736830707484653, "learning_rate": 4.939829493749749e-06, "loss": 0.5634, "step": 3978 }, { "epoch": 1.5725858236601629, "grad_norm": 0.47956231415468115, "learning_rate": 4.939795322498961e-06, "loss": 0.5768, "step": 3979 }, { "epoch": 1.572980982958755, "grad_norm": 0.47009053670758705, "learning_rate": 4.93976114166614e-06, "loss": 0.5739, "step": 3980 }, { "epoch": 1.5733761422573473, "grad_norm": 0.4519666405286993, "learning_rate": 4.9397269512514175e-06, "loss": 0.5548, "step": 3981 }, { "epoch": 1.5737713015559396, "grad_norm": 0.46600888709363636, "learning_rate": 4.939692751254929e-06, "loss": 0.5409, "step": 3982 }, { "epoch": 1.5741664608545318, "grad_norm": 0.4362128507908923, "learning_rate": 4.939658541676809e-06, "loss": 0.557, "step": 3983 }, { "epoch": 1.574561620153124, "grad_norm": 0.45311061929248453, "learning_rate": 4.9396243225171916e-06, "loss": 0.5534, "step": 3984 }, { "epoch": 1.5749567794517165, "grad_norm": 0.4344985151811654, "learning_rate": 4.939590093776211e-06, "loss": 0.5646, "step": 3985 }, { "epoch": 1.5753519387503088, "grad_norm": 0.4480209913301821, "learning_rate": 4.939555855454003e-06, "loss": 0.5561, "step": 3986 }, { "epoch": 1.575747098048901, "grad_norm": 0.4531075976767362, "learning_rate": 4.9395216075507e-06, "loss": 0.565, "step": 3987 }, { "epoch": 1.5761422573474932, "grad_norm": 0.4291612629531522, "learning_rate": 4.939487350066438e-06, "loss": 0.5561, "step": 3988 }, { "epoch": 1.5765374166460855, "grad_norm": 0.4411400070808928, "learning_rate": 4.93945308300135e-06, "loss": 0.5722, "step": 3989 }, { "epoch": 1.5769325759446777, "grad_norm": 0.4411690987818465, "learning_rate": 4.939418806355573e-06, "loss": 0.5744, "step": 3990 }, { "epoch": 1.57732773524327, "grad_norm": 0.4234846841720286, "learning_rate": 4.939384520129239e-06, "loss": 0.5242, "step": 3991 }, { "epoch": 1.5777228945418622, "grad_norm": 0.43794297369469665, "learning_rate": 4.9393502243224844e-06, "loss": 0.5702, "step": 3992 }, { "epoch": 1.5781180538404544, "grad_norm": 0.4619795012466095, "learning_rate": 4.9393159189354435e-06, "loss": 0.5602, "step": 3993 }, { "epoch": 1.5785132131390467, "grad_norm": 0.4248706656017066, "learning_rate": 4.9392816039682516e-06, "loss": 0.5593, "step": 3994 }, { "epoch": 1.578908372437639, "grad_norm": 0.4581079886861394, "learning_rate": 4.939247279421041e-06, "loss": 0.5602, "step": 3995 }, { "epoch": 1.5793035317362312, "grad_norm": 0.4305195033029715, "learning_rate": 4.93921294529395e-06, "loss": 0.5596, "step": 3996 }, { "epoch": 1.5796986910348234, "grad_norm": 0.4366623407663496, "learning_rate": 4.9391786015871106e-06, "loss": 0.5597, "step": 3997 }, { "epoch": 1.5800938503334157, "grad_norm": 0.44069624259394685, "learning_rate": 4.939144248300659e-06, "loss": 0.5897, "step": 3998 }, { "epoch": 1.580489009632008, "grad_norm": 0.43890619357978694, "learning_rate": 4.939109885434731e-06, "loss": 0.569, "step": 3999 }, { "epoch": 1.5808841689306001, "grad_norm": 0.4364576916674103, "learning_rate": 4.939075512989459e-06, "loss": 0.5787, "step": 4000 }, { "epoch": 1.5812793282291924, "grad_norm": 0.45319416604702467, "learning_rate": 4.93904113096498e-06, "loss": 0.5832, "step": 4001 }, { "epoch": 1.5816744875277846, "grad_norm": 0.4408144136335299, "learning_rate": 4.939006739361429e-06, "loss": 0.5757, "step": 4002 }, { "epoch": 1.5820696468263769, "grad_norm": 0.43847351893891434, "learning_rate": 4.93897233817894e-06, "loss": 0.57, "step": 4003 }, { "epoch": 1.582464806124969, "grad_norm": 0.4272786242930415, "learning_rate": 4.9389379274176485e-06, "loss": 0.561, "step": 4004 }, { "epoch": 1.5828599654235613, "grad_norm": 0.4273445360462422, "learning_rate": 4.93890350707769e-06, "loss": 0.5543, "step": 4005 }, { "epoch": 1.5832551247221536, "grad_norm": 0.43334855462016986, "learning_rate": 4.9388690771592e-06, "loss": 0.5632, "step": 4006 }, { "epoch": 1.5836502840207458, "grad_norm": 0.4412911003838902, "learning_rate": 4.938834637662313e-06, "loss": 0.5717, "step": 4007 }, { "epoch": 1.584045443319338, "grad_norm": 0.4384873516991556, "learning_rate": 4.9388001885871635e-06, "loss": 0.5569, "step": 4008 }, { "epoch": 1.5844406026179305, "grad_norm": 0.4357385688585848, "learning_rate": 4.938765729933889e-06, "loss": 0.5656, "step": 4009 }, { "epoch": 1.5848357619165228, "grad_norm": 0.43820335981947794, "learning_rate": 4.938731261702624e-06, "loss": 0.5726, "step": 4010 }, { "epoch": 1.585230921215115, "grad_norm": 0.4369840237848306, "learning_rate": 4.938696783893502e-06, "loss": 0.5543, "step": 4011 }, { "epoch": 1.5856260805137072, "grad_norm": 0.4472393204243838, "learning_rate": 4.938662296506661e-06, "loss": 0.5565, "step": 4012 }, { "epoch": 1.5860212398122995, "grad_norm": 0.4615011233799181, "learning_rate": 4.938627799542235e-06, "loss": 0.5587, "step": 4013 }, { "epoch": 1.5864163991108917, "grad_norm": 0.42914755332494525, "learning_rate": 4.93859329300036e-06, "loss": 0.5551, "step": 4014 }, { "epoch": 1.586811558409484, "grad_norm": 0.6994500397802149, "learning_rate": 4.93855877688117e-06, "loss": 0.5641, "step": 4015 }, { "epoch": 1.5872067177080762, "grad_norm": 0.4539471102905313, "learning_rate": 4.938524251184803e-06, "loss": 0.5772, "step": 4016 }, { "epoch": 1.5876018770066684, "grad_norm": 0.4737130066010192, "learning_rate": 4.938489715911394e-06, "loss": 0.5753, "step": 4017 }, { "epoch": 1.5879970363052607, "grad_norm": 0.4459029139930523, "learning_rate": 4.938455171061077e-06, "loss": 0.5774, "step": 4018 }, { "epoch": 1.588392195603853, "grad_norm": 0.4434353397507251, "learning_rate": 4.93842061663399e-06, "loss": 0.5581, "step": 4019 }, { "epoch": 1.5887873549024452, "grad_norm": 0.43023249449874645, "learning_rate": 4.938386052630267e-06, "loss": 0.5429, "step": 4020 }, { "epoch": 1.5891825142010374, "grad_norm": 0.45310635393738163, "learning_rate": 4.938351479050044e-06, "loss": 0.5674, "step": 4021 }, { "epoch": 1.5895776734996296, "grad_norm": 0.45352464255321795, "learning_rate": 4.938316895893458e-06, "loss": 0.5661, "step": 4022 }, { "epoch": 1.5899728327982219, "grad_norm": 0.4532822555467358, "learning_rate": 4.938282303160643e-06, "loss": 0.5515, "step": 4023 }, { "epoch": 1.5903679920968141, "grad_norm": 0.44533687524094095, "learning_rate": 4.938247700851735e-06, "loss": 0.5511, "step": 4024 }, { "epoch": 1.5907631513954064, "grad_norm": 0.45327422167542913, "learning_rate": 4.938213088966872e-06, "loss": 0.5691, "step": 4025 }, { "epoch": 1.5911583106939986, "grad_norm": 0.43728559584246246, "learning_rate": 4.938178467506187e-06, "loss": 0.5565, "step": 4026 }, { "epoch": 1.5915534699925908, "grad_norm": 0.4393345012809165, "learning_rate": 4.938143836469818e-06, "loss": 0.5432, "step": 4027 }, { "epoch": 1.591948629291183, "grad_norm": 0.4446012831175294, "learning_rate": 4.938109195857902e-06, "loss": 0.5782, "step": 4028 }, { "epoch": 1.5923437885897753, "grad_norm": 0.4476104890005792, "learning_rate": 4.938074545670573e-06, "loss": 0.58, "step": 4029 }, { "epoch": 1.5927389478883676, "grad_norm": 0.4469302927676119, "learning_rate": 4.938039885907967e-06, "loss": 0.582, "step": 4030 }, { "epoch": 1.5931341071869598, "grad_norm": 0.4326506856607228, "learning_rate": 4.938005216570221e-06, "loss": 0.5587, "step": 4031 }, { "epoch": 1.593529266485552, "grad_norm": 0.45215823190175686, "learning_rate": 4.9379705376574705e-06, "loss": 0.5416, "step": 4032 }, { "epoch": 1.5939244257841443, "grad_norm": 0.4604923926371955, "learning_rate": 4.937935849169853e-06, "loss": 0.5814, "step": 4033 }, { "epoch": 1.5943195850827365, "grad_norm": 0.4483793169947381, "learning_rate": 4.937901151107504e-06, "loss": 0.5606, "step": 4034 }, { "epoch": 1.5947147443813288, "grad_norm": 0.4731607560798562, "learning_rate": 4.937866443470558e-06, "loss": 0.5616, "step": 4035 }, { "epoch": 1.595109903679921, "grad_norm": 0.44139056004958066, "learning_rate": 4.9378317262591545e-06, "loss": 0.5704, "step": 4036 }, { "epoch": 1.5955050629785132, "grad_norm": 0.43225652789170976, "learning_rate": 4.937796999473427e-06, "loss": 0.545, "step": 4037 }, { "epoch": 1.5959002222771055, "grad_norm": 0.4376950142264724, "learning_rate": 4.937762263113515e-06, "loss": 0.5607, "step": 4038 }, { "epoch": 1.5962953815756977, "grad_norm": 0.43266642151648355, "learning_rate": 4.937727517179552e-06, "loss": 0.5734, "step": 4039 }, { "epoch": 1.59669054087429, "grad_norm": 0.4491269605040788, "learning_rate": 4.9376927616716764e-06, "loss": 0.5593, "step": 4040 }, { "epoch": 1.5970857001728822, "grad_norm": 0.4339310588220914, "learning_rate": 4.937657996590023e-06, "loss": 0.5706, "step": 4041 }, { "epoch": 1.5974808594714744, "grad_norm": 0.4300915782723815, "learning_rate": 4.93762322193473e-06, "loss": 0.549, "step": 4042 }, { "epoch": 1.5978760187700667, "grad_norm": 0.4349429575023993, "learning_rate": 4.937588437705933e-06, "loss": 0.5673, "step": 4043 }, { "epoch": 1.598271178068659, "grad_norm": 0.43087812609014015, "learning_rate": 4.937553643903768e-06, "loss": 0.5514, "step": 4044 }, { "epoch": 1.5986663373672512, "grad_norm": 0.4497997607400426, "learning_rate": 4.937518840528373e-06, "loss": 0.5593, "step": 4045 }, { "epoch": 1.5990614966658434, "grad_norm": 0.44562506368324917, "learning_rate": 4.937484027579883e-06, "loss": 0.5587, "step": 4046 }, { "epoch": 1.5994566559644356, "grad_norm": 0.43652547554219967, "learning_rate": 4.937449205058438e-06, "loss": 0.58, "step": 4047 }, { "epoch": 1.5998518152630279, "grad_norm": 0.4240519663611341, "learning_rate": 4.937414372964171e-06, "loss": 0.5486, "step": 4048 }, { "epoch": 1.6002469745616201, "grad_norm": 0.45799730349541395, "learning_rate": 4.937379531297221e-06, "loss": 0.5567, "step": 4049 }, { "epoch": 1.6006421338602124, "grad_norm": 0.4292261145352039, "learning_rate": 4.937344680057724e-06, "loss": 0.5505, "step": 4050 }, { "epoch": 1.6010372931588046, "grad_norm": 0.42937598054316073, "learning_rate": 4.937309819245818e-06, "loss": 0.5601, "step": 4051 }, { "epoch": 1.6014324524573968, "grad_norm": 0.4380122077896125, "learning_rate": 4.937274948861638e-06, "loss": 0.5608, "step": 4052 }, { "epoch": 1.601827611755989, "grad_norm": 0.4548432558319798, "learning_rate": 4.937240068905322e-06, "loss": 0.5685, "step": 4053 }, { "epoch": 1.6022227710545813, "grad_norm": 0.43181199483513577, "learning_rate": 4.937205179377008e-06, "loss": 0.5646, "step": 4054 }, { "epoch": 1.6026179303531736, "grad_norm": 0.43979884745381725, "learning_rate": 4.937170280276831e-06, "loss": 0.582, "step": 4055 }, { "epoch": 1.6030130896517658, "grad_norm": 0.428767044050838, "learning_rate": 4.93713537160493e-06, "loss": 0.545, "step": 4056 }, { "epoch": 1.603408248950358, "grad_norm": 0.4444074071525543, "learning_rate": 4.9371004533614395e-06, "loss": 0.5357, "step": 4057 }, { "epoch": 1.6038034082489503, "grad_norm": 0.4305444155233475, "learning_rate": 4.9370655255465e-06, "loss": 0.5483, "step": 4058 }, { "epoch": 1.6041985675475425, "grad_norm": 0.44066258955092896, "learning_rate": 4.937030588160247e-06, "loss": 0.5631, "step": 4059 }, { "epoch": 1.6045937268461348, "grad_norm": 0.4487310628582567, "learning_rate": 4.936995641202816e-06, "loss": 0.5761, "step": 4060 }, { "epoch": 1.604988886144727, "grad_norm": 0.44499130482437343, "learning_rate": 4.936960684674348e-06, "loss": 0.5692, "step": 4061 }, { "epoch": 1.6053840454433193, "grad_norm": 0.4412506035853422, "learning_rate": 4.9369257185749766e-06, "loss": 0.5496, "step": 4062 }, { "epoch": 1.6057792047419115, "grad_norm": 0.4359722621598534, "learning_rate": 4.936890742904842e-06, "loss": 0.5493, "step": 4063 }, { "epoch": 1.6061743640405037, "grad_norm": 0.4340637645593551, "learning_rate": 4.936855757664079e-06, "loss": 0.5566, "step": 4064 }, { "epoch": 1.606569523339096, "grad_norm": 0.4289469251048877, "learning_rate": 4.936820762852827e-06, "loss": 0.5362, "step": 4065 }, { "epoch": 1.6069646826376882, "grad_norm": 0.4319109469211228, "learning_rate": 4.936785758471223e-06, "loss": 0.5601, "step": 4066 }, { "epoch": 1.6073598419362805, "grad_norm": 0.4476806397110603, "learning_rate": 4.936750744519404e-06, "loss": 0.5868, "step": 4067 }, { "epoch": 1.6077550012348727, "grad_norm": 0.4386152539571968, "learning_rate": 4.936715720997508e-06, "loss": 0.5643, "step": 4068 }, { "epoch": 1.608150160533465, "grad_norm": 0.4279438753027654, "learning_rate": 4.936680687905673e-06, "loss": 0.5575, "step": 4069 }, { "epoch": 1.6085453198320572, "grad_norm": 0.4326188532708726, "learning_rate": 4.936645645244034e-06, "loss": 0.5598, "step": 4070 }, { "epoch": 1.6089404791306494, "grad_norm": 0.44654932545961695, "learning_rate": 4.936610593012732e-06, "loss": 0.5678, "step": 4071 }, { "epoch": 1.6093356384292417, "grad_norm": 0.4350127525034427, "learning_rate": 4.936575531211902e-06, "loss": 0.552, "step": 4072 }, { "epoch": 1.609730797727834, "grad_norm": 0.4334061095799106, "learning_rate": 4.936540459841684e-06, "loss": 0.5631, "step": 4073 }, { "epoch": 1.6101259570264261, "grad_norm": 0.4380393010701646, "learning_rate": 4.9365053789022145e-06, "loss": 0.5505, "step": 4074 }, { "epoch": 1.6105211163250184, "grad_norm": 0.44462390814852865, "learning_rate": 4.936470288393631e-06, "loss": 0.548, "step": 4075 }, { "epoch": 1.6109162756236106, "grad_norm": 0.4391814847764043, "learning_rate": 4.936435188316071e-06, "loss": 0.5508, "step": 4076 }, { "epoch": 1.6113114349222029, "grad_norm": 0.45314650675562296, "learning_rate": 4.936400078669674e-06, "loss": 0.5763, "step": 4077 }, { "epoch": 1.611706594220795, "grad_norm": 0.42859513707939456, "learning_rate": 4.936364959454577e-06, "loss": 0.5534, "step": 4078 }, { "epoch": 1.6121017535193873, "grad_norm": 0.43651873348895415, "learning_rate": 4.936329830670918e-06, "loss": 0.5539, "step": 4079 }, { "epoch": 1.6124969128179798, "grad_norm": 0.4464140156441266, "learning_rate": 4.9362946923188345e-06, "loss": 0.5639, "step": 4080 }, { "epoch": 1.612892072116572, "grad_norm": 0.43474468854326337, "learning_rate": 4.936259544398465e-06, "loss": 0.567, "step": 4081 }, { "epoch": 1.6132872314151643, "grad_norm": 0.446752616186375, "learning_rate": 4.936224386909947e-06, "loss": 0.5701, "step": 4082 }, { "epoch": 1.6136823907137565, "grad_norm": 0.4281198845056971, "learning_rate": 4.93618921985342e-06, "loss": 0.5582, "step": 4083 }, { "epoch": 1.6140775500123488, "grad_norm": 0.4460784589294146, "learning_rate": 4.93615404322902e-06, "loss": 0.5565, "step": 4084 }, { "epoch": 1.614472709310941, "grad_norm": 0.4488735337458113, "learning_rate": 4.936118857036887e-06, "loss": 0.5498, "step": 4085 }, { "epoch": 1.6148678686095332, "grad_norm": 0.4364780045470906, "learning_rate": 4.936083661277158e-06, "loss": 0.5603, "step": 4086 }, { "epoch": 1.6152630279081255, "grad_norm": 0.4263849821332742, "learning_rate": 4.936048455949971e-06, "loss": 0.5407, "step": 4087 }, { "epoch": 1.6156581872067177, "grad_norm": 0.4400718397147201, "learning_rate": 4.936013241055465e-06, "loss": 0.5762, "step": 4088 }, { "epoch": 1.61605334650531, "grad_norm": 0.4308631720264388, "learning_rate": 4.935978016593779e-06, "loss": 0.566, "step": 4089 }, { "epoch": 1.6164485058039022, "grad_norm": 0.4299942853443749, "learning_rate": 4.935942782565051e-06, "loss": 0.5337, "step": 4090 }, { "epoch": 1.6168436651024944, "grad_norm": 0.4366671539606536, "learning_rate": 4.935907538969418e-06, "loss": 0.555, "step": 4091 }, { "epoch": 1.6172388244010867, "grad_norm": 0.48254923126585936, "learning_rate": 4.93587228580702e-06, "loss": 0.5595, "step": 4092 }, { "epoch": 1.617633983699679, "grad_norm": 0.4239126920096438, "learning_rate": 4.935837023077994e-06, "loss": 0.5517, "step": 4093 }, { "epoch": 1.6180291429982712, "grad_norm": 0.436879102852562, "learning_rate": 4.93580175078248e-06, "loss": 0.545, "step": 4094 }, { "epoch": 1.6184243022968634, "grad_norm": 0.44498901921393713, "learning_rate": 4.935766468920615e-06, "loss": 0.5639, "step": 4095 }, { "epoch": 1.6188194615954556, "grad_norm": 0.4208783410765207, "learning_rate": 4.935731177492539e-06, "loss": 0.5513, "step": 4096 }, { "epoch": 1.6192146208940479, "grad_norm": 0.4403934391341918, "learning_rate": 4.93569587649839e-06, "loss": 0.5597, "step": 4097 }, { "epoch": 1.6196097801926401, "grad_norm": 0.4445680674509993, "learning_rate": 4.935660565938306e-06, "loss": 0.5747, "step": 4098 }, { "epoch": 1.6200049394912324, "grad_norm": 0.4542236992519073, "learning_rate": 4.935625245812426e-06, "loss": 0.5557, "step": 4099 }, { "epoch": 1.6204000987898246, "grad_norm": 0.4448277099086342, "learning_rate": 4.935589916120891e-06, "loss": 0.5652, "step": 4100 }, { "epoch": 1.6207952580884168, "grad_norm": 0.4383946322249754, "learning_rate": 4.935554576863837e-06, "loss": 0.5433, "step": 4101 }, { "epoch": 1.621190417387009, "grad_norm": 0.43502119286992535, "learning_rate": 4.9355192280414024e-06, "loss": 0.5693, "step": 4102 }, { "epoch": 1.6215855766856015, "grad_norm": 0.4316383774413981, "learning_rate": 4.935483869653728e-06, "loss": 0.5677, "step": 4103 }, { "epoch": 1.6219807359841938, "grad_norm": 0.4341260683812164, "learning_rate": 4.935448501700953e-06, "loss": 0.5391, "step": 4104 }, { "epoch": 1.622375895282786, "grad_norm": 0.43195315752672536, "learning_rate": 4.935413124183212e-06, "loss": 0.5639, "step": 4105 }, { "epoch": 1.6227710545813783, "grad_norm": 0.43507822462287415, "learning_rate": 4.93537773710065e-06, "loss": 0.5576, "step": 4106 }, { "epoch": 1.6231662138799705, "grad_norm": 0.46638962293882175, "learning_rate": 4.935342340453402e-06, "loss": 0.5631, "step": 4107 }, { "epoch": 1.6235613731785628, "grad_norm": 0.4558364456192893, "learning_rate": 4.9353069342416085e-06, "loss": 0.5566, "step": 4108 }, { "epoch": 1.623956532477155, "grad_norm": 0.43312180311952486, "learning_rate": 4.935271518465408e-06, "loss": 0.56, "step": 4109 }, { "epoch": 1.6243516917757472, "grad_norm": 0.4365748990534713, "learning_rate": 4.93523609312494e-06, "loss": 0.5629, "step": 4110 }, { "epoch": 1.6247468510743395, "grad_norm": 0.4587870833960197, "learning_rate": 4.935200658220342e-06, "loss": 0.5741, "step": 4111 }, { "epoch": 1.6251420103729317, "grad_norm": 0.43339139678044175, "learning_rate": 4.935165213751757e-06, "loss": 0.5569, "step": 4112 }, { "epoch": 1.625537169671524, "grad_norm": 0.4428057531936081, "learning_rate": 4.93512975971932e-06, "loss": 0.5683, "step": 4113 }, { "epoch": 1.6259323289701162, "grad_norm": 0.4578729689342045, "learning_rate": 4.935094296123172e-06, "loss": 0.5501, "step": 4114 }, { "epoch": 1.6263274882687084, "grad_norm": 0.45476951551439365, "learning_rate": 4.935058822963454e-06, "loss": 0.5709, "step": 4115 }, { "epoch": 1.6267226475673007, "grad_norm": 0.45641536816268186, "learning_rate": 4.935023340240301e-06, "loss": 0.5564, "step": 4116 }, { "epoch": 1.627117806865893, "grad_norm": 0.4488224757255472, "learning_rate": 4.934987847953856e-06, "loss": 0.5619, "step": 4117 }, { "epoch": 1.6275129661644852, "grad_norm": 0.46880383951054816, "learning_rate": 4.934952346104258e-06, "loss": 0.5604, "step": 4118 }, { "epoch": 1.6279081254630774, "grad_norm": 0.545037272078235, "learning_rate": 4.9349168346916454e-06, "loss": 0.543, "step": 4119 }, { "epoch": 1.6283032847616696, "grad_norm": 0.4366418096115771, "learning_rate": 4.934881313716158e-06, "loss": 0.5722, "step": 4120 }, { "epoch": 1.6286984440602619, "grad_norm": 0.43777846292338235, "learning_rate": 4.934845783177935e-06, "loss": 0.5524, "step": 4121 }, { "epoch": 1.6290936033588541, "grad_norm": 0.422011213186665, "learning_rate": 4.934810243077117e-06, "loss": 0.5733, "step": 4122 }, { "epoch": 1.6294887626574464, "grad_norm": 0.44138201150444306, "learning_rate": 4.934774693413843e-06, "loss": 0.5686, "step": 4123 }, { "epoch": 1.6298839219560386, "grad_norm": 0.44921636818156546, "learning_rate": 4.934739134188251e-06, "loss": 0.5724, "step": 4124 }, { "epoch": 1.6302790812546308, "grad_norm": 0.44859391331022486, "learning_rate": 4.934703565400484e-06, "loss": 0.5821, "step": 4125 }, { "epoch": 1.630674240553223, "grad_norm": 0.4233228039840541, "learning_rate": 4.934667987050678e-06, "loss": 0.5571, "step": 4126 }, { "epoch": 1.6310693998518153, "grad_norm": 0.4338865601440852, "learning_rate": 4.934632399138976e-06, "loss": 0.5675, "step": 4127 }, { "epoch": 1.6314645591504076, "grad_norm": 0.4394241216685595, "learning_rate": 4.934596801665515e-06, "loss": 0.5571, "step": 4128 }, { "epoch": 1.6318597184489998, "grad_norm": 0.43864069069871264, "learning_rate": 4.934561194630437e-06, "loss": 0.5381, "step": 4129 }, { "epoch": 1.632254877747592, "grad_norm": 0.42429399086993547, "learning_rate": 4.934525578033881e-06, "loss": 0.5587, "step": 4130 }, { "epoch": 1.6326500370461843, "grad_norm": 0.4404107423141753, "learning_rate": 4.934489951875987e-06, "loss": 0.5721, "step": 4131 }, { "epoch": 1.6330451963447765, "grad_norm": 0.4332961401456829, "learning_rate": 4.934454316156894e-06, "loss": 0.5638, "step": 4132 }, { "epoch": 1.6334403556433688, "grad_norm": 0.4695705815950691, "learning_rate": 4.934418670876743e-06, "loss": 0.561, "step": 4133 }, { "epoch": 1.633835514941961, "grad_norm": 0.45507102791172976, "learning_rate": 4.9343830160356744e-06, "loss": 0.5713, "step": 4134 }, { "epoch": 1.6342306742405532, "grad_norm": 0.43768991836081356, "learning_rate": 4.934347351633827e-06, "loss": 0.556, "step": 4135 }, { "epoch": 1.6346258335391455, "grad_norm": 0.43283166401271617, "learning_rate": 4.934311677671342e-06, "loss": 0.5596, "step": 4136 }, { "epoch": 1.6350209928377377, "grad_norm": 0.42555330658880147, "learning_rate": 4.934275994148357e-06, "loss": 0.5593, "step": 4137 }, { "epoch": 1.63541615213633, "grad_norm": 0.44485035790650657, "learning_rate": 4.934240301065016e-06, "loss": 0.5739, "step": 4138 }, { "epoch": 1.6358113114349222, "grad_norm": 0.4409925294439782, "learning_rate": 4.934204598421457e-06, "loss": 0.5643, "step": 4139 }, { "epoch": 1.6362064707335144, "grad_norm": 0.4310591741238124, "learning_rate": 4.934168886217821e-06, "loss": 0.571, "step": 4140 }, { "epoch": 1.6366016300321067, "grad_norm": 0.43546524799796604, "learning_rate": 4.934133164454246e-06, "loss": 0.5682, "step": 4141 }, { "epoch": 1.636996789330699, "grad_norm": 0.42766476649034174, "learning_rate": 4.934097433130875e-06, "loss": 0.5587, "step": 4142 }, { "epoch": 1.6373919486292912, "grad_norm": 0.4381721056390369, "learning_rate": 4.934061692247847e-06, "loss": 0.5627, "step": 4143 }, { "epoch": 1.6377871079278834, "grad_norm": 0.4289920275995806, "learning_rate": 4.9340259418053035e-06, "loss": 0.547, "step": 4144 }, { "epoch": 1.6381822672264756, "grad_norm": 0.4264307971370297, "learning_rate": 4.933990181803383e-06, "loss": 0.5569, "step": 4145 }, { "epoch": 1.6385774265250679, "grad_norm": 0.4296221573612008, "learning_rate": 4.933954412242228e-06, "loss": 0.5523, "step": 4146 }, { "epoch": 1.6389725858236601, "grad_norm": 0.43507977648083157, "learning_rate": 4.933918633121978e-06, "loss": 0.5561, "step": 4147 }, { "epoch": 1.6393677451222524, "grad_norm": 0.42056933159661486, "learning_rate": 4.933882844442773e-06, "loss": 0.5636, "step": 4148 }, { "epoch": 1.6397629044208446, "grad_norm": 0.4433116034912783, "learning_rate": 4.933847046204754e-06, "loss": 0.5553, "step": 4149 }, { "epoch": 1.6401580637194368, "grad_norm": 0.4459630268656738, "learning_rate": 4.933811238408063e-06, "loss": 0.5577, "step": 4150 }, { "epoch": 1.640553223018029, "grad_norm": 0.4403056379106962, "learning_rate": 4.933775421052838e-06, "loss": 0.5833, "step": 4151 }, { "epoch": 1.6409483823166213, "grad_norm": 0.42886531696805363, "learning_rate": 4.933739594139221e-06, "loss": 0.541, "step": 4152 }, { "epoch": 1.6413435416152136, "grad_norm": 0.4315557640640133, "learning_rate": 4.933703757667353e-06, "loss": 0.5575, "step": 4153 }, { "epoch": 1.6417387009138058, "grad_norm": 0.48140901833929156, "learning_rate": 4.933667911637375e-06, "loss": 0.5783, "step": 4154 }, { "epoch": 1.642133860212398, "grad_norm": 0.42997667165062453, "learning_rate": 4.933632056049427e-06, "loss": 0.5482, "step": 4155 }, { "epoch": 1.6425290195109903, "grad_norm": 0.429706934348882, "learning_rate": 4.93359619090365e-06, "loss": 0.5721, "step": 4156 }, { "epoch": 1.6429241788095825, "grad_norm": 0.4445206300843515, "learning_rate": 4.933560316200185e-06, "loss": 0.5742, "step": 4157 }, { "epoch": 1.6433193381081748, "grad_norm": 0.433653943288636, "learning_rate": 4.933524431939173e-06, "loss": 0.5682, "step": 4158 }, { "epoch": 1.643714497406767, "grad_norm": 0.43516906274908146, "learning_rate": 4.933488538120754e-06, "loss": 0.5658, "step": 4159 }, { "epoch": 1.6441096567053592, "grad_norm": 0.43315071912764175, "learning_rate": 4.933452634745071e-06, "loss": 0.5605, "step": 4160 }, { "epoch": 1.6445048160039515, "grad_norm": 0.4623128049190355, "learning_rate": 4.933416721812262e-06, "loss": 0.5451, "step": 4161 }, { "epoch": 1.6448999753025437, "grad_norm": 0.4322419596499138, "learning_rate": 4.933380799322471e-06, "loss": 0.5405, "step": 4162 }, { "epoch": 1.645295134601136, "grad_norm": 0.4353017445216181, "learning_rate": 4.933344867275837e-06, "loss": 0.5465, "step": 4163 }, { "epoch": 1.6456902938997282, "grad_norm": 0.42774307582614995, "learning_rate": 4.9333089256725034e-06, "loss": 0.5272, "step": 4164 }, { "epoch": 1.6460854531983204, "grad_norm": 0.43158324134092596, "learning_rate": 4.9332729745126085e-06, "loss": 0.5593, "step": 4165 }, { "epoch": 1.6464806124969127, "grad_norm": 0.43107023737645744, "learning_rate": 4.933237013796295e-06, "loss": 0.5674, "step": 4166 }, { "epoch": 1.646875771795505, "grad_norm": 0.470689528328689, "learning_rate": 4.9332010435237045e-06, "loss": 0.5735, "step": 4167 }, { "epoch": 1.6472709310940972, "grad_norm": 0.4337591992128468, "learning_rate": 4.933165063694978e-06, "loss": 0.5651, "step": 4168 }, { "epoch": 1.6476660903926894, "grad_norm": 0.433598238285656, "learning_rate": 4.933129074310257e-06, "loss": 0.5609, "step": 4169 }, { "epoch": 1.6480612496912816, "grad_norm": 0.4383209332082589, "learning_rate": 4.933093075369681e-06, "loss": 0.5675, "step": 4170 }, { "epoch": 1.648456408989874, "grad_norm": 0.47207585803002206, "learning_rate": 4.933057066873394e-06, "loss": 0.5662, "step": 4171 }, { "epoch": 1.6488515682884661, "grad_norm": 0.472525910555258, "learning_rate": 4.933021048821536e-06, "loss": 0.5402, "step": 4172 }, { "epoch": 1.6492467275870584, "grad_norm": 0.4760221302508032, "learning_rate": 4.932985021214248e-06, "loss": 0.5783, "step": 4173 }, { "epoch": 1.6496418868856508, "grad_norm": 0.433597879420229, "learning_rate": 4.932948984051673e-06, "loss": 0.5731, "step": 4174 }, { "epoch": 1.650037046184243, "grad_norm": 0.4402059167564898, "learning_rate": 4.9329129373339525e-06, "loss": 0.547, "step": 4175 }, { "epoch": 1.6504322054828353, "grad_norm": 0.4546433582700222, "learning_rate": 4.932876881061226e-06, "loss": 0.5469, "step": 4176 }, { "epoch": 1.6508273647814276, "grad_norm": 0.4316382194174931, "learning_rate": 4.932840815233637e-06, "loss": 0.5392, "step": 4177 }, { "epoch": 1.6512225240800198, "grad_norm": 0.4334651261656393, "learning_rate": 4.9328047398513265e-06, "loss": 0.5556, "step": 4178 }, { "epoch": 1.651617683378612, "grad_norm": 0.5167325693733595, "learning_rate": 4.932768654914437e-06, "loss": 0.5574, "step": 4179 }, { "epoch": 1.6520128426772043, "grad_norm": 0.4537887100737346, "learning_rate": 4.932732560423108e-06, "loss": 0.5519, "step": 4180 }, { "epoch": 1.6524080019757965, "grad_norm": 0.4407088531206237, "learning_rate": 4.932696456377484e-06, "loss": 0.5602, "step": 4181 }, { "epoch": 1.6528031612743888, "grad_norm": 0.4239271803777524, "learning_rate": 4.932660342777705e-06, "loss": 0.5344, "step": 4182 }, { "epoch": 1.653198320572981, "grad_norm": 0.4221587419527885, "learning_rate": 4.932624219623913e-06, "loss": 0.5419, "step": 4183 }, { "epoch": 1.6535934798715732, "grad_norm": 0.45091328428360944, "learning_rate": 4.932588086916251e-06, "loss": 0.5569, "step": 4184 }, { "epoch": 1.6539886391701655, "grad_norm": 0.4535113787923153, "learning_rate": 4.93255194465486e-06, "loss": 0.5311, "step": 4185 }, { "epoch": 1.6543837984687577, "grad_norm": 0.4495246513741634, "learning_rate": 4.932515792839882e-06, "loss": 0.5459, "step": 4186 }, { "epoch": 1.65477895776735, "grad_norm": 0.45103027154827796, "learning_rate": 4.932479631471459e-06, "loss": 0.5617, "step": 4187 }, { "epoch": 1.6551741170659422, "grad_norm": 0.46161108604777257, "learning_rate": 4.932443460549733e-06, "loss": 0.5744, "step": 4188 }, { "epoch": 1.6555692763645344, "grad_norm": 0.4585641137181729, "learning_rate": 4.932407280074846e-06, "loss": 0.5571, "step": 4189 }, { "epoch": 1.6559644356631267, "grad_norm": 0.47002181393717396, "learning_rate": 4.93237109004694e-06, "loss": 0.5591, "step": 4190 }, { "epoch": 1.656359594961719, "grad_norm": 0.45813305720090625, "learning_rate": 4.932334890466158e-06, "loss": 0.5509, "step": 4191 }, { "epoch": 1.6567547542603112, "grad_norm": 0.4442101744900535, "learning_rate": 4.932298681332641e-06, "loss": 0.5834, "step": 4192 }, { "epoch": 1.6571499135589034, "grad_norm": 0.43904506024703893, "learning_rate": 4.932262462646532e-06, "loss": 0.5628, "step": 4193 }, { "epoch": 1.6575450728574956, "grad_norm": 0.45081395145304054, "learning_rate": 4.932226234407973e-06, "loss": 0.5597, "step": 4194 }, { "epoch": 1.6579402321560879, "grad_norm": 0.45308321586467587, "learning_rate": 4.932189996617106e-06, "loss": 0.5424, "step": 4195 }, { "epoch": 1.6583353914546801, "grad_norm": 0.43748276416781745, "learning_rate": 4.932153749274074e-06, "loss": 0.5478, "step": 4196 }, { "epoch": 1.6587305507532724, "grad_norm": 0.4390093028892108, "learning_rate": 4.932117492379019e-06, "loss": 0.5576, "step": 4197 }, { "epoch": 1.6591257100518648, "grad_norm": 0.43009631591048475, "learning_rate": 4.932081225932084e-06, "loss": 0.5461, "step": 4198 }, { "epoch": 1.659520869350457, "grad_norm": 0.4421965633593312, "learning_rate": 4.93204494993341e-06, "loss": 0.5644, "step": 4199 }, { "epoch": 1.6599160286490493, "grad_norm": 0.44435453925130375, "learning_rate": 4.93200866438314e-06, "loss": 0.5522, "step": 4200 }, { "epoch": 1.6603111879476415, "grad_norm": 0.47458082371469706, "learning_rate": 4.931972369281417e-06, "loss": 0.5471, "step": 4201 }, { "epoch": 1.6607063472462338, "grad_norm": 0.4301353638936425, "learning_rate": 4.931936064628383e-06, "loss": 0.5703, "step": 4202 }, { "epoch": 1.661101506544826, "grad_norm": 0.4372380561131763, "learning_rate": 4.931899750424182e-06, "loss": 0.5683, "step": 4203 }, { "epoch": 1.6614966658434183, "grad_norm": 0.46217752541691776, "learning_rate": 4.931863426668955e-06, "loss": 0.5559, "step": 4204 }, { "epoch": 1.6618918251420105, "grad_norm": 0.4414697866376089, "learning_rate": 4.931827093362844e-06, "loss": 0.5533, "step": 4205 }, { "epoch": 1.6622869844406027, "grad_norm": 0.44026690012982383, "learning_rate": 4.931790750505994e-06, "loss": 0.5473, "step": 4206 }, { "epoch": 1.662682143739195, "grad_norm": 0.44908034456035184, "learning_rate": 4.931754398098546e-06, "loss": 0.5676, "step": 4207 }, { "epoch": 1.6630773030377872, "grad_norm": 0.43506228540161684, "learning_rate": 4.931718036140645e-06, "loss": 0.56, "step": 4208 }, { "epoch": 1.6634724623363795, "grad_norm": 0.4414844514234546, "learning_rate": 4.9316816646324305e-06, "loss": 0.5395, "step": 4209 }, { "epoch": 1.6638676216349717, "grad_norm": 0.4353535377517495, "learning_rate": 4.931645283574047e-06, "loss": 0.5706, "step": 4210 }, { "epoch": 1.664262780933564, "grad_norm": 0.4352610887294283, "learning_rate": 4.931608892965638e-06, "loss": 0.5529, "step": 4211 }, { "epoch": 1.6646579402321562, "grad_norm": 0.48475567073134784, "learning_rate": 4.931572492807346e-06, "loss": 0.5691, "step": 4212 }, { "epoch": 1.6650530995307484, "grad_norm": 0.43914859193682443, "learning_rate": 4.931536083099313e-06, "loss": 0.5447, "step": 4213 }, { "epoch": 1.6654482588293407, "grad_norm": 0.42574696114957944, "learning_rate": 4.931499663841683e-06, "loss": 0.5506, "step": 4214 }, { "epoch": 1.665843418127933, "grad_norm": 0.44003634839285843, "learning_rate": 4.9314632350345995e-06, "loss": 0.5714, "step": 4215 }, { "epoch": 1.6662385774265251, "grad_norm": 0.44345794553567824, "learning_rate": 4.931426796678204e-06, "loss": 0.5525, "step": 4216 }, { "epoch": 1.6666337367251174, "grad_norm": 0.44184543039510044, "learning_rate": 4.9313903487726415e-06, "loss": 0.5533, "step": 4217 }, { "epoch": 1.6670288960237096, "grad_norm": 0.4482583566446144, "learning_rate": 4.931353891318053e-06, "loss": 0.5567, "step": 4218 }, { "epoch": 1.6674240553223019, "grad_norm": 0.44359624620051485, "learning_rate": 4.931317424314583e-06, "loss": 0.5417, "step": 4219 }, { "epoch": 1.667819214620894, "grad_norm": 0.45039139024204555, "learning_rate": 4.9312809477623755e-06, "loss": 0.5756, "step": 4220 }, { "epoch": 1.6682143739194863, "grad_norm": 0.46242291351750414, "learning_rate": 4.931244461661571e-06, "loss": 0.5548, "step": 4221 }, { "epoch": 1.6686095332180786, "grad_norm": 0.44761433940772233, "learning_rate": 4.9312079660123165e-06, "loss": 0.5545, "step": 4222 }, { "epoch": 1.6690046925166708, "grad_norm": 0.42745307952513, "learning_rate": 4.931171460814752e-06, "loss": 0.5612, "step": 4223 }, { "epoch": 1.669399851815263, "grad_norm": 0.4409759305327771, "learning_rate": 4.9311349460690235e-06, "loss": 0.5295, "step": 4224 }, { "epoch": 1.6697950111138553, "grad_norm": 0.44041060145128974, "learning_rate": 4.9310984217752725e-06, "loss": 0.5807, "step": 4225 }, { "epoch": 1.6701901704124476, "grad_norm": 0.45082681364452676, "learning_rate": 4.931061887933643e-06, "loss": 0.5525, "step": 4226 }, { "epoch": 1.6705853297110398, "grad_norm": 0.4325179559967354, "learning_rate": 4.931025344544279e-06, "loss": 0.5614, "step": 4227 }, { "epoch": 1.670980489009632, "grad_norm": 0.4491688766617088, "learning_rate": 4.930988791607324e-06, "loss": 0.5547, "step": 4228 }, { "epoch": 1.6713756483082243, "grad_norm": 0.43251674883633673, "learning_rate": 4.930952229122921e-06, "loss": 0.5525, "step": 4229 }, { "epoch": 1.6717708076068165, "grad_norm": 0.450173373250462, "learning_rate": 4.930915657091213e-06, "loss": 0.5539, "step": 4230 }, { "epoch": 1.6721659669054088, "grad_norm": 0.452547822179787, "learning_rate": 4.930879075512345e-06, "loss": 0.5783, "step": 4231 }, { "epoch": 1.672561126204001, "grad_norm": 0.456787048326293, "learning_rate": 4.93084248438646e-06, "loss": 0.5851, "step": 4232 }, { "epoch": 1.6729562855025932, "grad_norm": 0.4398970129307844, "learning_rate": 4.930805883713702e-06, "loss": 0.5588, "step": 4233 }, { "epoch": 1.6733514448011855, "grad_norm": 0.4344964137010482, "learning_rate": 4.930769273494215e-06, "loss": 0.5422, "step": 4234 }, { "epoch": 1.6737466040997777, "grad_norm": 0.4480357286208341, "learning_rate": 4.930732653728141e-06, "loss": 0.5719, "step": 4235 }, { "epoch": 1.67414176339837, "grad_norm": 0.4468533257133995, "learning_rate": 4.930696024415626e-06, "loss": 0.5706, "step": 4236 }, { "epoch": 1.6745369226969622, "grad_norm": 0.43589038296389493, "learning_rate": 4.930659385556813e-06, "loss": 0.5718, "step": 4237 }, { "epoch": 1.6749320819955544, "grad_norm": 0.46213277997599467, "learning_rate": 4.9306227371518455e-06, "loss": 0.543, "step": 4238 }, { "epoch": 1.6753272412941467, "grad_norm": 0.4418860283426867, "learning_rate": 4.930586079200869e-06, "loss": 0.555, "step": 4239 }, { "epoch": 1.675722400592739, "grad_norm": 0.41996717679739026, "learning_rate": 4.930549411704025e-06, "loss": 0.5357, "step": 4240 }, { "epoch": 1.6761175598913312, "grad_norm": 0.4575919872171913, "learning_rate": 4.930512734661459e-06, "loss": 0.5545, "step": 4241 }, { "epoch": 1.6765127191899234, "grad_norm": 0.44970217578452903, "learning_rate": 4.930476048073316e-06, "loss": 0.5634, "step": 4242 }, { "epoch": 1.6769078784885156, "grad_norm": 0.4436734001641958, "learning_rate": 4.930439351939738e-06, "loss": 0.5848, "step": 4243 }, { "epoch": 1.6773030377871079, "grad_norm": 0.43230787717115343, "learning_rate": 4.93040264626087e-06, "loss": 0.5758, "step": 4244 }, { "epoch": 1.6776981970857001, "grad_norm": 0.45704860376232215, "learning_rate": 4.9303659310368565e-06, "loss": 0.5644, "step": 4245 }, { "epoch": 1.6780933563842924, "grad_norm": 0.458499629507116, "learning_rate": 4.930329206267841e-06, "loss": 0.5714, "step": 4246 }, { "epoch": 1.6784885156828846, "grad_norm": 0.4436057764210825, "learning_rate": 4.930292471953969e-06, "loss": 0.5512, "step": 4247 }, { "epoch": 1.6788836749814768, "grad_norm": 0.4322151502298006, "learning_rate": 4.930255728095383e-06, "loss": 0.5475, "step": 4248 }, { "epoch": 1.679278834280069, "grad_norm": 0.4549937231291737, "learning_rate": 4.930218974692229e-06, "loss": 0.5568, "step": 4249 }, { "epoch": 1.6796739935786613, "grad_norm": 0.44063968344614446, "learning_rate": 4.930182211744649e-06, "loss": 0.562, "step": 4250 }, { "epoch": 1.6800691528772536, "grad_norm": 0.4635898234904489, "learning_rate": 4.930145439252791e-06, "loss": 0.566, "step": 4251 }, { "epoch": 1.6804643121758458, "grad_norm": 0.4347065121603174, "learning_rate": 4.930108657216796e-06, "loss": 0.5579, "step": 4252 }, { "epoch": 1.680859471474438, "grad_norm": 0.45191051205485905, "learning_rate": 4.930071865636811e-06, "loss": 0.5616, "step": 4253 }, { "epoch": 1.6812546307730303, "grad_norm": 0.5003942515624423, "learning_rate": 4.930035064512979e-06, "loss": 0.5544, "step": 4254 }, { "epoch": 1.6816497900716225, "grad_norm": 0.43280130558001995, "learning_rate": 4.929998253845444e-06, "loss": 0.5839, "step": 4255 }, { "epoch": 1.6820449493702148, "grad_norm": 0.4347329458274132, "learning_rate": 4.929961433634352e-06, "loss": 0.5512, "step": 4256 }, { "epoch": 1.682440108668807, "grad_norm": 0.4595377167928187, "learning_rate": 4.9299246038798474e-06, "loss": 0.5797, "step": 4257 }, { "epoch": 1.6828352679673992, "grad_norm": 0.4542171341695038, "learning_rate": 4.9298877645820735e-06, "loss": 0.5421, "step": 4258 }, { "epoch": 1.6832304272659915, "grad_norm": 0.43845775532831804, "learning_rate": 4.929850915741177e-06, "loss": 0.565, "step": 4259 }, { "epoch": 1.6836255865645837, "grad_norm": 0.44402420830903166, "learning_rate": 4.929814057357301e-06, "loss": 0.5605, "step": 4260 }, { "epoch": 1.684020745863176, "grad_norm": 0.44396077168291875, "learning_rate": 4.929777189430591e-06, "loss": 0.5694, "step": 4261 }, { "epoch": 1.6844159051617682, "grad_norm": 0.4413223103845851, "learning_rate": 4.929740311961192e-06, "loss": 0.5681, "step": 4262 }, { "epoch": 1.6848110644603604, "grad_norm": 0.4432712724622185, "learning_rate": 4.929703424949248e-06, "loss": 0.5791, "step": 4263 }, { "epoch": 1.6852062237589527, "grad_norm": 0.4291806701241977, "learning_rate": 4.929666528394904e-06, "loss": 0.5608, "step": 4264 }, { "epoch": 1.685601383057545, "grad_norm": 0.44175554248806165, "learning_rate": 4.929629622298307e-06, "loss": 0.5367, "step": 4265 }, { "epoch": 1.6859965423561372, "grad_norm": 0.43417237826237876, "learning_rate": 4.929592706659599e-06, "loss": 0.5621, "step": 4266 }, { "epoch": 1.6863917016547294, "grad_norm": 0.436619782617058, "learning_rate": 4.929555781478925e-06, "loss": 0.556, "step": 4267 }, { "epoch": 1.6867868609533216, "grad_norm": 0.4412555558446154, "learning_rate": 4.9295188467564326e-06, "loss": 0.5549, "step": 4268 }, { "epoch": 1.687182020251914, "grad_norm": 0.4429817647687938, "learning_rate": 4.929481902492265e-06, "loss": 0.5451, "step": 4269 }, { "epoch": 1.6875771795505063, "grad_norm": 0.42710204000708135, "learning_rate": 4.929444948686568e-06, "loss": 0.5574, "step": 4270 }, { "epoch": 1.6879723388490986, "grad_norm": 0.4345670015513589, "learning_rate": 4.929407985339486e-06, "loss": 0.573, "step": 4271 }, { "epoch": 1.6883674981476908, "grad_norm": 0.42754387278545575, "learning_rate": 4.929371012451165e-06, "loss": 0.5504, "step": 4272 }, { "epoch": 1.688762657446283, "grad_norm": 0.44346425482357044, "learning_rate": 4.9293340300217505e-06, "loss": 0.566, "step": 4273 }, { "epoch": 1.6891578167448753, "grad_norm": 0.43395132844534345, "learning_rate": 4.929297038051386e-06, "loss": 0.5751, "step": 4274 }, { "epoch": 1.6895529760434675, "grad_norm": 0.4289911868889898, "learning_rate": 4.929260036540218e-06, "loss": 0.5545, "step": 4275 }, { "epoch": 1.6899481353420598, "grad_norm": 0.4299481945107919, "learning_rate": 4.929223025488393e-06, "loss": 0.5655, "step": 4276 }, { "epoch": 1.690343294640652, "grad_norm": 0.4315808805682797, "learning_rate": 4.929186004896054e-06, "loss": 0.5586, "step": 4277 }, { "epoch": 1.6907384539392443, "grad_norm": 0.43737402235911826, "learning_rate": 4.929148974763347e-06, "loss": 0.5677, "step": 4278 }, { "epoch": 1.6911336132378365, "grad_norm": 0.44163862598713893, "learning_rate": 4.929111935090418e-06, "loss": 0.5484, "step": 4279 }, { "epoch": 1.6915287725364287, "grad_norm": 0.44050203475372246, "learning_rate": 4.929074885877414e-06, "loss": 0.5639, "step": 4280 }, { "epoch": 1.691923931835021, "grad_norm": 0.44036371463743945, "learning_rate": 4.929037827124477e-06, "loss": 0.5532, "step": 4281 }, { "epoch": 1.6923190911336132, "grad_norm": 0.42541252276330227, "learning_rate": 4.929000758831755e-06, "loss": 0.5541, "step": 4282 }, { "epoch": 1.6927142504322055, "grad_norm": 0.45077617747089965, "learning_rate": 4.928963680999393e-06, "loss": 0.5728, "step": 4283 }, { "epoch": 1.6931094097307977, "grad_norm": 0.43862795202138594, "learning_rate": 4.928926593627537e-06, "loss": 0.5609, "step": 4284 }, { "epoch": 1.69350456902939, "grad_norm": 0.4355391950216865, "learning_rate": 4.928889496716331e-06, "loss": 0.5577, "step": 4285 }, { "epoch": 1.6938997283279822, "grad_norm": 0.449424153003478, "learning_rate": 4.928852390265923e-06, "loss": 0.5589, "step": 4286 }, { "epoch": 1.6942948876265744, "grad_norm": 0.42444603371479195, "learning_rate": 4.928815274276458e-06, "loss": 0.5542, "step": 4287 }, { "epoch": 1.6946900469251667, "grad_norm": 0.4374300606831901, "learning_rate": 4.928778148748081e-06, "loss": 0.5768, "step": 4288 }, { "epoch": 1.695085206223759, "grad_norm": 0.4267602747759363, "learning_rate": 4.928741013680939e-06, "loss": 0.5606, "step": 4289 }, { "epoch": 1.6954803655223512, "grad_norm": 0.4366846729132836, "learning_rate": 4.928703869075176e-06, "loss": 0.5498, "step": 4290 }, { "epoch": 1.6958755248209434, "grad_norm": 0.4435134184494798, "learning_rate": 4.92866671493094e-06, "loss": 0.5485, "step": 4291 }, { "epoch": 1.6962706841195359, "grad_norm": 0.42497901408157157, "learning_rate": 4.928629551248375e-06, "loss": 0.5383, "step": 4292 }, { "epoch": 1.696665843418128, "grad_norm": 0.4443410896472178, "learning_rate": 4.928592378027628e-06, "loss": 0.543, "step": 4293 }, { "epoch": 1.6970610027167203, "grad_norm": 0.4546919285525924, "learning_rate": 4.928555195268845e-06, "loss": 0.5598, "step": 4294 }, { "epoch": 1.6974561620153126, "grad_norm": 0.4327018959048797, "learning_rate": 4.928518002972172e-06, "loss": 0.5607, "step": 4295 }, { "epoch": 1.6978513213139048, "grad_norm": 0.43177140667053937, "learning_rate": 4.928480801137755e-06, "loss": 0.5468, "step": 4296 }, { "epoch": 1.698246480612497, "grad_norm": 0.43141416580935815, "learning_rate": 4.92844358976574e-06, "loss": 0.5663, "step": 4297 }, { "epoch": 1.6986416399110893, "grad_norm": 0.4787754721973709, "learning_rate": 4.928406368856273e-06, "loss": 0.5636, "step": 4298 }, { "epoch": 1.6990367992096815, "grad_norm": 0.442334169803762, "learning_rate": 4.9283691384095e-06, "loss": 0.5823, "step": 4299 }, { "epoch": 1.6994319585082738, "grad_norm": 0.432453303132636, "learning_rate": 4.928331898425568e-06, "loss": 0.5589, "step": 4300 }, { "epoch": 1.699827117806866, "grad_norm": 0.4490638732729113, "learning_rate": 4.9282946489046235e-06, "loss": 0.5754, "step": 4301 }, { "epoch": 1.7002222771054583, "grad_norm": 0.4853856757096015, "learning_rate": 4.928257389846812e-06, "loss": 0.5503, "step": 4302 }, { "epoch": 1.7006174364040505, "grad_norm": 0.42812505775715426, "learning_rate": 4.92822012125228e-06, "loss": 0.5477, "step": 4303 }, { "epoch": 1.7010125957026427, "grad_norm": 0.43886286312335837, "learning_rate": 4.928182843121173e-06, "loss": 0.5716, "step": 4304 }, { "epoch": 1.701407755001235, "grad_norm": 0.4523032752577509, "learning_rate": 4.928145555453638e-06, "loss": 0.5509, "step": 4305 }, { "epoch": 1.7018029142998272, "grad_norm": 0.4517070813549554, "learning_rate": 4.928108258249823e-06, "loss": 0.566, "step": 4306 }, { "epoch": 1.7021980735984195, "grad_norm": 0.5646470946712949, "learning_rate": 4.928070951509873e-06, "loss": 0.575, "step": 4307 }, { "epoch": 1.7025932328970117, "grad_norm": 0.42398116604836683, "learning_rate": 4.928033635233934e-06, "loss": 0.5449, "step": 4308 }, { "epoch": 1.702988392195604, "grad_norm": 0.45125784631407473, "learning_rate": 4.927996309422154e-06, "loss": 0.555, "step": 4309 }, { "epoch": 1.7033835514941962, "grad_norm": 0.4516857652827014, "learning_rate": 4.927958974074678e-06, "loss": 0.5358, "step": 4310 }, { "epoch": 1.7037787107927884, "grad_norm": 0.4486541477736022, "learning_rate": 4.927921629191654e-06, "loss": 0.5706, "step": 4311 }, { "epoch": 1.7041738700913807, "grad_norm": 0.5504549124395234, "learning_rate": 4.927884274773229e-06, "loss": 0.5499, "step": 4312 }, { "epoch": 1.704569029389973, "grad_norm": 0.4377036298330416, "learning_rate": 4.927846910819547e-06, "loss": 0.5624, "step": 4313 }, { "epoch": 1.7049641886885651, "grad_norm": 0.44142723756552443, "learning_rate": 4.9278095373307586e-06, "loss": 0.5641, "step": 4314 }, { "epoch": 1.7053593479871574, "grad_norm": 0.4305246703883127, "learning_rate": 4.927772154307007e-06, "loss": 0.5584, "step": 4315 }, { "epoch": 1.7057545072857496, "grad_norm": 0.42333610423419793, "learning_rate": 4.927734761748441e-06, "loss": 0.5662, "step": 4316 }, { "epoch": 1.7061496665843419, "grad_norm": 0.43713955439883345, "learning_rate": 4.927697359655208e-06, "loss": 0.5574, "step": 4317 }, { "epoch": 1.706544825882934, "grad_norm": 0.4350887115565443, "learning_rate": 4.927659948027453e-06, "loss": 0.5484, "step": 4318 }, { "epoch": 1.7069399851815263, "grad_norm": 0.4305661898067747, "learning_rate": 4.927622526865324e-06, "loss": 0.5721, "step": 4319 }, { "epoch": 1.7073351444801186, "grad_norm": 0.4237425713774883, "learning_rate": 4.927585096168967e-06, "loss": 0.5385, "step": 4320 }, { "epoch": 1.7077303037787108, "grad_norm": 0.42768772713800246, "learning_rate": 4.9275476559385316e-06, "loss": 0.5615, "step": 4321 }, { "epoch": 1.708125463077303, "grad_norm": 0.45101823860302, "learning_rate": 4.927510206174162e-06, "loss": 0.5777, "step": 4322 }, { "epoch": 1.7085206223758953, "grad_norm": 0.45186205819480785, "learning_rate": 4.927472746876007e-06, "loss": 0.55, "step": 4323 }, { "epoch": 1.7089157816744875, "grad_norm": 0.4632522948748882, "learning_rate": 4.9274352780442125e-06, "loss": 0.5719, "step": 4324 }, { "epoch": 1.7093109409730798, "grad_norm": 0.43100101608453223, "learning_rate": 4.927397799678927e-06, "loss": 0.5549, "step": 4325 }, { "epoch": 1.709706100271672, "grad_norm": 0.434763481025286, "learning_rate": 4.927360311780296e-06, "loss": 0.566, "step": 4326 }, { "epoch": 1.7101012595702643, "grad_norm": 0.43309030464048226, "learning_rate": 4.927322814348468e-06, "loss": 0.5345, "step": 4327 }, { "epoch": 1.7104964188688565, "grad_norm": 0.48013463816215374, "learning_rate": 4.92728530738359e-06, "loss": 0.5912, "step": 4328 }, { "epoch": 1.7108915781674487, "grad_norm": 0.47034903875978573, "learning_rate": 4.927247790885809e-06, "loss": 0.5778, "step": 4329 }, { "epoch": 1.711286737466041, "grad_norm": 0.4278937802281345, "learning_rate": 4.927210264855274e-06, "loss": 0.5597, "step": 4330 }, { "epoch": 1.7116818967646332, "grad_norm": 0.43626768378812447, "learning_rate": 4.927172729292129e-06, "loss": 0.5651, "step": 4331 }, { "epoch": 1.7120770560632255, "grad_norm": 0.4465422521558346, "learning_rate": 4.927135184196524e-06, "loss": 0.5756, "step": 4332 }, { "epoch": 1.7124722153618177, "grad_norm": 0.44974916569628687, "learning_rate": 4.927097629568606e-06, "loss": 0.568, "step": 4333 }, { "epoch": 1.71286737466041, "grad_norm": 0.4472334023755386, "learning_rate": 4.927060065408522e-06, "loss": 0.5797, "step": 4334 }, { "epoch": 1.7132625339590022, "grad_norm": 0.4481801045998597, "learning_rate": 4.9270224917164204e-06, "loss": 0.5675, "step": 4335 }, { "epoch": 1.7136576932575944, "grad_norm": 0.4549341973795611, "learning_rate": 4.926984908492448e-06, "loss": 0.5803, "step": 4336 }, { "epoch": 1.7140528525561867, "grad_norm": 0.4359842869982814, "learning_rate": 4.9269473157367535e-06, "loss": 0.5499, "step": 4337 }, { "epoch": 1.714448011854779, "grad_norm": 0.5200059350206986, "learning_rate": 4.926909713449482e-06, "loss": 0.5577, "step": 4338 }, { "epoch": 1.7148431711533711, "grad_norm": 0.4689154411180137, "learning_rate": 4.926872101630784e-06, "loss": 0.5545, "step": 4339 }, { "epoch": 1.7152383304519634, "grad_norm": 0.45440101006285427, "learning_rate": 4.9268344802808055e-06, "loss": 0.5633, "step": 4340 }, { "epoch": 1.7156334897505556, "grad_norm": 0.45895219511156227, "learning_rate": 4.926796849399694e-06, "loss": 0.5515, "step": 4341 }, { "epoch": 1.7160286490491479, "grad_norm": 0.43433747556834423, "learning_rate": 4.9267592089876e-06, "loss": 0.55, "step": 4342 }, { "epoch": 1.71642380834774, "grad_norm": 0.4338247101488299, "learning_rate": 4.926721559044668e-06, "loss": 0.5563, "step": 4343 }, { "epoch": 1.7168189676463324, "grad_norm": 0.45944141139935163, "learning_rate": 4.926683899571048e-06, "loss": 0.5357, "step": 4344 }, { "epoch": 1.7172141269449246, "grad_norm": 0.4264619359745203, "learning_rate": 4.9266462305668876e-06, "loss": 0.5385, "step": 4345 }, { "epoch": 1.7176092862435168, "grad_norm": 0.43636381830305015, "learning_rate": 4.926608552032334e-06, "loss": 0.5781, "step": 4346 }, { "epoch": 1.718004445542109, "grad_norm": 0.4488377673647985, "learning_rate": 4.926570863967535e-06, "loss": 0.559, "step": 4347 }, { "epoch": 1.7183996048407013, "grad_norm": 0.44153153641448994, "learning_rate": 4.926533166372639e-06, "loss": 0.5704, "step": 4348 }, { "epoch": 1.7187947641392936, "grad_norm": 0.43552915303217893, "learning_rate": 4.926495459247795e-06, "loss": 0.5564, "step": 4349 }, { "epoch": 1.7191899234378858, "grad_norm": 0.4306421081394027, "learning_rate": 4.9264577425931505e-06, "loss": 0.5668, "step": 4350 }, { "epoch": 1.719585082736478, "grad_norm": 0.43170183294012887, "learning_rate": 4.926420016408852e-06, "loss": 0.56, "step": 4351 }, { "epoch": 1.7199802420350703, "grad_norm": 0.44310661756844827, "learning_rate": 4.92638228069505e-06, "loss": 0.5444, "step": 4352 }, { "epoch": 1.7203754013336625, "grad_norm": 0.5473701951430664, "learning_rate": 4.926344535451892e-06, "loss": 0.5518, "step": 4353 }, { "epoch": 1.7207705606322548, "grad_norm": 0.45773670023142704, "learning_rate": 4.926306780679526e-06, "loss": 0.5879, "step": 4354 }, { "epoch": 1.721165719930847, "grad_norm": 0.4470158656152022, "learning_rate": 4.926269016378099e-06, "loss": 0.5574, "step": 4355 }, { "epoch": 1.7215608792294392, "grad_norm": 0.4326444239390642, "learning_rate": 4.926231242547762e-06, "loss": 0.5575, "step": 4356 }, { "epoch": 1.7219560385280315, "grad_norm": 0.43096022881772944, "learning_rate": 4.926193459188662e-06, "loss": 0.5237, "step": 4357 }, { "epoch": 1.7223511978266237, "grad_norm": 0.4335631980043337, "learning_rate": 4.926155666300947e-06, "loss": 0.5508, "step": 4358 }, { "epoch": 1.722746357125216, "grad_norm": 0.43344519231600975, "learning_rate": 4.926117863884765e-06, "loss": 0.5578, "step": 4359 }, { "epoch": 1.7231415164238082, "grad_norm": 0.439357665476649, "learning_rate": 4.926080051940267e-06, "loss": 0.5677, "step": 4360 }, { "epoch": 1.7235366757224004, "grad_norm": 0.45079399416037436, "learning_rate": 4.926042230467598e-06, "loss": 0.5653, "step": 4361 }, { "epoch": 1.7239318350209927, "grad_norm": 0.41470626875209715, "learning_rate": 4.9260043994669094e-06, "loss": 0.5375, "step": 4362 }, { "epoch": 1.7243269943195851, "grad_norm": 0.4313994825808373, "learning_rate": 4.925966558938348e-06, "loss": 0.5692, "step": 4363 }, { "epoch": 1.7247221536181774, "grad_norm": 0.4321560662738567, "learning_rate": 4.925928708882064e-06, "loss": 0.5467, "step": 4364 }, { "epoch": 1.7251173129167696, "grad_norm": 0.43615051233389873, "learning_rate": 4.925890849298204e-06, "loss": 0.561, "step": 4365 }, { "epoch": 1.7255124722153619, "grad_norm": 0.4386066095911232, "learning_rate": 4.925852980186918e-06, "loss": 0.5702, "step": 4366 }, { "epoch": 1.725907631513954, "grad_norm": 0.4294008469837109, "learning_rate": 4.9258151015483555e-06, "loss": 0.5611, "step": 4367 }, { "epoch": 1.7263027908125463, "grad_norm": 0.43462199012351793, "learning_rate": 4.925777213382663e-06, "loss": 0.571, "step": 4368 }, { "epoch": 1.7266979501111386, "grad_norm": 0.45940417820864915, "learning_rate": 4.925739315689991e-06, "loss": 0.576, "step": 4369 }, { "epoch": 1.7270931094097308, "grad_norm": 0.43409099796596096, "learning_rate": 4.925701408470489e-06, "loss": 0.5583, "step": 4370 }, { "epoch": 1.727488268708323, "grad_norm": 0.4364436014175337, "learning_rate": 4.925663491724304e-06, "loss": 0.5465, "step": 4371 }, { "epoch": 1.7278834280069153, "grad_norm": 0.4242367032389472, "learning_rate": 4.9256255654515865e-06, "loss": 0.5566, "step": 4372 }, { "epoch": 1.7282785873055075, "grad_norm": 0.4348950743141649, "learning_rate": 4.925587629652483e-06, "loss": 0.5524, "step": 4373 }, { "epoch": 1.7286737466040998, "grad_norm": 0.440868038863271, "learning_rate": 4.925549684327145e-06, "loss": 0.5432, "step": 4374 }, { "epoch": 1.729068905902692, "grad_norm": 0.4594372909416891, "learning_rate": 4.925511729475722e-06, "loss": 0.573, "step": 4375 }, { "epoch": 1.7294640652012843, "grad_norm": 0.442038328983773, "learning_rate": 4.92547376509836e-06, "loss": 0.5436, "step": 4376 }, { "epoch": 1.7298592244998765, "grad_norm": 0.4488082560018164, "learning_rate": 4.925435791195211e-06, "loss": 0.5382, "step": 4377 }, { "epoch": 1.7302543837984687, "grad_norm": 0.4451552451369125, "learning_rate": 4.925397807766422e-06, "loss": 0.5708, "step": 4378 }, { "epoch": 1.730649543097061, "grad_norm": 0.44829170627702375, "learning_rate": 4.925359814812144e-06, "loss": 0.5566, "step": 4379 }, { "epoch": 1.7310447023956532, "grad_norm": 0.45319114553972123, "learning_rate": 4.925321812332526e-06, "loss": 0.5812, "step": 4380 }, { "epoch": 1.7314398616942455, "grad_norm": 0.4730466509935319, "learning_rate": 4.925283800327715e-06, "loss": 0.5595, "step": 4381 }, { "epoch": 1.7318350209928377, "grad_norm": 0.44863564694406577, "learning_rate": 4.925245778797863e-06, "loss": 0.5537, "step": 4382 }, { "epoch": 1.73223018029143, "grad_norm": 0.449712427848464, "learning_rate": 4.925207747743118e-06, "loss": 0.5548, "step": 4383 }, { "epoch": 1.7326253395900222, "grad_norm": 0.4353187498862786, "learning_rate": 4.925169707163629e-06, "loss": 0.5699, "step": 4384 }, { "epoch": 1.7330204988886144, "grad_norm": 0.4552613973646662, "learning_rate": 4.925131657059548e-06, "loss": 0.5589, "step": 4385 }, { "epoch": 1.7334156581872067, "grad_norm": 0.43804313299916375, "learning_rate": 4.925093597431021e-06, "loss": 0.5665, "step": 4386 }, { "epoch": 1.7338108174857991, "grad_norm": 0.44336856585398643, "learning_rate": 4.925055528278199e-06, "loss": 0.5691, "step": 4387 }, { "epoch": 1.7342059767843914, "grad_norm": 0.4453678495451275, "learning_rate": 4.925017449601231e-06, "loss": 0.5799, "step": 4388 }, { "epoch": 1.7346011360829836, "grad_norm": 0.43883849979746115, "learning_rate": 4.924979361400268e-06, "loss": 0.5714, "step": 4389 }, { "epoch": 1.7349962953815758, "grad_norm": 0.4301004660209356, "learning_rate": 4.924941263675458e-06, "loss": 0.5542, "step": 4390 }, { "epoch": 1.735391454680168, "grad_norm": 0.4371671225872987, "learning_rate": 4.924903156426952e-06, "loss": 0.5634, "step": 4391 }, { "epoch": 1.7357866139787603, "grad_norm": 0.4467984256797525, "learning_rate": 4.924865039654898e-06, "loss": 0.5741, "step": 4392 }, { "epoch": 1.7361817732773526, "grad_norm": 0.4373830450072716, "learning_rate": 4.9248269133594464e-06, "loss": 0.5546, "step": 4393 }, { "epoch": 1.7365769325759448, "grad_norm": 0.44157114771440137, "learning_rate": 4.924788777540748e-06, "loss": 0.5758, "step": 4394 }, { "epoch": 1.736972091874537, "grad_norm": 0.42686715929933167, "learning_rate": 4.9247506321989514e-06, "loss": 0.5612, "step": 4395 }, { "epoch": 1.7373672511731293, "grad_norm": 0.429547791413384, "learning_rate": 4.924712477334206e-06, "loss": 0.5474, "step": 4396 }, { "epoch": 1.7377624104717215, "grad_norm": 0.4456343541323995, "learning_rate": 4.924674312946663e-06, "loss": 0.5648, "step": 4397 }, { "epoch": 1.7381575697703138, "grad_norm": 0.4557318656179386, "learning_rate": 4.924636139036472e-06, "loss": 0.5811, "step": 4398 }, { "epoch": 1.738552729068906, "grad_norm": 0.42877428492133096, "learning_rate": 4.924597955603782e-06, "loss": 0.5719, "step": 4399 }, { "epoch": 1.7389478883674983, "grad_norm": 0.44201823103262, "learning_rate": 4.924559762648744e-06, "loss": 0.5527, "step": 4400 }, { "epoch": 1.7393430476660905, "grad_norm": 0.4406878460734057, "learning_rate": 4.924521560171507e-06, "loss": 0.5506, "step": 4401 }, { "epoch": 1.7397382069646827, "grad_norm": 0.4238997986809783, "learning_rate": 4.924483348172222e-06, "loss": 0.555, "step": 4402 }, { "epoch": 1.740133366263275, "grad_norm": 0.444786923090035, "learning_rate": 4.924445126651038e-06, "loss": 0.5585, "step": 4403 }, { "epoch": 1.7405285255618672, "grad_norm": 0.42484925220727704, "learning_rate": 4.924406895608106e-06, "loss": 0.5482, "step": 4404 }, { "epoch": 1.7409236848604595, "grad_norm": 0.44349133709535676, "learning_rate": 4.924368655043577e-06, "loss": 0.5573, "step": 4405 }, { "epoch": 1.7413188441590517, "grad_norm": 0.4227263535085141, "learning_rate": 4.924330404957599e-06, "loss": 0.5465, "step": 4406 }, { "epoch": 1.741714003457644, "grad_norm": 0.44828080019324945, "learning_rate": 4.924292145350323e-06, "loss": 0.5723, "step": 4407 }, { "epoch": 1.7421091627562362, "grad_norm": 0.426266339908026, "learning_rate": 4.924253876221899e-06, "loss": 0.5772, "step": 4408 }, { "epoch": 1.7425043220548284, "grad_norm": 0.43419093036176604, "learning_rate": 4.92421559757248e-06, "loss": 0.5596, "step": 4409 }, { "epoch": 1.7428994813534207, "grad_norm": 0.43554329163688116, "learning_rate": 4.924177309402213e-06, "loss": 0.5697, "step": 4410 }, { "epoch": 1.743294640652013, "grad_norm": 0.4473385143087953, "learning_rate": 4.9241390117112495e-06, "loss": 0.5642, "step": 4411 }, { "epoch": 1.7436897999506051, "grad_norm": 0.4387919180383067, "learning_rate": 4.92410070449974e-06, "loss": 0.5712, "step": 4412 }, { "epoch": 1.7440849592491974, "grad_norm": 0.4440101663736749, "learning_rate": 4.924062387767835e-06, "loss": 0.5767, "step": 4413 }, { "epoch": 1.7444801185477896, "grad_norm": 0.43157217680041976, "learning_rate": 4.924024061515684e-06, "loss": 0.5589, "step": 4414 }, { "epoch": 1.7448752778463819, "grad_norm": 0.4207925328836993, "learning_rate": 4.9239857257434395e-06, "loss": 0.5494, "step": 4415 }, { "epoch": 1.745270437144974, "grad_norm": 0.4343427301041821, "learning_rate": 4.923947380451252e-06, "loss": 0.5418, "step": 4416 }, { "epoch": 1.7456655964435663, "grad_norm": 0.43915247356661835, "learning_rate": 4.92390902563927e-06, "loss": 0.5457, "step": 4417 }, { "epoch": 1.7460607557421586, "grad_norm": 0.412423518922869, "learning_rate": 4.923870661307645e-06, "loss": 0.543, "step": 4418 }, { "epoch": 1.7464559150407508, "grad_norm": 0.4659475964234698, "learning_rate": 4.923832287456527e-06, "loss": 0.5727, "step": 4419 }, { "epoch": 1.746851074339343, "grad_norm": 0.4386733240756286, "learning_rate": 4.92379390408607e-06, "loss": 0.546, "step": 4420 }, { "epoch": 1.7472462336379353, "grad_norm": 0.4566302235906808, "learning_rate": 4.9237555111964204e-06, "loss": 0.577, "step": 4421 }, { "epoch": 1.7476413929365275, "grad_norm": 0.4544004401768208, "learning_rate": 4.923717108787731e-06, "loss": 0.5863, "step": 4422 }, { "epoch": 1.7480365522351198, "grad_norm": 0.4364008166512911, "learning_rate": 4.923678696860153e-06, "loss": 0.5588, "step": 4423 }, { "epoch": 1.748431711533712, "grad_norm": 0.44646539196245927, "learning_rate": 4.923640275413838e-06, "loss": 0.5797, "step": 4424 }, { "epoch": 1.7488268708323043, "grad_norm": 0.42595898093134776, "learning_rate": 4.923601844448934e-06, "loss": 0.5326, "step": 4425 }, { "epoch": 1.7492220301308965, "grad_norm": 0.43632414312178214, "learning_rate": 4.923563403965595e-06, "loss": 0.5477, "step": 4426 }, { "epoch": 1.7496171894294887, "grad_norm": 0.43161605146182297, "learning_rate": 4.923524953963969e-06, "loss": 0.5652, "step": 4427 }, { "epoch": 1.750012348728081, "grad_norm": 0.42112039961812653, "learning_rate": 4.923486494444209e-06, "loss": 0.5403, "step": 4428 }, { "epoch": 1.7504075080266732, "grad_norm": 0.4387403266260913, "learning_rate": 4.923448025406467e-06, "loss": 0.5646, "step": 4429 }, { "epoch": 1.7508026673252655, "grad_norm": 0.44195630371865374, "learning_rate": 4.923409546850891e-06, "loss": 0.5514, "step": 4430 }, { "epoch": 1.7511978266238577, "grad_norm": 0.44395804443010556, "learning_rate": 4.923371058777635e-06, "loss": 0.5645, "step": 4431 }, { "epoch": 1.75159298592245, "grad_norm": 0.4326703585981714, "learning_rate": 4.923332561186849e-06, "loss": 0.5616, "step": 4432 }, { "epoch": 1.7519881452210422, "grad_norm": 0.41917281291116726, "learning_rate": 4.923294054078684e-06, "loss": 0.5365, "step": 4433 }, { "epoch": 1.7523833045196344, "grad_norm": 0.43629197498616906, "learning_rate": 4.923255537453292e-06, "loss": 0.5715, "step": 4434 }, { "epoch": 1.7527784638182267, "grad_norm": 0.43248199576929525, "learning_rate": 4.923217011310823e-06, "loss": 0.5339, "step": 4435 }, { "epoch": 1.753173623116819, "grad_norm": 0.4796555742650671, "learning_rate": 4.923178475651429e-06, "loss": 0.5628, "step": 4436 }, { "epoch": 1.7535687824154111, "grad_norm": 0.44016438303089994, "learning_rate": 4.923139930475262e-06, "loss": 0.5796, "step": 4437 }, { "epoch": 1.7539639417140034, "grad_norm": 0.44061698759839063, "learning_rate": 4.923101375782472e-06, "loss": 0.5676, "step": 4438 }, { "epoch": 1.7543591010125956, "grad_norm": 0.43587278759955905, "learning_rate": 4.923062811573211e-06, "loss": 0.5731, "step": 4439 }, { "epoch": 1.7547542603111879, "grad_norm": 0.4324193780709019, "learning_rate": 4.9230242378476325e-06, "loss": 0.5662, "step": 4440 }, { "epoch": 1.75514941960978, "grad_norm": 0.4330363556293551, "learning_rate": 4.922985654605884e-06, "loss": 0.5526, "step": 4441 }, { "epoch": 1.7555445789083723, "grad_norm": 0.4314258458445518, "learning_rate": 4.922947061848121e-06, "loss": 0.5468, "step": 4442 }, { "epoch": 1.7559397382069646, "grad_norm": 0.4367345050157589, "learning_rate": 4.922908459574492e-06, "loss": 0.5573, "step": 4443 }, { "epoch": 1.7563348975055568, "grad_norm": 0.5054619145164584, "learning_rate": 4.92286984778515e-06, "loss": 0.5611, "step": 4444 }, { "epoch": 1.756730056804149, "grad_norm": 0.4244999054734717, "learning_rate": 4.922831226480247e-06, "loss": 0.5533, "step": 4445 }, { "epoch": 1.7571252161027413, "grad_norm": 0.4172564528587188, "learning_rate": 4.9227925956599336e-06, "loss": 0.5394, "step": 4446 }, { "epoch": 1.7575203754013335, "grad_norm": 0.4487356151355405, "learning_rate": 4.922753955324362e-06, "loss": 0.5593, "step": 4447 }, { "epoch": 1.7579155346999258, "grad_norm": 0.4471793604107619, "learning_rate": 4.922715305473684e-06, "loss": 0.545, "step": 4448 }, { "epoch": 1.758310693998518, "grad_norm": 0.42116771593933916, "learning_rate": 4.922676646108052e-06, "loss": 0.5565, "step": 4449 }, { "epoch": 1.7587058532971103, "grad_norm": 0.42986579453156515, "learning_rate": 4.9226379772276165e-06, "loss": 0.5542, "step": 4450 }, { "epoch": 1.7591010125957025, "grad_norm": 0.4247741467500481, "learning_rate": 4.922599298832531e-06, "loss": 0.5277, "step": 4451 }, { "epoch": 1.7594961718942947, "grad_norm": 0.43883975962836197, "learning_rate": 4.922560610922946e-06, "loss": 0.5554, "step": 4452 }, { "epoch": 1.759891331192887, "grad_norm": 0.4269542206063633, "learning_rate": 4.922521913499014e-06, "loss": 0.55, "step": 4453 }, { "epoch": 1.7602864904914792, "grad_norm": 0.44358761831837534, "learning_rate": 4.922483206560888e-06, "loss": 0.5681, "step": 4454 }, { "epoch": 1.7606816497900715, "grad_norm": 0.41524870259004626, "learning_rate": 4.9224444901087174e-06, "loss": 0.5287, "step": 4455 }, { "epoch": 1.7610768090886637, "grad_norm": 0.47449363146584195, "learning_rate": 4.922405764142656e-06, "loss": 0.5534, "step": 4456 }, { "epoch": 1.761471968387256, "grad_norm": 0.4274899599226474, "learning_rate": 4.9223670286628566e-06, "loss": 0.5513, "step": 4457 }, { "epoch": 1.7618671276858484, "grad_norm": 0.4229395462347126, "learning_rate": 4.92232828366947e-06, "loss": 0.5643, "step": 4458 }, { "epoch": 1.7622622869844407, "grad_norm": 0.43795229928660484, "learning_rate": 4.922289529162649e-06, "loss": 0.5577, "step": 4459 }, { "epoch": 1.762657446283033, "grad_norm": 0.4509336716051313, "learning_rate": 4.922250765142546e-06, "loss": 0.5369, "step": 4460 }, { "epoch": 1.7630526055816251, "grad_norm": 0.4963694155527903, "learning_rate": 4.9222119916093115e-06, "loss": 0.5729, "step": 4461 }, { "epoch": 1.7634477648802174, "grad_norm": 0.4311986192626347, "learning_rate": 4.9221732085631e-06, "loss": 0.5636, "step": 4462 }, { "epoch": 1.7638429241788096, "grad_norm": 0.4319021698000714, "learning_rate": 4.9221344160040626e-06, "loss": 0.554, "step": 4463 }, { "epoch": 1.7642380834774019, "grad_norm": 0.42976334810476374, "learning_rate": 4.922095613932353e-06, "loss": 0.5721, "step": 4464 }, { "epoch": 1.764633242775994, "grad_norm": 0.4348605034262508, "learning_rate": 4.922056802348122e-06, "loss": 0.5481, "step": 4465 }, { "epoch": 1.7650284020745863, "grad_norm": 0.4402318664595867, "learning_rate": 4.9220179812515226e-06, "loss": 0.5659, "step": 4466 }, { "epoch": 1.7654235613731786, "grad_norm": 0.43124398530041586, "learning_rate": 4.921979150642707e-06, "loss": 0.5737, "step": 4467 }, { "epoch": 1.7658187206717708, "grad_norm": 0.43936196155478946, "learning_rate": 4.921940310521828e-06, "loss": 0.5595, "step": 4468 }, { "epoch": 1.766213879970363, "grad_norm": 0.43223733140815723, "learning_rate": 4.921901460889039e-06, "loss": 0.5488, "step": 4469 }, { "epoch": 1.7666090392689553, "grad_norm": 0.43010489859753376, "learning_rate": 4.921862601744491e-06, "loss": 0.5597, "step": 4470 }, { "epoch": 1.7670041985675475, "grad_norm": 0.4258097789495025, "learning_rate": 4.9218237330883375e-06, "loss": 0.5548, "step": 4471 }, { "epoch": 1.7673993578661398, "grad_norm": 0.4441676656075857, "learning_rate": 4.921784854920731e-06, "loss": 0.5439, "step": 4472 }, { "epoch": 1.767794517164732, "grad_norm": 0.429127198914778, "learning_rate": 4.921745967241825e-06, "loss": 0.5624, "step": 4473 }, { "epoch": 1.7681896764633243, "grad_norm": 0.42973439941503405, "learning_rate": 4.921707070051769e-06, "loss": 0.5405, "step": 4474 }, { "epoch": 1.7685848357619165, "grad_norm": 0.4368917818588069, "learning_rate": 4.92166816335072e-06, "loss": 0.571, "step": 4475 }, { "epoch": 1.7689799950605087, "grad_norm": 0.4329313735901494, "learning_rate": 4.921629247138829e-06, "loss": 0.5425, "step": 4476 }, { "epoch": 1.769375154359101, "grad_norm": 0.4522903482231501, "learning_rate": 4.9215903214162485e-06, "loss": 0.5566, "step": 4477 }, { "epoch": 1.7697703136576932, "grad_norm": 0.4460656956099865, "learning_rate": 4.921551386183131e-06, "loss": 0.5558, "step": 4478 }, { "epoch": 1.7701654729562855, "grad_norm": 0.41968856516312, "learning_rate": 4.921512441439631e-06, "loss": 0.558, "step": 4479 }, { "epoch": 1.7705606322548777, "grad_norm": 0.4512166443513065, "learning_rate": 4.9214734871859e-06, "loss": 0.543, "step": 4480 }, { "epoch": 1.77095579155347, "grad_norm": 0.44040585068512283, "learning_rate": 4.921434523422093e-06, "loss": 0.5606, "step": 4481 }, { "epoch": 1.7713509508520624, "grad_norm": 0.41848653488225906, "learning_rate": 4.9213955501483605e-06, "loss": 0.5361, "step": 4482 }, { "epoch": 1.7717461101506546, "grad_norm": 0.43807590314563183, "learning_rate": 4.921356567364856e-06, "loss": 0.5314, "step": 4483 }, { "epoch": 1.7721412694492469, "grad_norm": 0.4700972659356998, "learning_rate": 4.921317575071733e-06, "loss": 0.5636, "step": 4484 }, { "epoch": 1.7725364287478391, "grad_norm": 0.45042036525685325, "learning_rate": 4.921278573269146e-06, "loss": 0.5544, "step": 4485 }, { "epoch": 1.7729315880464314, "grad_norm": 0.44614873313304904, "learning_rate": 4.9212395619572474e-06, "loss": 0.5693, "step": 4486 }, { "epoch": 1.7733267473450236, "grad_norm": 0.44860751788633424, "learning_rate": 4.92120054113619e-06, "loss": 0.558, "step": 4487 }, { "epoch": 1.7737219066436158, "grad_norm": 0.43151386401272274, "learning_rate": 4.921161510806125e-06, "loss": 0.5542, "step": 4488 }, { "epoch": 1.774117065942208, "grad_norm": 0.4356644343958426, "learning_rate": 4.92112247096721e-06, "loss": 0.5368, "step": 4489 }, { "epoch": 1.7745122252408003, "grad_norm": 0.4548142559509372, "learning_rate": 4.921083421619595e-06, "loss": 0.5736, "step": 4490 }, { "epoch": 1.7749073845393926, "grad_norm": 0.4459993625684455, "learning_rate": 4.921044362763436e-06, "loss": 0.557, "step": 4491 }, { "epoch": 1.7753025438379848, "grad_norm": 0.44551019969621913, "learning_rate": 4.921005294398883e-06, "loss": 0.5612, "step": 4492 }, { "epoch": 1.775697703136577, "grad_norm": 0.4534954772992248, "learning_rate": 4.9209662165260916e-06, "loss": 0.5764, "step": 4493 }, { "epoch": 1.7760928624351693, "grad_norm": 0.45806011580752654, "learning_rate": 4.9209271291452156e-06, "loss": 0.5565, "step": 4494 }, { "epoch": 1.7764880217337615, "grad_norm": 0.42599165598927907, "learning_rate": 4.920888032256408e-06, "loss": 0.5434, "step": 4495 }, { "epoch": 1.7768831810323538, "grad_norm": 0.427438042814662, "learning_rate": 4.920848925859822e-06, "loss": 0.5496, "step": 4496 }, { "epoch": 1.777278340330946, "grad_norm": 0.4584387507023162, "learning_rate": 4.9208098099556114e-06, "loss": 0.5712, "step": 4497 }, { "epoch": 1.7776734996295382, "grad_norm": 0.4355189310861454, "learning_rate": 4.920770684543929e-06, "loss": 0.5534, "step": 4498 }, { "epoch": 1.7780686589281305, "grad_norm": 0.43597371943223134, "learning_rate": 4.920731549624931e-06, "loss": 0.565, "step": 4499 }, { "epoch": 1.7784638182267227, "grad_norm": 0.4268466727675861, "learning_rate": 4.920692405198769e-06, "loss": 0.5426, "step": 4500 }, { "epoch": 1.778858977525315, "grad_norm": 0.4224690255424684, "learning_rate": 4.920653251265597e-06, "loss": 0.555, "step": 4501 }, { "epoch": 1.7792541368239072, "grad_norm": 0.42762282174827215, "learning_rate": 4.920614087825568e-06, "loss": 0.5525, "step": 4502 }, { "epoch": 1.7796492961224994, "grad_norm": 0.4290253808006352, "learning_rate": 4.9205749148788376e-06, "loss": 0.5723, "step": 4503 }, { "epoch": 1.7800444554210917, "grad_norm": 0.4316979082254998, "learning_rate": 4.920535732425559e-06, "loss": 0.555, "step": 4504 }, { "epoch": 1.780439614719684, "grad_norm": 0.438272777807001, "learning_rate": 4.920496540465885e-06, "loss": 0.5421, "step": 4505 }, { "epoch": 1.7808347740182762, "grad_norm": 0.455364594727949, "learning_rate": 4.920457338999971e-06, "loss": 0.5472, "step": 4506 }, { "epoch": 1.7812299333168684, "grad_norm": 0.44459923067197527, "learning_rate": 4.920418128027971e-06, "loss": 0.5707, "step": 4507 }, { "epoch": 1.7816250926154606, "grad_norm": 0.4417890387860095, "learning_rate": 4.920378907550037e-06, "loss": 0.5691, "step": 4508 }, { "epoch": 1.7820202519140529, "grad_norm": 0.44085319317841215, "learning_rate": 4.9203396775663245e-06, "loss": 0.5439, "step": 4509 }, { "epoch": 1.7824154112126451, "grad_norm": 0.45781235310847124, "learning_rate": 4.920300438076989e-06, "loss": 0.5535, "step": 4510 }, { "epoch": 1.7828105705112374, "grad_norm": 0.42751841581481953, "learning_rate": 4.9202611890821815e-06, "loss": 0.5404, "step": 4511 }, { "epoch": 1.7832057298098296, "grad_norm": 0.4341840586645018, "learning_rate": 4.920221930582059e-06, "loss": 0.5357, "step": 4512 }, { "epoch": 1.7836008891084218, "grad_norm": 0.44062665450398303, "learning_rate": 4.920182662576773e-06, "loss": 0.5533, "step": 4513 }, { "epoch": 1.783996048407014, "grad_norm": 0.4569945381667453, "learning_rate": 4.920143385066479e-06, "loss": 0.5605, "step": 4514 }, { "epoch": 1.7843912077056063, "grad_norm": 0.5311247214308134, "learning_rate": 4.920104098051333e-06, "loss": 0.5786, "step": 4515 }, { "epoch": 1.7847863670041986, "grad_norm": 0.46139739197081714, "learning_rate": 4.920064801531486e-06, "loss": 0.5773, "step": 4516 }, { "epoch": 1.7851815263027908, "grad_norm": 0.43510101422224146, "learning_rate": 4.920025495507095e-06, "loss": 0.5375, "step": 4517 }, { "epoch": 1.785576685601383, "grad_norm": 0.4299696722046226, "learning_rate": 4.919986179978313e-06, "loss": 0.5596, "step": 4518 }, { "epoch": 1.7859718448999753, "grad_norm": 0.4245007247792506, "learning_rate": 4.9199468549452956e-06, "loss": 0.5575, "step": 4519 }, { "epoch": 1.7863670041985675, "grad_norm": 0.427078611580864, "learning_rate": 4.919907520408196e-06, "loss": 0.5594, "step": 4520 }, { "epoch": 1.7867621634971598, "grad_norm": 0.43840491567979, "learning_rate": 4.919868176367168e-06, "loss": 0.5404, "step": 4521 }, { "epoch": 1.787157322795752, "grad_norm": 0.4569134416853893, "learning_rate": 4.919828822822369e-06, "loss": 0.5508, "step": 4522 }, { "epoch": 1.7875524820943443, "grad_norm": 0.43107835382000415, "learning_rate": 4.91978945977395e-06, "loss": 0.5594, "step": 4523 }, { "epoch": 1.7879476413929365, "grad_norm": 0.4361231870002236, "learning_rate": 4.919750087222068e-06, "loss": 0.5726, "step": 4524 }, { "epoch": 1.7883428006915287, "grad_norm": 0.4353192973361471, "learning_rate": 4.919710705166878e-06, "loss": 0.5641, "step": 4525 }, { "epoch": 1.788737959990121, "grad_norm": 0.4364332444547788, "learning_rate": 4.919671313608533e-06, "loss": 0.541, "step": 4526 }, { "epoch": 1.7891331192887132, "grad_norm": 0.43409123496491997, "learning_rate": 4.919631912547188e-06, "loss": 0.5687, "step": 4527 }, { "epoch": 1.7895282785873055, "grad_norm": 0.43446982982003957, "learning_rate": 4.919592501982998e-06, "loss": 0.5556, "step": 4528 }, { "epoch": 1.7899234378858977, "grad_norm": 0.4345438030521792, "learning_rate": 4.9195530819161185e-06, "loss": 0.5474, "step": 4529 }, { "epoch": 1.79031859718449, "grad_norm": 0.43436414603534473, "learning_rate": 4.919513652346704e-06, "loss": 0.5548, "step": 4530 }, { "epoch": 1.7907137564830822, "grad_norm": 0.4320792577761697, "learning_rate": 4.919474213274908e-06, "loss": 0.5293, "step": 4531 }, { "epoch": 1.7911089157816744, "grad_norm": 0.4417297782885469, "learning_rate": 4.919434764700888e-06, "loss": 0.5532, "step": 4532 }, { "epoch": 1.7915040750802667, "grad_norm": 0.4491898130529519, "learning_rate": 4.9193953066247965e-06, "loss": 0.5691, "step": 4533 }, { "epoch": 1.791899234378859, "grad_norm": 0.44010561319843694, "learning_rate": 4.919355839046789e-06, "loss": 0.5601, "step": 4534 }, { "epoch": 1.7922943936774511, "grad_norm": 0.4326683763258569, "learning_rate": 4.919316361967021e-06, "loss": 0.5601, "step": 4535 }, { "epoch": 1.7926895529760434, "grad_norm": 0.49880321858382415, "learning_rate": 4.919276875385648e-06, "loss": 0.5802, "step": 4536 }, { "epoch": 1.7930847122746356, "grad_norm": 0.4402605348668624, "learning_rate": 4.919237379302824e-06, "loss": 0.5442, "step": 4537 }, { "epoch": 1.7934798715732279, "grad_norm": 0.4487929856641043, "learning_rate": 4.919197873718705e-06, "loss": 0.5683, "step": 4538 }, { "epoch": 1.79387503087182, "grad_norm": 0.42534492592015277, "learning_rate": 4.919158358633445e-06, "loss": 0.5652, "step": 4539 }, { "epoch": 1.7942701901704123, "grad_norm": 0.4457639959321754, "learning_rate": 4.919118834047201e-06, "loss": 0.5658, "step": 4540 }, { "epoch": 1.7946653494690046, "grad_norm": 0.4541593885926761, "learning_rate": 4.919079299960127e-06, "loss": 0.5688, "step": 4541 }, { "epoch": 1.7950605087675968, "grad_norm": 0.42333628277611585, "learning_rate": 4.919039756372378e-06, "loss": 0.5474, "step": 4542 }, { "epoch": 1.795455668066189, "grad_norm": 0.42037667148139624, "learning_rate": 4.91900020328411e-06, "loss": 0.5578, "step": 4543 }, { "epoch": 1.7958508273647813, "grad_norm": 0.41437397951305344, "learning_rate": 4.918960640695478e-06, "loss": 0.5507, "step": 4544 }, { "epoch": 1.7962459866633735, "grad_norm": 0.43166169225760287, "learning_rate": 4.918921068606638e-06, "loss": 0.5518, "step": 4545 }, { "epoch": 1.7966411459619658, "grad_norm": 0.4327031422658885, "learning_rate": 4.9188814870177435e-06, "loss": 0.5559, "step": 4546 }, { "epoch": 1.797036305260558, "grad_norm": 0.43810822912426284, "learning_rate": 4.918841895928953e-06, "loss": 0.5538, "step": 4547 }, { "epoch": 1.7974314645591503, "grad_norm": 0.43509704083525685, "learning_rate": 4.918802295340419e-06, "loss": 0.5534, "step": 4548 }, { "epoch": 1.7978266238577425, "grad_norm": 0.4204249306753487, "learning_rate": 4.918762685252299e-06, "loss": 0.555, "step": 4549 }, { "epoch": 1.7982217831563347, "grad_norm": 0.429252379340873, "learning_rate": 4.918723065664747e-06, "loss": 0.5569, "step": 4550 }, { "epoch": 1.798616942454927, "grad_norm": 0.4222980081248242, "learning_rate": 4.918683436577921e-06, "loss": 0.5271, "step": 4551 }, { "epoch": 1.7990121017535192, "grad_norm": 0.4313877563692727, "learning_rate": 4.918643797991975e-06, "loss": 0.5702, "step": 4552 }, { "epoch": 1.7994072610521117, "grad_norm": 0.43938081010488067, "learning_rate": 4.918604149907064e-06, "loss": 0.5914, "step": 4553 }, { "epoch": 1.799802420350704, "grad_norm": 0.42842791710741546, "learning_rate": 4.918564492323346e-06, "loss": 0.5464, "step": 4554 }, { "epoch": 1.8001975796492962, "grad_norm": 0.4346701478393733, "learning_rate": 4.918524825240973e-06, "loss": 0.5426, "step": 4555 }, { "epoch": 1.8005927389478884, "grad_norm": 0.4300228304840291, "learning_rate": 4.918485148660105e-06, "loss": 0.5718, "step": 4556 }, { "epoch": 1.8009878982464806, "grad_norm": 0.4328620116394069, "learning_rate": 4.918445462580895e-06, "loss": 0.567, "step": 4557 }, { "epoch": 1.8013830575450729, "grad_norm": 0.43079626932416043, "learning_rate": 4.9184057670035e-06, "loss": 0.5518, "step": 4558 }, { "epoch": 1.8017782168436651, "grad_norm": 0.43706587631711175, "learning_rate": 4.918366061928076e-06, "loss": 0.5492, "step": 4559 }, { "epoch": 1.8021733761422574, "grad_norm": 0.4403528370210141, "learning_rate": 4.918326347354778e-06, "loss": 0.564, "step": 4560 }, { "epoch": 1.8025685354408496, "grad_norm": 0.4265423519618484, "learning_rate": 4.918286623283763e-06, "loss": 0.5441, "step": 4561 }, { "epoch": 1.8029636947394418, "grad_norm": 0.4497893586884997, "learning_rate": 4.918246889715186e-06, "loss": 0.5729, "step": 4562 }, { "epoch": 1.803358854038034, "grad_norm": 0.4394008615994604, "learning_rate": 4.918207146649204e-06, "loss": 0.5577, "step": 4563 }, { "epoch": 1.8037540133366263, "grad_norm": 0.4541079886766186, "learning_rate": 4.918167394085974e-06, "loss": 0.5629, "step": 4564 }, { "epoch": 1.8041491726352186, "grad_norm": 0.45506948243874085, "learning_rate": 4.91812763202565e-06, "loss": 0.5561, "step": 4565 }, { "epoch": 1.8045443319338108, "grad_norm": 0.4342638593346457, "learning_rate": 4.918087860468388e-06, "loss": 0.576, "step": 4566 }, { "epoch": 1.804939491232403, "grad_norm": 0.4340982482700696, "learning_rate": 4.918048079414346e-06, "loss": 0.5554, "step": 4567 }, { "epoch": 1.8053346505309953, "grad_norm": 0.4409861288626772, "learning_rate": 4.91800828886368e-06, "loss": 0.5726, "step": 4568 }, { "epoch": 1.8057298098295875, "grad_norm": 0.4322705316149436, "learning_rate": 4.917968488816545e-06, "loss": 0.5329, "step": 4569 }, { "epoch": 1.8061249691281798, "grad_norm": 0.4192848854350575, "learning_rate": 4.917928679273098e-06, "loss": 0.5427, "step": 4570 }, { "epoch": 1.806520128426772, "grad_norm": 0.44939852167500016, "learning_rate": 4.917888860233496e-06, "loss": 0.556, "step": 4571 }, { "epoch": 1.8069152877253642, "grad_norm": 0.5167355570733759, "learning_rate": 4.917849031697894e-06, "loss": 0.6005, "step": 4572 }, { "epoch": 1.8073104470239565, "grad_norm": 0.4403454466703087, "learning_rate": 4.91780919366645e-06, "loss": 0.5508, "step": 4573 }, { "epoch": 1.8077056063225487, "grad_norm": 0.4293181191267813, "learning_rate": 4.917769346139319e-06, "loss": 0.5607, "step": 4574 }, { "epoch": 1.808100765621141, "grad_norm": 0.4338122274539976, "learning_rate": 4.9177294891166585e-06, "loss": 0.5423, "step": 4575 }, { "epoch": 1.8084959249197334, "grad_norm": 0.43148032764038485, "learning_rate": 4.917689622598625e-06, "loss": 0.5718, "step": 4576 }, { "epoch": 1.8088910842183257, "grad_norm": 0.43602229802024295, "learning_rate": 4.917649746585374e-06, "loss": 0.5404, "step": 4577 }, { "epoch": 1.809286243516918, "grad_norm": 0.44355545237967436, "learning_rate": 4.917609861077064e-06, "loss": 0.5673, "step": 4578 }, { "epoch": 1.8096814028155102, "grad_norm": 0.45356140574084836, "learning_rate": 4.917569966073849e-06, "loss": 0.5466, "step": 4579 }, { "epoch": 1.8100765621141024, "grad_norm": 0.4133512710636827, "learning_rate": 4.917530061575888e-06, "loss": 0.5353, "step": 4580 }, { "epoch": 1.8104717214126946, "grad_norm": 0.4368743815868589, "learning_rate": 4.917490147583337e-06, "loss": 0.5711, "step": 4581 }, { "epoch": 1.8108668807112869, "grad_norm": 0.429475779733978, "learning_rate": 4.917450224096353e-06, "loss": 0.5663, "step": 4582 }, { "epoch": 1.8112620400098791, "grad_norm": 0.4489675045116446, "learning_rate": 4.917410291115092e-06, "loss": 0.569, "step": 4583 }, { "epoch": 1.8116571993084714, "grad_norm": 0.4253227142982292, "learning_rate": 4.917370348639712e-06, "loss": 0.5518, "step": 4584 }, { "epoch": 1.8120523586070636, "grad_norm": 0.43792903853943244, "learning_rate": 4.917330396670368e-06, "loss": 0.5547, "step": 4585 }, { "epoch": 1.8124475179056558, "grad_norm": 0.431983453096147, "learning_rate": 4.917290435207219e-06, "loss": 0.5622, "step": 4586 }, { "epoch": 1.812842677204248, "grad_norm": 0.4421445444609276, "learning_rate": 4.9172504642504204e-06, "loss": 0.5734, "step": 4587 }, { "epoch": 1.8132378365028403, "grad_norm": 0.4410353497824584, "learning_rate": 4.91721048380013e-06, "loss": 0.5692, "step": 4588 }, { "epoch": 1.8136329958014326, "grad_norm": 0.41201401996131587, "learning_rate": 4.917170493856504e-06, "loss": 0.5427, "step": 4589 }, { "epoch": 1.8140281551000248, "grad_norm": 0.4334126547698007, "learning_rate": 4.917130494419702e-06, "loss": 0.553, "step": 4590 }, { "epoch": 1.814423314398617, "grad_norm": 0.4336096435165992, "learning_rate": 4.917090485489877e-06, "loss": 0.5694, "step": 4591 }, { "epoch": 1.8148184736972093, "grad_norm": 0.43402803896434733, "learning_rate": 4.91705046706719e-06, "loss": 0.5671, "step": 4592 }, { "epoch": 1.8152136329958015, "grad_norm": 0.43863493460328706, "learning_rate": 4.917010439151796e-06, "loss": 0.5469, "step": 4593 }, { "epoch": 1.8156087922943938, "grad_norm": 0.4232411523985182, "learning_rate": 4.916970401743852e-06, "loss": 0.5397, "step": 4594 }, { "epoch": 1.816003951592986, "grad_norm": 0.41773834042402014, "learning_rate": 4.916930354843516e-06, "loss": 0.5595, "step": 4595 }, { "epoch": 1.8163991108915782, "grad_norm": 0.42847635907614967, "learning_rate": 4.9168902984509456e-06, "loss": 0.5675, "step": 4596 }, { "epoch": 1.8167942701901705, "grad_norm": 0.43702921400183176, "learning_rate": 4.9168502325662985e-06, "loss": 0.544, "step": 4597 }, { "epoch": 1.8171894294887627, "grad_norm": 0.4359050509194601, "learning_rate": 4.91681015718973e-06, "loss": 0.5531, "step": 4598 }, { "epoch": 1.817584588787355, "grad_norm": 0.4238189029244474, "learning_rate": 4.9167700723214e-06, "loss": 0.5222, "step": 4599 }, { "epoch": 1.8179797480859472, "grad_norm": 0.42884238541026204, "learning_rate": 4.916729977961463e-06, "loss": 0.5486, "step": 4600 }, { "epoch": 1.8183749073845394, "grad_norm": 0.42474360636234726, "learning_rate": 4.91668987411008e-06, "loss": 0.5553, "step": 4601 }, { "epoch": 1.8187700666831317, "grad_norm": 0.44168858012742196, "learning_rate": 4.916649760767405e-06, "loss": 0.5518, "step": 4602 }, { "epoch": 1.819165225981724, "grad_norm": 0.4556359965693222, "learning_rate": 4.916609637933598e-06, "loss": 0.5668, "step": 4603 }, { "epoch": 1.8195603852803162, "grad_norm": 0.43307847780187875, "learning_rate": 4.916569505608816e-06, "loss": 0.5498, "step": 4604 }, { "epoch": 1.8199555445789084, "grad_norm": 0.4311936492838269, "learning_rate": 4.916529363793216e-06, "loss": 0.554, "step": 4605 }, { "epoch": 1.8203507038775006, "grad_norm": 0.42973054647548137, "learning_rate": 4.916489212486956e-06, "loss": 0.565, "step": 4606 }, { "epoch": 1.8207458631760929, "grad_norm": 0.42836235280682294, "learning_rate": 4.916449051690194e-06, "loss": 0.5599, "step": 4607 }, { "epoch": 1.8211410224746851, "grad_norm": 0.41648067518333354, "learning_rate": 4.916408881403087e-06, "loss": 0.5499, "step": 4608 }, { "epoch": 1.8215361817732774, "grad_norm": 0.4389273257682643, "learning_rate": 4.916368701625795e-06, "loss": 0.5582, "step": 4609 }, { "epoch": 1.8219313410718696, "grad_norm": 0.4996066817560874, "learning_rate": 4.916328512358472e-06, "loss": 0.544, "step": 4610 }, { "epoch": 1.8223265003704618, "grad_norm": 0.4320671880478789, "learning_rate": 4.916288313601278e-06, "loss": 0.5684, "step": 4611 }, { "epoch": 1.822721659669054, "grad_norm": 0.47994501476908125, "learning_rate": 4.916248105354372e-06, "loss": 0.5627, "step": 4612 }, { "epoch": 1.8231168189676463, "grad_norm": 0.44725918814624716, "learning_rate": 4.91620788761791e-06, "loss": 0.5801, "step": 4613 }, { "epoch": 1.8235119782662386, "grad_norm": 0.41780605996678455, "learning_rate": 4.9161676603920505e-06, "loss": 0.5368, "step": 4614 }, { "epoch": 1.8239071375648308, "grad_norm": 0.42149016682401924, "learning_rate": 4.9161274236769516e-06, "loss": 0.5417, "step": 4615 }, { "epoch": 1.824302296863423, "grad_norm": 0.4159145131021071, "learning_rate": 4.916087177472771e-06, "loss": 0.5634, "step": 4616 }, { "epoch": 1.8246974561620153, "grad_norm": 0.427772247807, "learning_rate": 4.916046921779668e-06, "loss": 0.5597, "step": 4617 }, { "epoch": 1.8250926154606075, "grad_norm": 0.504175016285204, "learning_rate": 4.916006656597799e-06, "loss": 0.5534, "step": 4618 }, { "epoch": 1.8254877747591998, "grad_norm": 0.43930170796960316, "learning_rate": 4.915966381927324e-06, "loss": 0.5625, "step": 4619 }, { "epoch": 1.825882934057792, "grad_norm": 0.4317015969701484, "learning_rate": 4.9159260977683986e-06, "loss": 0.5535, "step": 4620 }, { "epoch": 1.8262780933563842, "grad_norm": 0.4319583379497269, "learning_rate": 4.915885804121184e-06, "loss": 0.5443, "step": 4621 }, { "epoch": 1.8266732526549765, "grad_norm": 0.46283454804644775, "learning_rate": 4.915845500985836e-06, "loss": 0.5798, "step": 4622 }, { "epoch": 1.8270684119535687, "grad_norm": 0.4578316028623526, "learning_rate": 4.915805188362514e-06, "loss": 0.5873, "step": 4623 }, { "epoch": 1.827463571252161, "grad_norm": 0.4380807378788219, "learning_rate": 4.915764866251376e-06, "loss": 0.5702, "step": 4624 }, { "epoch": 1.8278587305507532, "grad_norm": 0.4635611902517724, "learning_rate": 4.915724534652581e-06, "loss": 0.5676, "step": 4625 }, { "epoch": 1.8282538898493454, "grad_norm": 0.44336086993981816, "learning_rate": 4.915684193566287e-06, "loss": 0.571, "step": 4626 }, { "epoch": 1.8286490491479377, "grad_norm": 0.49804959145623445, "learning_rate": 4.915643842992652e-06, "loss": 0.5704, "step": 4627 }, { "epoch": 1.82904420844653, "grad_norm": 0.4484804351198767, "learning_rate": 4.915603482931835e-06, "loss": 0.5601, "step": 4628 }, { "epoch": 1.8294393677451222, "grad_norm": 0.4259382481340384, "learning_rate": 4.915563113383994e-06, "loss": 0.5414, "step": 4629 }, { "epoch": 1.8298345270437144, "grad_norm": 0.46410055417082385, "learning_rate": 4.915522734349289e-06, "loss": 0.5668, "step": 4630 }, { "epoch": 1.8302296863423066, "grad_norm": 0.4450378060474863, "learning_rate": 4.915482345827876e-06, "loss": 0.5625, "step": 4631 }, { "epoch": 1.830624845640899, "grad_norm": 0.4469682638808664, "learning_rate": 4.915441947819916e-06, "loss": 0.56, "step": 4632 }, { "epoch": 1.8310200049394911, "grad_norm": 0.4386874685983757, "learning_rate": 4.915401540325566e-06, "loss": 0.5763, "step": 4633 }, { "epoch": 1.8314151642380834, "grad_norm": 0.4386404133941846, "learning_rate": 4.9153611233449864e-06, "loss": 0.5513, "step": 4634 }, { "epoch": 1.8318103235366756, "grad_norm": 0.45589832428091503, "learning_rate": 4.915320696878335e-06, "loss": 0.5545, "step": 4635 }, { "epoch": 1.8322054828352679, "grad_norm": 0.4388397614681877, "learning_rate": 4.91528026092577e-06, "loss": 0.5369, "step": 4636 }, { "epoch": 1.83260064213386, "grad_norm": 0.4220869515007803, "learning_rate": 4.915239815487451e-06, "loss": 0.5536, "step": 4637 }, { "epoch": 1.8329958014324523, "grad_norm": 0.4516307562377459, "learning_rate": 4.915199360563536e-06, "loss": 0.5663, "step": 4638 }, { "epoch": 1.8333909607310446, "grad_norm": 0.43082783021013155, "learning_rate": 4.915158896154185e-06, "loss": 0.5711, "step": 4639 }, { "epoch": 1.8337861200296368, "grad_norm": 0.4280112586626305, "learning_rate": 4.915118422259557e-06, "loss": 0.5586, "step": 4640 }, { "epoch": 1.834181279328229, "grad_norm": 0.455862847226725, "learning_rate": 4.91507793887981e-06, "loss": 0.5567, "step": 4641 }, { "epoch": 1.8345764386268213, "grad_norm": 0.43432786099005233, "learning_rate": 4.915037446015103e-06, "loss": 0.5722, "step": 4642 }, { "epoch": 1.8349715979254135, "grad_norm": 0.45288339991619825, "learning_rate": 4.914996943665596e-06, "loss": 0.5548, "step": 4643 }, { "epoch": 1.8353667572240058, "grad_norm": 0.430304761849555, "learning_rate": 4.914956431831447e-06, "loss": 0.5569, "step": 4644 }, { "epoch": 1.835761916522598, "grad_norm": 0.4516766791005426, "learning_rate": 4.914915910512815e-06, "loss": 0.5439, "step": 4645 }, { "epoch": 1.8361570758211903, "grad_norm": 0.4262026911256271, "learning_rate": 4.914875379709861e-06, "loss": 0.5502, "step": 4646 }, { "epoch": 1.8365522351197827, "grad_norm": 0.44982296126025034, "learning_rate": 4.914834839422742e-06, "loss": 0.5753, "step": 4647 }, { "epoch": 1.836947394418375, "grad_norm": 0.4305160106289168, "learning_rate": 4.914794289651619e-06, "loss": 0.5476, "step": 4648 }, { "epoch": 1.8373425537169672, "grad_norm": 0.43698124283033885, "learning_rate": 4.91475373039665e-06, "loss": 0.5626, "step": 4649 }, { "epoch": 1.8377377130155594, "grad_norm": 0.4324559418236934, "learning_rate": 4.914713161657993e-06, "loss": 0.5465, "step": 4650 }, { "epoch": 1.8381328723141517, "grad_norm": 0.46505067847109016, "learning_rate": 4.914672583435811e-06, "loss": 0.5773, "step": 4651 }, { "epoch": 1.838528031612744, "grad_norm": 0.44054104719934584, "learning_rate": 4.9146319957302615e-06, "loss": 0.5462, "step": 4652 }, { "epoch": 1.8389231909113362, "grad_norm": 0.4293700652645379, "learning_rate": 4.914591398541503e-06, "loss": 0.5514, "step": 4653 }, { "epoch": 1.8393183502099284, "grad_norm": 0.45753087472514964, "learning_rate": 4.9145507918696956e-06, "loss": 0.5478, "step": 4654 }, { "epoch": 1.8397135095085206, "grad_norm": 0.4461916227350306, "learning_rate": 4.9145101757149994e-06, "loss": 0.5548, "step": 4655 }, { "epoch": 1.8401086688071129, "grad_norm": 0.4471190461133453, "learning_rate": 4.914469550077573e-06, "loss": 0.5522, "step": 4656 }, { "epoch": 1.8405038281057051, "grad_norm": 0.44384611797235896, "learning_rate": 4.914428914957576e-06, "loss": 0.572, "step": 4657 }, { "epoch": 1.8408989874042974, "grad_norm": 0.4574610337875189, "learning_rate": 4.9143882703551685e-06, "loss": 0.5625, "step": 4658 }, { "epoch": 1.8412941467028896, "grad_norm": 0.5048055930740011, "learning_rate": 4.914347616270511e-06, "loss": 0.5431, "step": 4659 }, { "epoch": 1.8416893060014818, "grad_norm": 0.4421724478683854, "learning_rate": 4.914306952703761e-06, "loss": 0.5472, "step": 4660 }, { "epoch": 1.842084465300074, "grad_norm": 0.43918097376339305, "learning_rate": 4.914266279655079e-06, "loss": 0.5677, "step": 4661 }, { "epoch": 1.8424796245986663, "grad_norm": 0.4518794942674701, "learning_rate": 4.914225597124626e-06, "loss": 0.5492, "step": 4662 }, { "epoch": 1.8428747838972586, "grad_norm": 0.47641842318354, "learning_rate": 4.9141849051125614e-06, "loss": 0.5769, "step": 4663 }, { "epoch": 1.8432699431958508, "grad_norm": 0.43242823970216165, "learning_rate": 4.9141442036190435e-06, "loss": 0.5474, "step": 4664 }, { "epoch": 1.843665102494443, "grad_norm": 0.43975219203201016, "learning_rate": 4.914103492644233e-06, "loss": 0.5428, "step": 4665 }, { "epoch": 1.8440602617930353, "grad_norm": 0.44273932980174646, "learning_rate": 4.91406277218829e-06, "loss": 0.5429, "step": 4666 }, { "epoch": 1.8444554210916275, "grad_norm": 0.4371689468098226, "learning_rate": 4.914022042251375e-06, "loss": 0.5443, "step": 4667 }, { "epoch": 1.8448505803902198, "grad_norm": 0.46091840391838906, "learning_rate": 4.9139813028336465e-06, "loss": 0.5623, "step": 4668 }, { "epoch": 1.845245739688812, "grad_norm": 0.4515022813696254, "learning_rate": 4.9139405539352655e-06, "loss": 0.5615, "step": 4669 }, { "epoch": 1.8456408989874042, "grad_norm": 0.44395923841481416, "learning_rate": 4.913899795556391e-06, "loss": 0.5688, "step": 4670 }, { "epoch": 1.8460360582859967, "grad_norm": 0.44117565523570207, "learning_rate": 4.913859027697185e-06, "loss": 0.5654, "step": 4671 }, { "epoch": 1.846431217584589, "grad_norm": 0.42429096303861064, "learning_rate": 4.913818250357807e-06, "loss": 0.5462, "step": 4672 }, { "epoch": 1.8468263768831812, "grad_norm": 0.44958055380119594, "learning_rate": 4.913777463538416e-06, "loss": 0.5686, "step": 4673 }, { "epoch": 1.8472215361817734, "grad_norm": 0.4443331190570723, "learning_rate": 4.913736667239173e-06, "loss": 0.5567, "step": 4674 }, { "epoch": 1.8476166954803657, "grad_norm": 0.42627047085078384, "learning_rate": 4.913695861460238e-06, "loss": 0.5426, "step": 4675 }, { "epoch": 1.848011854778958, "grad_norm": 0.4500558947718227, "learning_rate": 4.9136550462017716e-06, "loss": 0.5542, "step": 4676 }, { "epoch": 1.8484070140775501, "grad_norm": 0.4414303454674209, "learning_rate": 4.913614221463932e-06, "loss": 0.555, "step": 4677 }, { "epoch": 1.8488021733761424, "grad_norm": 0.43962204738708255, "learning_rate": 4.913573387246884e-06, "loss": 0.5704, "step": 4678 }, { "epoch": 1.8491973326747346, "grad_norm": 0.44664322555736324, "learning_rate": 4.9135325435507845e-06, "loss": 0.57, "step": 4679 }, { "epoch": 1.8495924919733269, "grad_norm": 0.4539113882483917, "learning_rate": 4.913491690375794e-06, "loss": 0.5679, "step": 4680 }, { "epoch": 1.849987651271919, "grad_norm": 0.4392523191394003, "learning_rate": 4.913450827722074e-06, "loss": 0.5388, "step": 4681 }, { "epoch": 1.8503828105705113, "grad_norm": 0.430956543329709, "learning_rate": 4.913409955589785e-06, "loss": 0.5727, "step": 4682 }, { "epoch": 1.8507779698691036, "grad_norm": 0.4509366979993098, "learning_rate": 4.9133690739790864e-06, "loss": 0.5503, "step": 4683 }, { "epoch": 1.8511731291676958, "grad_norm": 0.4412502685023789, "learning_rate": 4.91332818289014e-06, "loss": 0.542, "step": 4684 }, { "epoch": 1.851568288466288, "grad_norm": 0.4421900270354689, "learning_rate": 4.913287282323107e-06, "loss": 0.5562, "step": 4685 }, { "epoch": 1.8519634477648803, "grad_norm": 0.5017741843721322, "learning_rate": 4.913246372278145e-06, "loss": 0.5546, "step": 4686 }, { "epoch": 1.8523586070634726, "grad_norm": 0.42598558349235277, "learning_rate": 4.913205452755418e-06, "loss": 0.5565, "step": 4687 }, { "epoch": 1.8527537663620648, "grad_norm": 0.44801891691212664, "learning_rate": 4.913164523755085e-06, "loss": 0.5622, "step": 4688 }, { "epoch": 1.853148925660657, "grad_norm": 0.46504021620652847, "learning_rate": 4.9131235852773075e-06, "loss": 0.5486, "step": 4689 }, { "epoch": 1.8535440849592493, "grad_norm": 0.42392415227635827, "learning_rate": 4.913082637322245e-06, "loss": 0.5462, "step": 4690 }, { "epoch": 1.8539392442578415, "grad_norm": 0.43104217850617627, "learning_rate": 4.91304167989006e-06, "loss": 0.5406, "step": 4691 }, { "epoch": 1.8543344035564338, "grad_norm": 0.47167572808016, "learning_rate": 4.9130007129809135e-06, "loss": 0.5469, "step": 4692 }, { "epoch": 1.854729562855026, "grad_norm": 0.4747344328170053, "learning_rate": 4.912959736594963e-06, "loss": 0.5653, "step": 4693 }, { "epoch": 1.8551247221536182, "grad_norm": 0.4343101893171199, "learning_rate": 4.912918750732374e-06, "loss": 0.5373, "step": 4694 }, { "epoch": 1.8555198814522105, "grad_norm": 0.42499504251024883, "learning_rate": 4.9128777553933035e-06, "loss": 0.5499, "step": 4695 }, { "epoch": 1.8559150407508027, "grad_norm": 0.45857770081696003, "learning_rate": 4.9128367505779165e-06, "loss": 0.5665, "step": 4696 }, { "epoch": 1.856310200049395, "grad_norm": 0.47188633170732824, "learning_rate": 4.91279573628637e-06, "loss": 0.5745, "step": 4697 }, { "epoch": 1.8567053593479872, "grad_norm": 0.43004014361775683, "learning_rate": 4.912754712518828e-06, "loss": 0.5387, "step": 4698 }, { "epoch": 1.8571005186465794, "grad_norm": 0.450368047124033, "learning_rate": 4.912713679275451e-06, "loss": 0.551, "step": 4699 }, { "epoch": 1.8574956779451717, "grad_norm": 0.4561974358066812, "learning_rate": 4.912672636556398e-06, "loss": 0.5645, "step": 4700 }, { "epoch": 1.857890837243764, "grad_norm": 0.4327321080207485, "learning_rate": 4.912631584361833e-06, "loss": 0.5449, "step": 4701 }, { "epoch": 1.8582859965423562, "grad_norm": 0.42737763737038065, "learning_rate": 4.912590522691917e-06, "loss": 0.5381, "step": 4702 }, { "epoch": 1.8586811558409484, "grad_norm": 0.43781196017871354, "learning_rate": 4.912549451546809e-06, "loss": 0.5615, "step": 4703 }, { "epoch": 1.8590763151395406, "grad_norm": 0.43337841503621866, "learning_rate": 4.912508370926672e-06, "loss": 0.5569, "step": 4704 }, { "epoch": 1.8594714744381329, "grad_norm": 0.451977525541082, "learning_rate": 4.912467280831668e-06, "loss": 0.5494, "step": 4705 }, { "epoch": 1.8598666337367251, "grad_norm": 0.4728561044232118, "learning_rate": 4.9124261812619566e-06, "loss": 0.5483, "step": 4706 }, { "epoch": 1.8602617930353174, "grad_norm": 0.4415020307643117, "learning_rate": 4.9123850722177e-06, "loss": 0.548, "step": 4707 }, { "epoch": 1.8606569523339096, "grad_norm": 0.45173525627796496, "learning_rate": 4.912343953699061e-06, "loss": 0.5574, "step": 4708 }, { "epoch": 1.8610521116325018, "grad_norm": 0.4267764458634889, "learning_rate": 4.912302825706198e-06, "loss": 0.553, "step": 4709 }, { "epoch": 1.861447270931094, "grad_norm": 0.4357451577511683, "learning_rate": 4.912261688239275e-06, "loss": 0.5541, "step": 4710 }, { "epoch": 1.8618424302296863, "grad_norm": 0.46999902555719913, "learning_rate": 4.912220541298454e-06, "loss": 0.5777, "step": 4711 }, { "epoch": 1.8622375895282786, "grad_norm": 0.4574693196717579, "learning_rate": 4.912179384883894e-06, "loss": 0.5587, "step": 4712 }, { "epoch": 1.8626327488268708, "grad_norm": 0.44445542433033436, "learning_rate": 4.912138218995759e-06, "loss": 0.5649, "step": 4713 }, { "epoch": 1.863027908125463, "grad_norm": 0.4342553915239652, "learning_rate": 4.9120970436342095e-06, "loss": 0.562, "step": 4714 }, { "epoch": 1.8634230674240553, "grad_norm": 0.4543224773995171, "learning_rate": 4.912055858799407e-06, "loss": 0.5718, "step": 4715 }, { "epoch": 1.8638182267226475, "grad_norm": 0.42547741598029465, "learning_rate": 4.912014664491514e-06, "loss": 0.5439, "step": 4716 }, { "epoch": 1.8642133860212398, "grad_norm": 0.45619787933952977, "learning_rate": 4.911973460710692e-06, "loss": 0.5515, "step": 4717 }, { "epoch": 1.864608545319832, "grad_norm": 0.46059272985729816, "learning_rate": 4.911932247457104e-06, "loss": 0.5648, "step": 4718 }, { "epoch": 1.8650037046184242, "grad_norm": 0.44116740865007975, "learning_rate": 4.911891024730911e-06, "loss": 0.5574, "step": 4719 }, { "epoch": 1.8653988639170165, "grad_norm": 0.41870208648406154, "learning_rate": 4.9118497925322725e-06, "loss": 0.537, "step": 4720 }, { "epoch": 1.8657940232156087, "grad_norm": 0.5142452344989176, "learning_rate": 4.911808550861353e-06, "loss": 0.5673, "step": 4721 }, { "epoch": 1.866189182514201, "grad_norm": 0.4401669678300714, "learning_rate": 4.9117672997183155e-06, "loss": 0.5578, "step": 4722 }, { "epoch": 1.8665843418127932, "grad_norm": 0.4455903304729346, "learning_rate": 4.911726039103319e-06, "loss": 0.5608, "step": 4723 }, { "epoch": 1.8669795011113854, "grad_norm": 0.44233877100194063, "learning_rate": 4.911684769016528e-06, "loss": 0.5764, "step": 4724 }, { "epoch": 1.8673746604099777, "grad_norm": 0.43688108562586403, "learning_rate": 4.911643489458104e-06, "loss": 0.5526, "step": 4725 }, { "epoch": 1.86776981970857, "grad_norm": 0.4534632460594352, "learning_rate": 4.911602200428208e-06, "loss": 0.5542, "step": 4726 }, { "epoch": 1.8681649790071622, "grad_norm": 0.4803823461812875, "learning_rate": 4.911560901927003e-06, "loss": 0.5458, "step": 4727 }, { "epoch": 1.8685601383057544, "grad_norm": 0.4390720418788202, "learning_rate": 4.911519593954652e-06, "loss": 0.5585, "step": 4728 }, { "epoch": 1.8689552976043466, "grad_norm": 0.43151939301169534, "learning_rate": 4.9114782765113155e-06, "loss": 0.5717, "step": 4729 }, { "epoch": 1.8693504569029389, "grad_norm": 0.43397827245657605, "learning_rate": 4.911436949597157e-06, "loss": 0.5294, "step": 4730 }, { "epoch": 1.8697456162015311, "grad_norm": 0.4348332309375543, "learning_rate": 4.911395613212339e-06, "loss": 0.5472, "step": 4731 }, { "epoch": 1.8701407755001234, "grad_norm": 0.4244300368938872, "learning_rate": 4.911354267357022e-06, "loss": 0.565, "step": 4732 }, { "epoch": 1.8705359347987156, "grad_norm": 0.4353867070970591, "learning_rate": 4.911312912031371e-06, "loss": 0.5642, "step": 4733 }, { "epoch": 1.8709310940973078, "grad_norm": 0.42839969001511663, "learning_rate": 4.9112715472355464e-06, "loss": 0.5475, "step": 4734 }, { "epoch": 1.8713262533959, "grad_norm": 0.44110960690425427, "learning_rate": 4.911230172969711e-06, "loss": 0.5614, "step": 4735 }, { "epoch": 1.8717214126944923, "grad_norm": 0.4995723449596207, "learning_rate": 4.911188789234028e-06, "loss": 0.5662, "step": 4736 }, { "epoch": 1.8721165719930846, "grad_norm": 0.423171830947674, "learning_rate": 4.91114739602866e-06, "loss": 0.5564, "step": 4737 }, { "epoch": 1.8725117312916768, "grad_norm": 0.44108599595757897, "learning_rate": 4.911105993353769e-06, "loss": 0.5664, "step": 4738 }, { "epoch": 1.872906890590269, "grad_norm": 0.4502796994319581, "learning_rate": 4.9110645812095174e-06, "loss": 0.5572, "step": 4739 }, { "epoch": 1.8733020498888613, "grad_norm": 0.4245422544885071, "learning_rate": 4.911023159596069e-06, "loss": 0.538, "step": 4740 }, { "epoch": 1.8736972091874535, "grad_norm": 0.41908462248992445, "learning_rate": 4.910981728513586e-06, "loss": 0.5383, "step": 4741 }, { "epoch": 1.874092368486046, "grad_norm": 0.4232818477813306, "learning_rate": 4.910940287962229e-06, "loss": 0.5348, "step": 4742 }, { "epoch": 1.8744875277846382, "grad_norm": 0.43217070766658694, "learning_rate": 4.910898837942163e-06, "loss": 0.5527, "step": 4743 }, { "epoch": 1.8748826870832305, "grad_norm": 0.4312801007800965, "learning_rate": 4.9108573784535515e-06, "loss": 0.5627, "step": 4744 }, { "epoch": 1.8752778463818227, "grad_norm": 0.44318569935703905, "learning_rate": 4.910815909496555e-06, "loss": 0.5666, "step": 4745 }, { "epoch": 1.875673005680415, "grad_norm": 0.4264547180929095, "learning_rate": 4.910774431071338e-06, "loss": 0.5566, "step": 4746 }, { "epoch": 1.8760681649790072, "grad_norm": 0.43447024061939904, "learning_rate": 4.910732943178063e-06, "loss": 0.5388, "step": 4747 }, { "epoch": 1.8764633242775994, "grad_norm": 0.44201190438269433, "learning_rate": 4.9106914458168934e-06, "loss": 0.537, "step": 4748 }, { "epoch": 1.8768584835761917, "grad_norm": 0.43667832935964324, "learning_rate": 4.91064993898799e-06, "loss": 0.554, "step": 4749 }, { "epoch": 1.877253642874784, "grad_norm": 0.4332594072357512, "learning_rate": 4.910608422691519e-06, "loss": 0.5458, "step": 4750 }, { "epoch": 1.8776488021733762, "grad_norm": 0.4281199350474413, "learning_rate": 4.910566896927642e-06, "loss": 0.5343, "step": 4751 }, { "epoch": 1.8780439614719684, "grad_norm": 0.4300151216135207, "learning_rate": 4.910525361696521e-06, "loss": 0.5442, "step": 4752 }, { "epoch": 1.8784391207705606, "grad_norm": 0.4448357946018024, "learning_rate": 4.91048381699832e-06, "loss": 0.5608, "step": 4753 }, { "epoch": 1.8788342800691529, "grad_norm": 0.43367677911395175, "learning_rate": 4.910442262833204e-06, "loss": 0.5497, "step": 4754 }, { "epoch": 1.8792294393677451, "grad_norm": 0.4307967775333535, "learning_rate": 4.9104006992013335e-06, "loss": 0.5522, "step": 4755 }, { "epoch": 1.8796245986663374, "grad_norm": 0.44915265676312033, "learning_rate": 4.910359126102872e-06, "loss": 0.5541, "step": 4756 }, { "epoch": 1.8800197579649296, "grad_norm": 0.4513270587407442, "learning_rate": 4.910317543537984e-06, "loss": 0.5704, "step": 4757 }, { "epoch": 1.8804149172635218, "grad_norm": 0.42836930222487574, "learning_rate": 4.910275951506832e-06, "loss": 0.5455, "step": 4758 }, { "epoch": 1.880810076562114, "grad_norm": 0.4200332636052868, "learning_rate": 4.91023435000958e-06, "loss": 0.5493, "step": 4759 }, { "epoch": 1.8812052358607063, "grad_norm": 0.4322816077163206, "learning_rate": 4.910192739046392e-06, "loss": 0.5828, "step": 4760 }, { "epoch": 1.8816003951592986, "grad_norm": 0.42914581453180367, "learning_rate": 4.910151118617429e-06, "loss": 0.5444, "step": 4761 }, { "epoch": 1.8819955544578908, "grad_norm": 0.4164751887870663, "learning_rate": 4.910109488722857e-06, "loss": 0.5345, "step": 4762 }, { "epoch": 1.882390713756483, "grad_norm": 0.4231358763864964, "learning_rate": 4.910067849362838e-06, "loss": 0.5464, "step": 4763 }, { "epoch": 1.8827858730550753, "grad_norm": 0.4289620642079432, "learning_rate": 4.910026200537535e-06, "loss": 0.5629, "step": 4764 }, { "epoch": 1.8831810323536677, "grad_norm": 0.4296872173261681, "learning_rate": 4.909984542247115e-06, "loss": 0.5323, "step": 4765 }, { "epoch": 1.88357619165226, "grad_norm": 0.4405891662125567, "learning_rate": 4.909942874491736e-06, "loss": 0.5581, "step": 4766 }, { "epoch": 1.8839713509508522, "grad_norm": 0.49421695499827306, "learning_rate": 4.9099011972715674e-06, "loss": 0.561, "step": 4767 }, { "epoch": 1.8843665102494445, "grad_norm": 0.44152259881922284, "learning_rate": 4.909859510586769e-06, "loss": 0.55, "step": 4768 }, { "epoch": 1.8847616695480367, "grad_norm": 0.5509522024405635, "learning_rate": 4.909817814437506e-06, "loss": 0.5424, "step": 4769 }, { "epoch": 1.885156828846629, "grad_norm": 0.42019717906356363, "learning_rate": 4.909776108823941e-06, "loss": 0.5529, "step": 4770 }, { "epoch": 1.8855519881452212, "grad_norm": 0.42580576090703337, "learning_rate": 4.909734393746241e-06, "loss": 0.5465, "step": 4771 }, { "epoch": 1.8859471474438134, "grad_norm": 0.429426688268032, "learning_rate": 4.909692669204565e-06, "loss": 0.5518, "step": 4772 }, { "epoch": 1.8863423067424057, "grad_norm": 0.4597075112764053, "learning_rate": 4.909650935199082e-06, "loss": 0.593, "step": 4773 }, { "epoch": 1.886737466040998, "grad_norm": 0.4231583750958899, "learning_rate": 4.909609191729951e-06, "loss": 0.5623, "step": 4774 }, { "epoch": 1.8871326253395901, "grad_norm": 0.42211619397536165, "learning_rate": 4.90956743879734e-06, "loss": 0.5507, "step": 4775 }, { "epoch": 1.8875277846381824, "grad_norm": 0.42252973826533957, "learning_rate": 4.90952567640141e-06, "loss": 0.5614, "step": 4776 }, { "epoch": 1.8879229439367746, "grad_norm": 0.4531797772700644, "learning_rate": 4.909483904542327e-06, "loss": 0.5533, "step": 4777 }, { "epoch": 1.8883181032353669, "grad_norm": 0.4376214503057026, "learning_rate": 4.909442123220255e-06, "loss": 0.5564, "step": 4778 }, { "epoch": 1.888713262533959, "grad_norm": 0.43083585059658475, "learning_rate": 4.909400332435357e-06, "loss": 0.5523, "step": 4779 }, { "epoch": 1.8891084218325513, "grad_norm": 0.41258509304143803, "learning_rate": 4.909358532187796e-06, "loss": 0.5389, "step": 4780 }, { "epoch": 1.8895035811311436, "grad_norm": 0.4343631923478858, "learning_rate": 4.909316722477739e-06, "loss": 0.5707, "step": 4781 }, { "epoch": 1.8898987404297358, "grad_norm": 0.4432077662590313, "learning_rate": 4.909274903305349e-06, "loss": 0.5461, "step": 4782 }, { "epoch": 1.890293899728328, "grad_norm": 0.43233155649566796, "learning_rate": 4.909233074670791e-06, "loss": 0.568, "step": 4783 }, { "epoch": 1.8906890590269203, "grad_norm": 0.42852851485106386, "learning_rate": 4.909191236574227e-06, "loss": 0.5621, "step": 4784 }, { "epoch": 1.8910842183255125, "grad_norm": 0.4322477381113751, "learning_rate": 4.909149389015823e-06, "loss": 0.5531, "step": 4785 }, { "epoch": 1.8914793776241048, "grad_norm": 0.43499368654441956, "learning_rate": 4.909107531995744e-06, "loss": 0.5732, "step": 4786 }, { "epoch": 1.891874536922697, "grad_norm": 0.43733806705560363, "learning_rate": 4.909065665514152e-06, "loss": 0.5763, "step": 4787 }, { "epoch": 1.8922696962212893, "grad_norm": 0.45421207033142263, "learning_rate": 4.909023789571214e-06, "loss": 0.5762, "step": 4788 }, { "epoch": 1.8926648555198815, "grad_norm": 0.47864113189844826, "learning_rate": 4.908981904167094e-06, "loss": 0.5378, "step": 4789 }, { "epoch": 1.8930600148184737, "grad_norm": 0.4498106966078856, "learning_rate": 4.908940009301955e-06, "loss": 0.554, "step": 4790 }, { "epoch": 1.893455174117066, "grad_norm": 0.47655100568930403, "learning_rate": 4.908898104975962e-06, "loss": 0.5498, "step": 4791 }, { "epoch": 1.8938503334156582, "grad_norm": 0.43699221579300107, "learning_rate": 4.908856191189281e-06, "loss": 0.5803, "step": 4792 }, { "epoch": 1.8942454927142505, "grad_norm": 0.44031567020576257, "learning_rate": 4.908814267942075e-06, "loss": 0.5603, "step": 4793 }, { "epoch": 1.8946406520128427, "grad_norm": 0.4381512496766988, "learning_rate": 4.908772335234509e-06, "loss": 0.56, "step": 4794 }, { "epoch": 1.895035811311435, "grad_norm": 0.4387758165017901, "learning_rate": 4.9087303930667485e-06, "loss": 0.5642, "step": 4795 }, { "epoch": 1.8954309706100272, "grad_norm": 0.4239025821881657, "learning_rate": 4.908688441438957e-06, "loss": 0.5522, "step": 4796 }, { "epoch": 1.8958261299086194, "grad_norm": 0.4273577795828686, "learning_rate": 4.908646480351301e-06, "loss": 0.5502, "step": 4797 }, { "epoch": 1.8962212892072117, "grad_norm": 0.44421556427677866, "learning_rate": 4.908604509803944e-06, "loss": 0.5495, "step": 4798 }, { "epoch": 1.896616448505804, "grad_norm": 0.44492416211462005, "learning_rate": 4.908562529797051e-06, "loss": 0.5577, "step": 4799 }, { "epoch": 1.8970116078043961, "grad_norm": 0.4388278234952724, "learning_rate": 4.908520540330786e-06, "loss": 0.5615, "step": 4800 }, { "epoch": 1.8974067671029884, "grad_norm": 0.43228612343838674, "learning_rate": 4.908478541405316e-06, "loss": 0.5467, "step": 4801 }, { "epoch": 1.8978019264015806, "grad_norm": 0.4309700517194293, "learning_rate": 4.908436533020804e-06, "loss": 0.5321, "step": 4802 }, { "epoch": 1.8981970857001729, "grad_norm": 0.4281007407461444, "learning_rate": 4.908394515177416e-06, "loss": 0.5356, "step": 4803 }, { "epoch": 1.898592244998765, "grad_norm": 0.4323019441412228, "learning_rate": 4.908352487875317e-06, "loss": 0.5679, "step": 4804 }, { "epoch": 1.8989874042973574, "grad_norm": 0.4403836139433748, "learning_rate": 4.908310451114672e-06, "loss": 0.5662, "step": 4805 }, { "epoch": 1.8993825635959496, "grad_norm": 0.44165797245282984, "learning_rate": 4.908268404895645e-06, "loss": 0.543, "step": 4806 }, { "epoch": 1.8997777228945418, "grad_norm": 0.4437790299847047, "learning_rate": 4.908226349218404e-06, "loss": 0.5511, "step": 4807 }, { "epoch": 1.900172882193134, "grad_norm": 0.44724367559807077, "learning_rate": 4.908184284083111e-06, "loss": 0.5635, "step": 4808 }, { "epoch": 1.9005680414917263, "grad_norm": 0.4365212081127553, "learning_rate": 4.908142209489932e-06, "loss": 0.5542, "step": 4809 }, { "epoch": 1.9009632007903186, "grad_norm": 0.45163950732319247, "learning_rate": 4.908100125439033e-06, "loss": 0.5627, "step": 4810 }, { "epoch": 1.9013583600889108, "grad_norm": 0.4316244913107137, "learning_rate": 4.90805803193058e-06, "loss": 0.5498, "step": 4811 }, { "epoch": 1.901753519387503, "grad_norm": 0.445880369848173, "learning_rate": 4.908015928964735e-06, "loss": 0.5565, "step": 4812 }, { "epoch": 1.9021486786860953, "grad_norm": 0.4344296261072089, "learning_rate": 4.9079738165416676e-06, "loss": 0.5358, "step": 4813 }, { "epoch": 1.9025438379846875, "grad_norm": 0.4352424451727997, "learning_rate": 4.907931694661541e-06, "loss": 0.5495, "step": 4814 }, { "epoch": 1.9029389972832798, "grad_norm": 0.4200141642812343, "learning_rate": 4.907889563324521e-06, "loss": 0.5529, "step": 4815 }, { "epoch": 1.903334156581872, "grad_norm": 0.42328744956338943, "learning_rate": 4.907847422530773e-06, "loss": 0.5408, "step": 4816 }, { "epoch": 1.9037293158804642, "grad_norm": 0.43808904720068553, "learning_rate": 4.907805272280461e-06, "loss": 0.5654, "step": 4817 }, { "epoch": 1.9041244751790565, "grad_norm": 0.4345122275007242, "learning_rate": 4.907763112573754e-06, "loss": 0.5558, "step": 4818 }, { "epoch": 1.9045196344776487, "grad_norm": 0.41639788021854496, "learning_rate": 4.907720943410814e-06, "loss": 0.5506, "step": 4819 }, { "epoch": 1.904914793776241, "grad_norm": 0.42802462554370857, "learning_rate": 4.90767876479181e-06, "loss": 0.5433, "step": 4820 }, { "epoch": 1.9053099530748332, "grad_norm": 0.43712860812027704, "learning_rate": 4.907636576716904e-06, "loss": 0.5482, "step": 4821 }, { "epoch": 1.9057051123734254, "grad_norm": 0.4442981118293308, "learning_rate": 4.9075943791862645e-06, "loss": 0.5516, "step": 4822 }, { "epoch": 1.9061002716720177, "grad_norm": 0.42923155588906153, "learning_rate": 4.907552172200056e-06, "loss": 0.5646, "step": 4823 }, { "epoch": 1.90649543097061, "grad_norm": 0.4175409742504318, "learning_rate": 4.907509955758444e-06, "loss": 0.5476, "step": 4824 }, { "epoch": 1.9068905902692022, "grad_norm": 0.43260553320884426, "learning_rate": 4.907467729861595e-06, "loss": 0.5576, "step": 4825 }, { "epoch": 1.9072857495677944, "grad_norm": 0.4789723482405854, "learning_rate": 4.907425494509675e-06, "loss": 0.5863, "step": 4826 }, { "epoch": 1.9076809088663866, "grad_norm": 0.43256077905417156, "learning_rate": 4.90738324970285e-06, "loss": 0.5648, "step": 4827 }, { "epoch": 1.9080760681649789, "grad_norm": 0.4301753052479235, "learning_rate": 4.907340995441284e-06, "loss": 0.5765, "step": 4828 }, { "epoch": 1.9084712274635711, "grad_norm": 0.4262264166103991, "learning_rate": 4.907298731725146e-06, "loss": 0.549, "step": 4829 }, { "epoch": 1.9088663867621634, "grad_norm": 0.41997042490994213, "learning_rate": 4.9072564585546e-06, "loss": 0.558, "step": 4830 }, { "epoch": 1.9092615460607556, "grad_norm": 0.4364550869207615, "learning_rate": 4.9072141759298114e-06, "loss": 0.5643, "step": 4831 }, { "epoch": 1.9096567053593478, "grad_norm": 0.42201288173328566, "learning_rate": 4.907171883850948e-06, "loss": 0.534, "step": 4832 }, { "epoch": 1.91005186465794, "grad_norm": 0.4417197054389254, "learning_rate": 4.907129582318175e-06, "loss": 0.5823, "step": 4833 }, { "epoch": 1.9104470239565323, "grad_norm": 0.43489230519142597, "learning_rate": 4.907087271331658e-06, "loss": 0.5634, "step": 4834 }, { "epoch": 1.9108421832551246, "grad_norm": 0.4273968780202027, "learning_rate": 4.907044950891565e-06, "loss": 0.5668, "step": 4835 }, { "epoch": 1.911237342553717, "grad_norm": 0.42553680975953273, "learning_rate": 4.907002620998061e-06, "loss": 0.5562, "step": 4836 }, { "epoch": 1.9116325018523093, "grad_norm": 0.45057537156888056, "learning_rate": 4.906960281651312e-06, "loss": 0.5906, "step": 4837 }, { "epoch": 1.9120276611509015, "grad_norm": 0.43561521398581576, "learning_rate": 4.906917932851484e-06, "loss": 0.5673, "step": 4838 }, { "epoch": 1.9124228204494937, "grad_norm": 0.41696070212722824, "learning_rate": 4.906875574598745e-06, "loss": 0.545, "step": 4839 }, { "epoch": 1.912817979748086, "grad_norm": 0.4542669385242667, "learning_rate": 4.90683320689326e-06, "loss": 0.5732, "step": 4840 }, { "epoch": 1.9132131390466782, "grad_norm": 0.42925333709930363, "learning_rate": 4.906790829735195e-06, "loss": 0.5553, "step": 4841 }, { "epoch": 1.9136082983452705, "grad_norm": 0.4189953420798717, "learning_rate": 4.906748443124718e-06, "loss": 0.5412, "step": 4842 }, { "epoch": 1.9140034576438627, "grad_norm": 0.43842151872555235, "learning_rate": 4.906706047061994e-06, "loss": 0.5842, "step": 4843 }, { "epoch": 1.914398616942455, "grad_norm": 0.43303434492630205, "learning_rate": 4.906663641547191e-06, "loss": 0.5446, "step": 4844 }, { "epoch": 1.9147937762410472, "grad_norm": 0.44750651700901584, "learning_rate": 4.906621226580473e-06, "loss": 0.5565, "step": 4845 }, { "epoch": 1.9151889355396394, "grad_norm": 0.43669244850265104, "learning_rate": 4.906578802162008e-06, "loss": 0.5539, "step": 4846 }, { "epoch": 1.9155840948382317, "grad_norm": 0.42310364393737065, "learning_rate": 4.906536368291964e-06, "loss": 0.5537, "step": 4847 }, { "epoch": 1.915979254136824, "grad_norm": 0.41308731234212065, "learning_rate": 4.9064939249705066e-06, "loss": 0.5426, "step": 4848 }, { "epoch": 1.9163744134354161, "grad_norm": 0.4284823800137259, "learning_rate": 4.906451472197802e-06, "loss": 0.5337, "step": 4849 }, { "epoch": 1.9167695727340084, "grad_norm": 0.44109819338095785, "learning_rate": 4.906409009974018e-06, "loss": 0.5352, "step": 4850 }, { "epoch": 1.9171647320326006, "grad_norm": 0.43268349819650925, "learning_rate": 4.90636653829932e-06, "loss": 0.5475, "step": 4851 }, { "epoch": 1.9175598913311929, "grad_norm": 0.430803043302547, "learning_rate": 4.906324057173875e-06, "loss": 0.5426, "step": 4852 }, { "epoch": 1.917955050629785, "grad_norm": 0.4266825335162365, "learning_rate": 4.9062815665978504e-06, "loss": 0.5547, "step": 4853 }, { "epoch": 1.9183502099283773, "grad_norm": 0.4498735174649945, "learning_rate": 4.906239066571413e-06, "loss": 0.5621, "step": 4854 }, { "epoch": 1.9187453692269696, "grad_norm": 0.4363381817986835, "learning_rate": 4.90619655709473e-06, "loss": 0.5578, "step": 4855 }, { "epoch": 1.9191405285255618, "grad_norm": 0.4320421517760889, "learning_rate": 4.906154038167968e-06, "loss": 0.5516, "step": 4856 }, { "epoch": 1.919535687824154, "grad_norm": 0.4377025215430826, "learning_rate": 4.9061115097912944e-06, "loss": 0.5528, "step": 4857 }, { "epoch": 1.9199308471227463, "grad_norm": 0.45155283623556963, "learning_rate": 4.906068971964876e-06, "loss": 0.5677, "step": 4858 }, { "epoch": 1.9203260064213385, "grad_norm": 0.4400211424480233, "learning_rate": 4.906026424688879e-06, "loss": 0.562, "step": 4859 }, { "epoch": 1.920721165719931, "grad_norm": 0.4343248105860274, "learning_rate": 4.905983867963472e-06, "loss": 0.5668, "step": 4860 }, { "epoch": 1.9211163250185233, "grad_norm": 0.4353078976406567, "learning_rate": 4.905941301788821e-06, "loss": 0.5536, "step": 4861 }, { "epoch": 1.9215114843171155, "grad_norm": 0.44191754785521875, "learning_rate": 4.905898726165093e-06, "loss": 0.5397, "step": 4862 }, { "epoch": 1.9219066436157077, "grad_norm": 0.4319364852789675, "learning_rate": 4.905856141092457e-06, "loss": 0.5535, "step": 4863 }, { "epoch": 1.9223018029143, "grad_norm": 0.4303876762850279, "learning_rate": 4.9058135465710776e-06, "loss": 0.5388, "step": 4864 }, { "epoch": 1.9226969622128922, "grad_norm": 0.4264199761833403, "learning_rate": 4.9057709426011236e-06, "loss": 0.5561, "step": 4865 }, { "epoch": 1.9230921215114845, "grad_norm": 0.44314277804736335, "learning_rate": 4.905728329182763e-06, "loss": 0.5609, "step": 4866 }, { "epoch": 1.9234872808100767, "grad_norm": 0.4404975868482517, "learning_rate": 4.905685706316162e-06, "loss": 0.5404, "step": 4867 }, { "epoch": 1.923882440108669, "grad_norm": 0.45402270420659185, "learning_rate": 4.9056430740014885e-06, "loss": 0.5515, "step": 4868 }, { "epoch": 1.9242775994072612, "grad_norm": 0.43115642120987685, "learning_rate": 4.90560043223891e-06, "loss": 0.5809, "step": 4869 }, { "epoch": 1.9246727587058534, "grad_norm": 0.4508455280612912, "learning_rate": 4.905557781028593e-06, "loss": 0.5532, "step": 4870 }, { "epoch": 1.9250679180044457, "grad_norm": 0.42373614920434, "learning_rate": 4.905515120370706e-06, "loss": 0.5552, "step": 4871 }, { "epoch": 1.925463077303038, "grad_norm": 0.5279871782928506, "learning_rate": 4.905472450265416e-06, "loss": 0.5716, "step": 4872 }, { "epoch": 1.9258582366016301, "grad_norm": 0.42805135252978943, "learning_rate": 4.905429770712892e-06, "loss": 0.5521, "step": 4873 }, { "epoch": 1.9262533959002224, "grad_norm": 0.4368318688771723, "learning_rate": 4.9053870817133e-06, "loss": 0.5486, "step": 4874 }, { "epoch": 1.9266485551988146, "grad_norm": 0.44355571797903176, "learning_rate": 4.905344383266808e-06, "loss": 0.5667, "step": 4875 }, { "epoch": 1.9270437144974069, "grad_norm": 0.42560534528870747, "learning_rate": 4.9053016753735836e-06, "loss": 0.5561, "step": 4876 }, { "epoch": 1.927438873795999, "grad_norm": 0.43288235759183613, "learning_rate": 4.905258958033795e-06, "loss": 0.5374, "step": 4877 }, { "epoch": 1.9278340330945913, "grad_norm": 0.42388914415273304, "learning_rate": 4.90521623124761e-06, "loss": 0.5719, "step": 4878 }, { "epoch": 1.9282291923931836, "grad_norm": 0.4261428683430966, "learning_rate": 4.905173495015196e-06, "loss": 0.5525, "step": 4879 }, { "epoch": 1.9286243516917758, "grad_norm": 0.4238364660888561, "learning_rate": 4.9051307493367205e-06, "loss": 0.5456, "step": 4880 }, { "epoch": 1.929019510990368, "grad_norm": 0.43787334764811864, "learning_rate": 4.905087994212353e-06, "loss": 0.5666, "step": 4881 }, { "epoch": 1.9294146702889603, "grad_norm": 0.41851611764846186, "learning_rate": 4.9050452296422595e-06, "loss": 0.5355, "step": 4882 }, { "epoch": 1.9298098295875525, "grad_norm": 0.4221693340127139, "learning_rate": 4.905002455626609e-06, "loss": 0.5524, "step": 4883 }, { "epoch": 1.9302049888861448, "grad_norm": 0.4334189440111412, "learning_rate": 4.904959672165569e-06, "loss": 0.5448, "step": 4884 }, { "epoch": 1.930600148184737, "grad_norm": 0.4304554597293204, "learning_rate": 4.904916879259308e-06, "loss": 0.5651, "step": 4885 }, { "epoch": 1.9309953074833293, "grad_norm": 0.4325837099787629, "learning_rate": 4.904874076907994e-06, "loss": 0.571, "step": 4886 }, { "epoch": 1.9313904667819215, "grad_norm": 0.43207355005966136, "learning_rate": 4.904831265111795e-06, "loss": 0.5691, "step": 4887 }, { "epoch": 1.9317856260805137, "grad_norm": 0.432638152421682, "learning_rate": 4.904788443870879e-06, "loss": 0.5524, "step": 4888 }, { "epoch": 1.932180785379106, "grad_norm": 0.4512618098367983, "learning_rate": 4.904745613185415e-06, "loss": 0.5496, "step": 4889 }, { "epoch": 1.9325759446776982, "grad_norm": 0.4374527551224479, "learning_rate": 4.904702773055568e-06, "loss": 0.5732, "step": 4890 }, { "epoch": 1.9329711039762905, "grad_norm": 0.4468449203480439, "learning_rate": 4.9046599234815105e-06, "loss": 0.5587, "step": 4891 }, { "epoch": 1.9333662632748827, "grad_norm": 0.4316133274910195, "learning_rate": 4.90461706446341e-06, "loss": 0.5355, "step": 4892 }, { "epoch": 1.933761422573475, "grad_norm": 0.4246061221952136, "learning_rate": 4.904574196001432e-06, "loss": 0.5326, "step": 4893 }, { "epoch": 1.9341565818720672, "grad_norm": 0.45695254659882967, "learning_rate": 4.9045313180957474e-06, "loss": 0.5655, "step": 4894 }, { "epoch": 1.9345517411706594, "grad_norm": 0.44841848061126316, "learning_rate": 4.904488430746524e-06, "loss": 0.566, "step": 4895 }, { "epoch": 1.9349469004692517, "grad_norm": 0.43653829446312165, "learning_rate": 4.90444553395393e-06, "loss": 0.5223, "step": 4896 }, { "epoch": 1.935342059767844, "grad_norm": 0.44040644249133537, "learning_rate": 4.904402627718134e-06, "loss": 0.5481, "step": 4897 }, { "epoch": 1.9357372190664361, "grad_norm": 0.4403357250362934, "learning_rate": 4.904359712039304e-06, "loss": 0.5652, "step": 4898 }, { "epoch": 1.9361323783650284, "grad_norm": 0.4196447153062507, "learning_rate": 4.90431678691761e-06, "loss": 0.5494, "step": 4899 }, { "epoch": 1.9365275376636206, "grad_norm": 0.4507476048268143, "learning_rate": 4.904273852353219e-06, "loss": 0.5388, "step": 4900 }, { "epoch": 1.9369226969622129, "grad_norm": 0.45818820763144874, "learning_rate": 4.9042309083463e-06, "loss": 0.5647, "step": 4901 }, { "epoch": 1.937317856260805, "grad_norm": 0.4169132869917713, "learning_rate": 4.904187954897023e-06, "loss": 0.5328, "step": 4902 }, { "epoch": 1.9377130155593973, "grad_norm": 0.42776873057488746, "learning_rate": 4.904144992005555e-06, "loss": 0.5401, "step": 4903 }, { "epoch": 1.9381081748579896, "grad_norm": 0.4452737398761506, "learning_rate": 4.904102019672066e-06, "loss": 0.5849, "step": 4904 }, { "epoch": 1.9385033341565818, "grad_norm": 0.4268122814283881, "learning_rate": 4.904059037896723e-06, "loss": 0.5577, "step": 4905 }, { "epoch": 1.938898493455174, "grad_norm": 0.43474540031672093, "learning_rate": 4.904016046679696e-06, "loss": 0.5651, "step": 4906 }, { "epoch": 1.9392936527537663, "grad_norm": 0.4439080883548797, "learning_rate": 4.9039730460211545e-06, "loss": 0.5525, "step": 4907 }, { "epoch": 1.9396888120523585, "grad_norm": 0.4380749282591096, "learning_rate": 4.9039300359212665e-06, "loss": 0.5647, "step": 4908 }, { "epoch": 1.9400839713509508, "grad_norm": 0.4409470401445144, "learning_rate": 4.9038870163802e-06, "loss": 0.5645, "step": 4909 }, { "epoch": 1.940479130649543, "grad_norm": 0.4453293955228133, "learning_rate": 4.903843987398127e-06, "loss": 0.5473, "step": 4910 }, { "epoch": 1.9408742899481353, "grad_norm": 0.4327605271211098, "learning_rate": 4.903800948975213e-06, "loss": 0.5391, "step": 4911 }, { "epoch": 1.9412694492467275, "grad_norm": 0.4436439359315798, "learning_rate": 4.903757901111629e-06, "loss": 0.5592, "step": 4912 }, { "epoch": 1.9416646085453197, "grad_norm": 0.4224199241527376, "learning_rate": 4.903714843807543e-06, "loss": 0.5408, "step": 4913 }, { "epoch": 1.942059767843912, "grad_norm": 0.44551596874456995, "learning_rate": 4.903671777063126e-06, "loss": 0.5426, "step": 4914 }, { "epoch": 1.9424549271425042, "grad_norm": 0.4296694239782312, "learning_rate": 4.9036287008785446e-06, "loss": 0.5388, "step": 4915 }, { "epoch": 1.9428500864410965, "grad_norm": 0.44248224422217924, "learning_rate": 4.903585615253969e-06, "loss": 0.5597, "step": 4916 }, { "epoch": 1.9432452457396887, "grad_norm": 0.4494129495563011, "learning_rate": 4.90354252018957e-06, "loss": 0.5744, "step": 4917 }, { "epoch": 1.943640405038281, "grad_norm": 0.4668938690509174, "learning_rate": 4.903499415685515e-06, "loss": 0.5719, "step": 4918 }, { "epoch": 1.9440355643368732, "grad_norm": 0.4397994559767837, "learning_rate": 4.903456301741973e-06, "loss": 0.5566, "step": 4919 }, { "epoch": 1.9444307236354654, "grad_norm": 0.44596460788657327, "learning_rate": 4.903413178359115e-06, "loss": 0.5804, "step": 4920 }, { "epoch": 1.9448258829340577, "grad_norm": 0.43722931620391176, "learning_rate": 4.9033700455371095e-06, "loss": 0.5356, "step": 4921 }, { "epoch": 1.94522104223265, "grad_norm": 0.42315317154912496, "learning_rate": 4.903326903276125e-06, "loss": 0.5544, "step": 4922 }, { "epoch": 1.9456162015312422, "grad_norm": 0.43461478276395443, "learning_rate": 4.903283751576333e-06, "loss": 0.5385, "step": 4923 }, { "epoch": 1.9460113608298344, "grad_norm": 0.43858622832546995, "learning_rate": 4.903240590437901e-06, "loss": 0.5365, "step": 4924 }, { "epoch": 1.9464065201284266, "grad_norm": 0.4381635670286083, "learning_rate": 4.903197419861e-06, "loss": 0.5522, "step": 4925 }, { "epoch": 1.9468016794270189, "grad_norm": 0.4211868434644149, "learning_rate": 4.903154239845798e-06, "loss": 0.5383, "step": 4926 }, { "epoch": 1.9471968387256111, "grad_norm": 0.4367363445108265, "learning_rate": 4.903111050392465e-06, "loss": 0.5585, "step": 4927 }, { "epoch": 1.9475919980242034, "grad_norm": 0.43254406845062815, "learning_rate": 4.903067851501172e-06, "loss": 0.5458, "step": 4928 }, { "epoch": 1.9479871573227956, "grad_norm": 0.42229361659520137, "learning_rate": 4.9030246431720875e-06, "loss": 0.5438, "step": 4929 }, { "epoch": 1.9483823166213878, "grad_norm": 0.44413774805607853, "learning_rate": 4.902981425405381e-06, "loss": 0.5668, "step": 4930 }, { "epoch": 1.9487774759199803, "grad_norm": 0.4380542711297867, "learning_rate": 4.902938198201223e-06, "loss": 0.5631, "step": 4931 }, { "epoch": 1.9491726352185725, "grad_norm": 0.42049828655472166, "learning_rate": 4.902894961559783e-06, "loss": 0.5418, "step": 4932 }, { "epoch": 1.9495677945171648, "grad_norm": 0.42075791692900755, "learning_rate": 4.90285171548123e-06, "loss": 0.543, "step": 4933 }, { "epoch": 1.949962953815757, "grad_norm": 0.43396399038359434, "learning_rate": 4.9028084599657355e-06, "loss": 0.5334, "step": 4934 }, { "epoch": 1.9503581131143493, "grad_norm": 0.4238423271020982, "learning_rate": 4.902765195013468e-06, "loss": 0.5629, "step": 4935 }, { "epoch": 1.9507532724129415, "grad_norm": 0.4390345402600252, "learning_rate": 4.902721920624598e-06, "loss": 0.5535, "step": 4936 }, { "epoch": 1.9511484317115337, "grad_norm": 0.433652416350482, "learning_rate": 4.9026786367992955e-06, "loss": 0.569, "step": 4937 }, { "epoch": 1.951543591010126, "grad_norm": 0.43072162266053016, "learning_rate": 4.90263534353773e-06, "loss": 0.5486, "step": 4938 }, { "epoch": 1.9519387503087182, "grad_norm": 0.4219105439697877, "learning_rate": 4.902592040840071e-06, "loss": 0.5656, "step": 4939 }, { "epoch": 1.9523339096073105, "grad_norm": 0.4397667987358108, "learning_rate": 4.9025487287064905e-06, "loss": 0.5836, "step": 4940 }, { "epoch": 1.9527290689059027, "grad_norm": 0.4444122529654959, "learning_rate": 4.9025054071371565e-06, "loss": 0.5395, "step": 4941 }, { "epoch": 1.953124228204495, "grad_norm": 0.446787855529709, "learning_rate": 4.9024620761322415e-06, "loss": 0.5595, "step": 4942 }, { "epoch": 1.9535193875030872, "grad_norm": 0.42700864336105265, "learning_rate": 4.902418735691914e-06, "loss": 0.5347, "step": 4943 }, { "epoch": 1.9539145468016794, "grad_norm": 0.44112913377219515, "learning_rate": 4.902375385816344e-06, "loss": 0.55, "step": 4944 }, { "epoch": 1.9543097061002717, "grad_norm": 0.452174058015207, "learning_rate": 4.902332026505703e-06, "loss": 0.5553, "step": 4945 }, { "epoch": 1.954704865398864, "grad_norm": 0.5493673198052214, "learning_rate": 4.902288657760159e-06, "loss": 0.5628, "step": 4946 }, { "epoch": 1.9551000246974561, "grad_norm": 0.4388936013126185, "learning_rate": 4.902245279579886e-06, "loss": 0.5442, "step": 4947 }, { "epoch": 1.9554951839960484, "grad_norm": 0.4342127172483669, "learning_rate": 4.9022018919650505e-06, "loss": 0.5716, "step": 4948 }, { "epoch": 1.9558903432946406, "grad_norm": 0.4409460564547779, "learning_rate": 4.902158494915825e-06, "loss": 0.555, "step": 4949 }, { "epoch": 1.9562855025932329, "grad_norm": 0.43477091609114554, "learning_rate": 4.90211508843238e-06, "loss": 0.5425, "step": 4950 }, { "epoch": 1.956680661891825, "grad_norm": 0.44011026483098825, "learning_rate": 4.902071672514886e-06, "loss": 0.5586, "step": 4951 }, { "epoch": 1.9570758211904173, "grad_norm": 0.4279433040735619, "learning_rate": 4.902028247163512e-06, "loss": 0.5603, "step": 4952 }, { "epoch": 1.9574709804890096, "grad_norm": 0.43493371661347063, "learning_rate": 4.901984812378431e-06, "loss": 0.5496, "step": 4953 }, { "epoch": 1.9578661397876018, "grad_norm": 0.429552671257518, "learning_rate": 4.901941368159812e-06, "loss": 0.5667, "step": 4954 }, { "epoch": 1.9582612990861943, "grad_norm": 0.45149418076182646, "learning_rate": 4.901897914507825e-06, "loss": 0.5721, "step": 4955 }, { "epoch": 1.9586564583847865, "grad_norm": 0.4440050617964623, "learning_rate": 4.901854451422642e-06, "loss": 0.5447, "step": 4956 }, { "epoch": 1.9590516176833788, "grad_norm": 0.4271232711419705, "learning_rate": 4.901810978904433e-06, "loss": 0.5303, "step": 4957 }, { "epoch": 1.959446776981971, "grad_norm": 0.4178784895693934, "learning_rate": 4.901767496953369e-06, "loss": 0.5398, "step": 4958 }, { "epoch": 1.9598419362805632, "grad_norm": 0.45219556101457836, "learning_rate": 4.90172400556962e-06, "loss": 0.5464, "step": 4959 }, { "epoch": 1.9602370955791555, "grad_norm": 0.43549913764734244, "learning_rate": 4.901680504753358e-06, "loss": 0.5469, "step": 4960 }, { "epoch": 1.9606322548777477, "grad_norm": 0.4288069140164951, "learning_rate": 4.901636994504754e-06, "loss": 0.5438, "step": 4961 }, { "epoch": 1.96102741417634, "grad_norm": 0.558867701364055, "learning_rate": 4.901593474823978e-06, "loss": 0.5632, "step": 4962 }, { "epoch": 1.9614225734749322, "grad_norm": 0.4402971314310295, "learning_rate": 4.9015499457112e-06, "loss": 0.5496, "step": 4963 }, { "epoch": 1.9618177327735244, "grad_norm": 0.4280995907810528, "learning_rate": 4.901506407166594e-06, "loss": 0.5323, "step": 4964 }, { "epoch": 1.9622128920721167, "grad_norm": 0.43660734305856075, "learning_rate": 4.901462859190328e-06, "loss": 0.5439, "step": 4965 }, { "epoch": 1.962608051370709, "grad_norm": 0.4364007886164183, "learning_rate": 4.9014193017825735e-06, "loss": 0.5532, "step": 4966 }, { "epoch": 1.9630032106693012, "grad_norm": 0.4597936604604986, "learning_rate": 4.901375734943504e-06, "loss": 0.5571, "step": 4967 }, { "epoch": 1.9633983699678934, "grad_norm": 0.45327235813160105, "learning_rate": 4.901332158673288e-06, "loss": 0.5574, "step": 4968 }, { "epoch": 1.9637935292664856, "grad_norm": 0.44715737005782, "learning_rate": 4.901288572972097e-06, "loss": 0.579, "step": 4969 }, { "epoch": 1.964188688565078, "grad_norm": 0.42524834539399503, "learning_rate": 4.901244977840103e-06, "loss": 0.5186, "step": 4970 }, { "epoch": 1.9645838478636701, "grad_norm": 0.45527798672472874, "learning_rate": 4.9012013732774765e-06, "loss": 0.5373, "step": 4971 }, { "epoch": 1.9649790071622624, "grad_norm": 0.4343605181054587, "learning_rate": 4.901157759284389e-06, "loss": 0.5686, "step": 4972 }, { "epoch": 1.9653741664608546, "grad_norm": 0.44258891979240134, "learning_rate": 4.901114135861013e-06, "loss": 0.5697, "step": 4973 }, { "epoch": 1.9657693257594469, "grad_norm": 0.42190562697963224, "learning_rate": 4.901070503007516e-06, "loss": 0.5309, "step": 4974 }, { "epoch": 1.966164485058039, "grad_norm": 0.45195265147050434, "learning_rate": 4.901026860724075e-06, "loss": 0.5568, "step": 4975 }, { "epoch": 1.9665596443566313, "grad_norm": 0.4352261875981932, "learning_rate": 4.900983209010858e-06, "loss": 0.5507, "step": 4976 }, { "epoch": 1.9669548036552236, "grad_norm": 0.43978785905912626, "learning_rate": 4.9009395478680355e-06, "loss": 0.5599, "step": 4977 }, { "epoch": 1.9673499629538158, "grad_norm": 0.4475750883161317, "learning_rate": 4.9008958772957815e-06, "loss": 0.5493, "step": 4978 }, { "epoch": 1.967745122252408, "grad_norm": 0.4328811619048258, "learning_rate": 4.900852197294266e-06, "loss": 0.5361, "step": 4979 }, { "epoch": 1.9681402815510003, "grad_norm": 0.43773523890770105, "learning_rate": 4.900808507863661e-06, "loss": 0.5548, "step": 4980 }, { "epoch": 1.9685354408495925, "grad_norm": 0.43452862600059244, "learning_rate": 4.900764809004138e-06, "loss": 0.5336, "step": 4981 }, { "epoch": 1.9689306001481848, "grad_norm": 0.419201512372097, "learning_rate": 4.900721100715869e-06, "loss": 0.551, "step": 4982 }, { "epoch": 1.969325759446777, "grad_norm": 0.45373661373694074, "learning_rate": 4.900677382999025e-06, "loss": 0.5657, "step": 4983 }, { "epoch": 1.9697209187453693, "grad_norm": 0.4370236493158466, "learning_rate": 4.900633655853778e-06, "loss": 0.528, "step": 4984 }, { "epoch": 1.9701160780439615, "grad_norm": 0.4528150258511628, "learning_rate": 4.9005899192803e-06, "loss": 0.5427, "step": 4985 }, { "epoch": 1.9705112373425537, "grad_norm": 0.4506026367726158, "learning_rate": 4.900546173278762e-06, "loss": 0.5555, "step": 4986 }, { "epoch": 1.970906396641146, "grad_norm": 0.4207709811591268, "learning_rate": 4.900502417849337e-06, "loss": 0.5502, "step": 4987 }, { "epoch": 1.9713015559397382, "grad_norm": 0.4266100877818548, "learning_rate": 4.900458652992196e-06, "loss": 0.5469, "step": 4988 }, { "epoch": 1.9716967152383305, "grad_norm": 0.4387635930216414, "learning_rate": 4.900414878707511e-06, "loss": 0.5587, "step": 4989 }, { "epoch": 1.9720918745369227, "grad_norm": 0.42751985993998615, "learning_rate": 4.9003710949954535e-06, "loss": 0.5488, "step": 4990 }, { "epoch": 1.972487033835515, "grad_norm": 0.429513289497151, "learning_rate": 4.900327301856196e-06, "loss": 0.5538, "step": 4991 }, { "epoch": 1.9728821931341072, "grad_norm": 0.42988406007517393, "learning_rate": 4.9002834992899104e-06, "loss": 0.5279, "step": 4992 }, { "epoch": 1.9732773524326994, "grad_norm": 0.4380519285585141, "learning_rate": 4.90023968729677e-06, "loss": 0.5651, "step": 4993 }, { "epoch": 1.9736725117312917, "grad_norm": 0.4489032418464982, "learning_rate": 4.900195865876944e-06, "loss": 0.5577, "step": 4994 }, { "epoch": 1.974067671029884, "grad_norm": 0.45508873672141004, "learning_rate": 4.900152035030607e-06, "loss": 0.5582, "step": 4995 }, { "epoch": 1.9744628303284761, "grad_norm": 0.43159558253407226, "learning_rate": 4.90010819475793e-06, "loss": 0.5623, "step": 4996 }, { "epoch": 1.9748579896270684, "grad_norm": 0.4372402377219953, "learning_rate": 4.900064345059086e-06, "loss": 0.5519, "step": 4997 }, { "epoch": 1.9752531489256606, "grad_norm": 0.4627096781522281, "learning_rate": 4.900020485934245e-06, "loss": 0.5693, "step": 4998 }, { "epoch": 1.9756483082242529, "grad_norm": 0.4518788906698329, "learning_rate": 4.899976617383583e-06, "loss": 0.5577, "step": 4999 }, { "epoch": 1.976043467522845, "grad_norm": 0.44234025817296496, "learning_rate": 4.899932739407268e-06, "loss": 0.552, "step": 5000 }, { "epoch": 1.9764386268214373, "grad_norm": 0.42902794698726177, "learning_rate": 4.899888852005477e-06, "loss": 0.535, "step": 5001 }, { "epoch": 1.9768337861200296, "grad_norm": 0.4337101022811792, "learning_rate": 4.899844955178378e-06, "loss": 0.5477, "step": 5002 }, { "epoch": 1.9772289454186218, "grad_norm": 0.4334561135618806, "learning_rate": 4.899801048926146e-06, "loss": 0.5536, "step": 5003 }, { "epoch": 1.977624104717214, "grad_norm": 0.4539433104177194, "learning_rate": 4.899757133248953e-06, "loss": 0.5634, "step": 5004 }, { "epoch": 1.9780192640158063, "grad_norm": 0.46398410451302946, "learning_rate": 4.89971320814697e-06, "loss": 0.5579, "step": 5005 }, { "epoch": 1.9784144233143985, "grad_norm": 0.4224716161481286, "learning_rate": 4.899669273620372e-06, "loss": 0.5213, "step": 5006 }, { "epoch": 1.9788095826129908, "grad_norm": 0.43890124660317376, "learning_rate": 4.899625329669329e-06, "loss": 0.5585, "step": 5007 }, { "epoch": 1.979204741911583, "grad_norm": 0.45718913393571137, "learning_rate": 4.899581376294016e-06, "loss": 0.5739, "step": 5008 }, { "epoch": 1.9795999012101753, "grad_norm": 0.4274912725996382, "learning_rate": 4.899537413494604e-06, "loss": 0.5284, "step": 5009 }, { "epoch": 1.9799950605087675, "grad_norm": 0.4263688372409198, "learning_rate": 4.899493441271266e-06, "loss": 0.5371, "step": 5010 }, { "epoch": 1.9803902198073597, "grad_norm": 0.42390616403436115, "learning_rate": 4.899449459624175e-06, "loss": 0.5525, "step": 5011 }, { "epoch": 1.980785379105952, "grad_norm": 0.439575208819799, "learning_rate": 4.899405468553503e-06, "loss": 0.5596, "step": 5012 }, { "epoch": 1.9811805384045442, "grad_norm": 0.438514692310827, "learning_rate": 4.899361468059424e-06, "loss": 0.5656, "step": 5013 }, { "epoch": 1.9815756977031365, "grad_norm": 0.4286665241622738, "learning_rate": 4.8993174581421095e-06, "loss": 0.5462, "step": 5014 }, { "epoch": 1.9819708570017287, "grad_norm": 0.4310129047836364, "learning_rate": 4.899273438801734e-06, "loss": 0.5717, "step": 5015 }, { "epoch": 1.982366016300321, "grad_norm": 0.4410419994536329, "learning_rate": 4.899229410038468e-06, "loss": 0.5432, "step": 5016 }, { "epoch": 1.9827611755989132, "grad_norm": 0.4421044202734814, "learning_rate": 4.899185371852487e-06, "loss": 0.552, "step": 5017 }, { "epoch": 1.9831563348975054, "grad_norm": 0.43915212841424395, "learning_rate": 4.899141324243962e-06, "loss": 0.5384, "step": 5018 }, { "epoch": 1.9835514941960977, "grad_norm": 0.42929780686144275, "learning_rate": 4.8990972672130675e-06, "loss": 0.5467, "step": 5019 }, { "epoch": 1.98394665349469, "grad_norm": 0.44257765457839277, "learning_rate": 4.899053200759975e-06, "loss": 0.5723, "step": 5020 }, { "epoch": 1.9843418127932821, "grad_norm": 0.4491299367521981, "learning_rate": 4.8990091248848586e-06, "loss": 0.56, "step": 5021 }, { "epoch": 1.9847369720918744, "grad_norm": 0.42594398347130197, "learning_rate": 4.898965039587891e-06, "loss": 0.544, "step": 5022 }, { "epoch": 1.9851321313904666, "grad_norm": 0.430300760036231, "learning_rate": 4.898920944869245e-06, "loss": 0.542, "step": 5023 }, { "epoch": 1.9855272906890589, "grad_norm": 0.43273660717146456, "learning_rate": 4.898876840729095e-06, "loss": 0.5361, "step": 5024 }, { "epoch": 1.985922449987651, "grad_norm": 0.44664865716579133, "learning_rate": 4.898832727167613e-06, "loss": 0.5703, "step": 5025 }, { "epoch": 1.9863176092862436, "grad_norm": 0.4268197117087961, "learning_rate": 4.898788604184973e-06, "loss": 0.5773, "step": 5026 }, { "epoch": 1.9867127685848358, "grad_norm": 0.4453737100715003, "learning_rate": 4.8987444717813475e-06, "loss": 0.5687, "step": 5027 }, { "epoch": 1.987107927883428, "grad_norm": 0.43272023462491616, "learning_rate": 4.898700329956911e-06, "loss": 0.558, "step": 5028 }, { "epoch": 1.9875030871820203, "grad_norm": 0.4264184633199343, "learning_rate": 4.898656178711836e-06, "loss": 0.5548, "step": 5029 }, { "epoch": 1.9878982464806125, "grad_norm": 0.4221266341561921, "learning_rate": 4.898612018046296e-06, "loss": 0.5594, "step": 5030 }, { "epoch": 1.9882934057792048, "grad_norm": 0.4347414491375779, "learning_rate": 4.898567847960463e-06, "loss": 0.5357, "step": 5031 }, { "epoch": 1.988688565077797, "grad_norm": 0.43586397162379814, "learning_rate": 4.898523668454514e-06, "loss": 0.5446, "step": 5032 }, { "epoch": 1.9890837243763893, "grad_norm": 0.5110052342258178, "learning_rate": 4.89847947952862e-06, "loss": 0.5537, "step": 5033 }, { "epoch": 1.9894788836749815, "grad_norm": 0.4451068478930137, "learning_rate": 4.898435281182955e-06, "loss": 0.5476, "step": 5034 }, { "epoch": 1.9898740429735737, "grad_norm": 0.4416455338525789, "learning_rate": 4.898391073417692e-06, "loss": 0.5623, "step": 5035 }, { "epoch": 1.990269202272166, "grad_norm": 0.4258928324683576, "learning_rate": 4.898346856233006e-06, "loss": 0.5516, "step": 5036 }, { "epoch": 1.9906643615707582, "grad_norm": 0.43772124082226155, "learning_rate": 4.89830262962907e-06, "loss": 0.5732, "step": 5037 }, { "epoch": 1.9910595208693505, "grad_norm": 0.44967948499329063, "learning_rate": 4.898258393606057e-06, "loss": 0.5642, "step": 5038 }, { "epoch": 1.9914546801679427, "grad_norm": 0.43814703911183817, "learning_rate": 4.898214148164142e-06, "loss": 0.5469, "step": 5039 }, { "epoch": 1.991849839466535, "grad_norm": 0.4230077406376979, "learning_rate": 4.898169893303497e-06, "loss": 0.5466, "step": 5040 }, { "epoch": 1.9922449987651272, "grad_norm": 0.43628697564315577, "learning_rate": 4.898125629024298e-06, "loss": 0.5445, "step": 5041 }, { "epoch": 1.9926401580637194, "grad_norm": 0.45113306979240975, "learning_rate": 4.898081355326717e-06, "loss": 0.5658, "step": 5042 }, { "epoch": 1.9930353173623117, "grad_norm": 0.4334951853126296, "learning_rate": 4.898037072210929e-06, "loss": 0.557, "step": 5043 }, { "epoch": 1.993430476660904, "grad_norm": 0.42752413492781205, "learning_rate": 4.897992779677108e-06, "loss": 0.5546, "step": 5044 }, { "epoch": 1.9938256359594961, "grad_norm": 0.4435208315687634, "learning_rate": 4.8979484777254275e-06, "loss": 0.5583, "step": 5045 }, { "epoch": 1.9942207952580884, "grad_norm": 0.4487454343438756, "learning_rate": 4.89790416635606e-06, "loss": 0.55, "step": 5046 }, { "epoch": 1.9946159545566806, "grad_norm": 0.4504942707382518, "learning_rate": 4.8978598455691825e-06, "loss": 0.5575, "step": 5047 }, { "epoch": 1.9950111138552729, "grad_norm": 0.45640354880823075, "learning_rate": 4.8978155153649674e-06, "loss": 0.5693, "step": 5048 }, { "epoch": 1.9954062731538653, "grad_norm": 0.442089413525396, "learning_rate": 4.897771175743588e-06, "loss": 0.559, "step": 5049 }, { "epoch": 1.9958014324524576, "grad_norm": 0.45021406883084675, "learning_rate": 4.89772682670522e-06, "loss": 0.566, "step": 5050 }, { "epoch": 1.9961965917510498, "grad_norm": 0.45325350234496126, "learning_rate": 4.897682468250038e-06, "loss": 0.5639, "step": 5051 }, { "epoch": 1.996591751049642, "grad_norm": 0.4320207290666673, "learning_rate": 4.897638100378214e-06, "loss": 0.559, "step": 5052 }, { "epoch": 1.9969869103482343, "grad_norm": 0.4471743726766185, "learning_rate": 4.897593723089924e-06, "loss": 0.5859, "step": 5053 }, { "epoch": 1.9973820696468265, "grad_norm": 0.4526408463021309, "learning_rate": 4.897549336385341e-06, "loss": 0.562, "step": 5054 }, { "epoch": 1.9977772289454188, "grad_norm": 0.4204803007502452, "learning_rate": 4.897504940264641e-06, "loss": 0.544, "step": 5055 }, { "epoch": 1.998172388244011, "grad_norm": 0.4293273991156161, "learning_rate": 4.897460534727997e-06, "loss": 0.5356, "step": 5056 }, { "epoch": 1.9985675475426032, "grad_norm": 0.43667313213086545, "learning_rate": 4.897416119775584e-06, "loss": 0.5415, "step": 5057 }, { "epoch": 1.9989627068411955, "grad_norm": 0.4463243145959753, "learning_rate": 4.897371695407576e-06, "loss": 0.5554, "step": 5058 }, { "epoch": 1.9993578661397877, "grad_norm": 0.4449190372166922, "learning_rate": 4.897327261624148e-06, "loss": 0.5541, "step": 5059 }, { "epoch": 1.99975302543838, "grad_norm": 0.4548254873381931, "learning_rate": 4.897282818425474e-06, "loss": 0.5736, "step": 5060 }, { "epoch": 2.000148184736972, "grad_norm": 0.45300066146970297, "learning_rate": 4.89723836581173e-06, "loss": 0.5866, "step": 5061 }, { "epoch": 2.0005433440355644, "grad_norm": 0.44240572381302623, "learning_rate": 4.897193903783087e-06, "loss": 0.5543, "step": 5062 }, { "epoch": 2.0009385033341567, "grad_norm": 0.4507961288693878, "learning_rate": 4.8971494323397236e-06, "loss": 0.5534, "step": 5063 }, { "epoch": 2.001333662632749, "grad_norm": 0.42461245435649125, "learning_rate": 4.897104951481813e-06, "loss": 0.546, "step": 5064 }, { "epoch": 2.001728821931341, "grad_norm": 0.43117780751367724, "learning_rate": 4.897060461209529e-06, "loss": 0.5413, "step": 5065 }, { "epoch": 2.0021239812299334, "grad_norm": 0.4590693569061441, "learning_rate": 4.8970159615230476e-06, "loss": 0.5553, "step": 5066 }, { "epoch": 2.0025191405285256, "grad_norm": 0.43910645784012625, "learning_rate": 4.896971452422543e-06, "loss": 0.5453, "step": 5067 }, { "epoch": 2.002914299827118, "grad_norm": 0.4491473519263672, "learning_rate": 4.89692693390819e-06, "loss": 0.5344, "step": 5068 }, { "epoch": 2.00330945912571, "grad_norm": 0.4558312418893952, "learning_rate": 4.896882405980164e-06, "loss": 0.5424, "step": 5069 }, { "epoch": 2.0037046184243024, "grad_norm": 0.4342578385234224, "learning_rate": 4.896837868638638e-06, "loss": 0.5609, "step": 5070 }, { "epoch": 2.0040997777228946, "grad_norm": 0.472485898038852, "learning_rate": 4.896793321883789e-06, "loss": 0.5507, "step": 5071 }, { "epoch": 2.004494937021487, "grad_norm": 0.43961186761669674, "learning_rate": 4.896748765715792e-06, "loss": 0.5627, "step": 5072 }, { "epoch": 2.004890096320079, "grad_norm": 0.5682972949137233, "learning_rate": 4.89670420013482e-06, "loss": 0.5572, "step": 5073 }, { "epoch": 2.0052852556186713, "grad_norm": 0.4414444744926705, "learning_rate": 4.89665962514105e-06, "loss": 0.5509, "step": 5074 }, { "epoch": 2.0056804149172636, "grad_norm": 0.41346400399643507, "learning_rate": 4.896615040734656e-06, "loss": 0.5201, "step": 5075 }, { "epoch": 2.006075574215856, "grad_norm": 0.4444163402767339, "learning_rate": 4.896570446915814e-06, "loss": 0.5373, "step": 5076 }, { "epoch": 2.006470733514448, "grad_norm": 0.4296320259295841, "learning_rate": 4.896525843684698e-06, "loss": 0.5574, "step": 5077 }, { "epoch": 2.0068658928130403, "grad_norm": 0.4324921273129445, "learning_rate": 4.896481231041483e-06, "loss": 0.5597, "step": 5078 }, { "epoch": 2.0072610521116325, "grad_norm": 0.5733673612744304, "learning_rate": 4.896436608986347e-06, "loss": 0.5648, "step": 5079 }, { "epoch": 2.0076562114102248, "grad_norm": 0.42676323466611693, "learning_rate": 4.896391977519461e-06, "loss": 0.5537, "step": 5080 }, { "epoch": 2.008051370708817, "grad_norm": 0.4430572425107361, "learning_rate": 4.896347336641004e-06, "loss": 0.5509, "step": 5081 }, { "epoch": 2.0084465300074092, "grad_norm": 0.45815046775746454, "learning_rate": 4.896302686351149e-06, "loss": 0.5684, "step": 5082 }, { "epoch": 2.0088416893060015, "grad_norm": 0.4498740575972979, "learning_rate": 4.896258026650072e-06, "loss": 0.5895, "step": 5083 }, { "epoch": 2.0092368486045937, "grad_norm": 0.4208347192689959, "learning_rate": 4.89621335753795e-06, "loss": 0.546, "step": 5084 }, { "epoch": 2.009632007903186, "grad_norm": 0.4859059756580774, "learning_rate": 4.8961686790149554e-06, "loss": 0.5585, "step": 5085 }, { "epoch": 2.010027167201778, "grad_norm": 0.43391858881607787, "learning_rate": 4.896123991081266e-06, "loss": 0.563, "step": 5086 }, { "epoch": 2.0104223265003704, "grad_norm": 0.4314428130560567, "learning_rate": 4.8960792937370565e-06, "loss": 0.544, "step": 5087 }, { "epoch": 2.0108174857989627, "grad_norm": 0.42222335118568166, "learning_rate": 4.896034586982502e-06, "loss": 0.5453, "step": 5088 }, { "epoch": 2.011212645097555, "grad_norm": 0.4360640333190303, "learning_rate": 4.89598987081778e-06, "loss": 0.5461, "step": 5089 }, { "epoch": 2.011607804396147, "grad_norm": 0.44026271873516537, "learning_rate": 4.8959451452430635e-06, "loss": 0.5541, "step": 5090 }, { "epoch": 2.0120029636947394, "grad_norm": 0.42715031631521877, "learning_rate": 4.895900410258529e-06, "loss": 0.537, "step": 5091 }, { "epoch": 2.0123981229933317, "grad_norm": 0.41412343827373277, "learning_rate": 4.8958556658643535e-06, "loss": 0.5494, "step": 5092 }, { "epoch": 2.012793282291924, "grad_norm": 0.4358377535786107, "learning_rate": 4.8958109120607115e-06, "loss": 0.5513, "step": 5093 }, { "epoch": 2.013188441590516, "grad_norm": 0.42454225257888584, "learning_rate": 4.895766148847779e-06, "loss": 0.542, "step": 5094 }, { "epoch": 2.0135836008891084, "grad_norm": 0.42998865577635037, "learning_rate": 4.895721376225732e-06, "loss": 0.5523, "step": 5095 }, { "epoch": 2.0139787601877006, "grad_norm": 0.4344501069089372, "learning_rate": 4.8956765941947456e-06, "loss": 0.5486, "step": 5096 }, { "epoch": 2.014373919486293, "grad_norm": 0.43353826879791363, "learning_rate": 4.895631802754997e-06, "loss": 0.543, "step": 5097 }, { "epoch": 2.014769078784885, "grad_norm": 0.4575411830513003, "learning_rate": 4.895587001906661e-06, "loss": 0.5689, "step": 5098 }, { "epoch": 2.0151642380834773, "grad_norm": 0.42786800682913717, "learning_rate": 4.895542191649914e-06, "loss": 0.5578, "step": 5099 }, { "epoch": 2.0155593973820696, "grad_norm": 0.4964495652383933, "learning_rate": 4.895497371984932e-06, "loss": 0.564, "step": 5100 }, { "epoch": 2.015954556680662, "grad_norm": 0.42115240187280495, "learning_rate": 4.895452542911891e-06, "loss": 0.5498, "step": 5101 }, { "epoch": 2.016349715979254, "grad_norm": 0.42588276441255346, "learning_rate": 4.895407704430967e-06, "loss": 0.5556, "step": 5102 }, { "epoch": 2.0167448752778463, "grad_norm": 0.4302156099061691, "learning_rate": 4.895362856542336e-06, "loss": 0.5442, "step": 5103 }, { "epoch": 2.0171400345764385, "grad_norm": 0.43604945564161973, "learning_rate": 4.895317999246174e-06, "loss": 0.5656, "step": 5104 }, { "epoch": 2.0000987898246483, "grad_norm": 0.44789874867544277, "learning_rate": 4.895273132542658e-06, "loss": 0.5109, "step": 5105 }, { "epoch": 2.0004939491232405, "grad_norm": 0.7893264350562275, "learning_rate": 4.895228256431963e-06, "loss": 0.4954, "step": 5106 }, { "epoch": 2.0008891084218328, "grad_norm": 0.5938590122165337, "learning_rate": 4.895183370914267e-06, "loss": 0.504, "step": 5107 }, { "epoch": 2.001284267720425, "grad_norm": 0.5235383132112481, "learning_rate": 4.8951384759897435e-06, "loss": 0.5104, "step": 5108 }, { "epoch": 2.0016794270190172, "grad_norm": 0.6882530599009438, "learning_rate": 4.895093571658571e-06, "loss": 0.4983, "step": 5109 }, { "epoch": 2.0020745863176095, "grad_norm": 0.7510915200577702, "learning_rate": 4.895048657920926e-06, "loss": 0.4983, "step": 5110 }, { "epoch": 2.0024697456162017, "grad_norm": 0.6065619194319212, "learning_rate": 4.895003734776984e-06, "loss": 0.49, "step": 5111 }, { "epoch": 2.002864904914794, "grad_norm": 0.5356219470458906, "learning_rate": 4.894958802226921e-06, "loss": 0.4856, "step": 5112 }, { "epoch": 2.003260064213386, "grad_norm": 0.5780607077723401, "learning_rate": 4.894913860270915e-06, "loss": 0.4935, "step": 5113 }, { "epoch": 2.0036552235119784, "grad_norm": 0.5595862883969074, "learning_rate": 4.8948689089091414e-06, "loss": 0.4943, "step": 5114 }, { "epoch": 2.0040503828105707, "grad_norm": 0.5217610935709599, "learning_rate": 4.8948239481417766e-06, "loss": 0.5112, "step": 5115 }, { "epoch": 2.004445542109163, "grad_norm": 0.49305801115666537, "learning_rate": 4.894778977968998e-06, "loss": 0.4907, "step": 5116 }, { "epoch": 2.004840701407755, "grad_norm": 0.530247044656353, "learning_rate": 4.894733998390982e-06, "loss": 0.4904, "step": 5117 }, { "epoch": 2.0052358607063474, "grad_norm": 0.552426427747116, "learning_rate": 4.894689009407903e-06, "loss": 0.49, "step": 5118 }, { "epoch": 2.0056310200049396, "grad_norm": 0.5685491982079328, "learning_rate": 4.894644011019942e-06, "loss": 0.4948, "step": 5119 }, { "epoch": 2.006026179303532, "grad_norm": 0.502595184515819, "learning_rate": 4.894599003227273e-06, "loss": 0.4962, "step": 5120 }, { "epoch": 2.006421338602124, "grad_norm": 0.5130021858315302, "learning_rate": 4.8945539860300725e-06, "loss": 0.5107, "step": 5121 }, { "epoch": 2.0068164979007164, "grad_norm": 0.5350797674291279, "learning_rate": 4.8945089594285185e-06, "loss": 0.5076, "step": 5122 }, { "epoch": 2.0072116571993086, "grad_norm": 0.49090717650738347, "learning_rate": 4.894463923422787e-06, "loss": 0.4939, "step": 5123 }, { "epoch": 2.007606816497901, "grad_norm": 0.48813921337582705, "learning_rate": 4.8944188780130555e-06, "loss": 0.5004, "step": 5124 }, { "epoch": 2.008001975796493, "grad_norm": 0.4912844935858703, "learning_rate": 4.8943738231995005e-06, "loss": 0.4914, "step": 5125 }, { "epoch": 2.0083971350950853, "grad_norm": 0.5117780002863588, "learning_rate": 4.894328758982299e-06, "loss": 0.4892, "step": 5126 }, { "epoch": 2.0087922943936776, "grad_norm": 0.4929822170606211, "learning_rate": 4.894283685361628e-06, "loss": 0.5052, "step": 5127 }, { "epoch": 2.00918745369227, "grad_norm": 0.47940057487781756, "learning_rate": 4.894238602337665e-06, "loss": 0.4986, "step": 5128 }, { "epoch": 2.009582612990862, "grad_norm": 0.4738863333886923, "learning_rate": 4.894193509910586e-06, "loss": 0.4841, "step": 5129 }, { "epoch": 2.0099777722894543, "grad_norm": 0.470276754348612, "learning_rate": 4.8941484080805695e-06, "loss": 0.5098, "step": 5130 }, { "epoch": 2.0103729315880465, "grad_norm": 0.4767147238302886, "learning_rate": 4.8941032968477914e-06, "loss": 0.4909, "step": 5131 }, { "epoch": 2.0107680908866388, "grad_norm": 0.4721168271949362, "learning_rate": 4.894058176212429e-06, "loss": 0.484, "step": 5132 }, { "epoch": 2.011163250185231, "grad_norm": 0.4694032432392925, "learning_rate": 4.89401304617466e-06, "loss": 0.4952, "step": 5133 }, { "epoch": 2.0115584094838233, "grad_norm": 0.5016813592368355, "learning_rate": 4.8939679067346625e-06, "loss": 0.4858, "step": 5134 }, { "epoch": 2.0119535687824155, "grad_norm": 0.4764925899996865, "learning_rate": 4.893922757892612e-06, "loss": 0.4997, "step": 5135 }, { "epoch": 2.0123487280810077, "grad_norm": 0.48398112841687313, "learning_rate": 4.893877599648686e-06, "loss": 0.5019, "step": 5136 }, { "epoch": 2.0127438873796, "grad_norm": 0.47136229426376053, "learning_rate": 4.893832432003062e-06, "loss": 0.4867, "step": 5137 }, { "epoch": 2.013139046678192, "grad_norm": 0.48191425839984575, "learning_rate": 4.893787254955919e-06, "loss": 0.4825, "step": 5138 }, { "epoch": 2.0135342059767845, "grad_norm": 0.4670311308730599, "learning_rate": 4.893742068507434e-06, "loss": 0.4951, "step": 5139 }, { "epoch": 2.0139293652753767, "grad_norm": 0.4890864968827146, "learning_rate": 4.893696872657782e-06, "loss": 0.5113, "step": 5140 }, { "epoch": 2.014324524573969, "grad_norm": 0.4584922074591445, "learning_rate": 4.893651667407143e-06, "loss": 0.4888, "step": 5141 }, { "epoch": 2.014719683872561, "grad_norm": 0.48036994938581246, "learning_rate": 4.893606452755693e-06, "loss": 0.4916, "step": 5142 }, { "epoch": 2.0151148431711534, "grad_norm": 0.6649796099167191, "learning_rate": 4.893561228703611e-06, "loss": 0.5061, "step": 5143 }, { "epoch": 2.0155100024697457, "grad_norm": 0.46584443556518795, "learning_rate": 4.8935159952510745e-06, "loss": 0.4786, "step": 5144 }, { "epoch": 2.015905161768338, "grad_norm": 0.46787656590183857, "learning_rate": 4.893470752398261e-06, "loss": 0.4822, "step": 5145 }, { "epoch": 2.01630032106693, "grad_norm": 0.46824424287806576, "learning_rate": 4.893425500145346e-06, "loss": 0.5001, "step": 5146 }, { "epoch": 2.0166954803655224, "grad_norm": 0.49194078179454165, "learning_rate": 4.89338023849251e-06, "loss": 0.4799, "step": 5147 }, { "epoch": 2.0170906396641146, "grad_norm": 0.476320179653451, "learning_rate": 4.893334967439929e-06, "loss": 0.5194, "step": 5148 }, { "epoch": 2.017485798962707, "grad_norm": 0.4780909181467547, "learning_rate": 4.893289686987782e-06, "loss": 0.5032, "step": 5149 }, { "epoch": 2.017880958261299, "grad_norm": 0.4548175332915629, "learning_rate": 4.893244397136247e-06, "loss": 0.4723, "step": 5150 }, { "epoch": 2.0182761175598913, "grad_norm": 0.44903119971257, "learning_rate": 4.8931990978855005e-06, "loss": 0.4873, "step": 5151 }, { "epoch": 2.0186712768584836, "grad_norm": 0.46396916970746166, "learning_rate": 4.893153789235722e-06, "loss": 0.5, "step": 5152 }, { "epoch": 2.019066436157076, "grad_norm": 0.4569485462711414, "learning_rate": 4.893108471187088e-06, "loss": 0.5015, "step": 5153 }, { "epoch": 2.019461595455668, "grad_norm": 0.45922275497265813, "learning_rate": 4.893063143739777e-06, "loss": 0.502, "step": 5154 }, { "epoch": 2.0198567547542603, "grad_norm": 0.4695075729482603, "learning_rate": 4.893017806893967e-06, "loss": 0.5134, "step": 5155 }, { "epoch": 2.0202519140528525, "grad_norm": 0.4530917617622478, "learning_rate": 4.892972460649836e-06, "loss": 0.5076, "step": 5156 }, { "epoch": 2.020647073351445, "grad_norm": 0.4504342750468262, "learning_rate": 4.892927105007563e-06, "loss": 0.505, "step": 5157 }, { "epoch": 2.021042232650037, "grad_norm": 0.4667012843063457, "learning_rate": 4.892881739967325e-06, "loss": 0.5167, "step": 5158 }, { "epoch": 2.0214373919486293, "grad_norm": 0.4494930932302058, "learning_rate": 4.892836365529301e-06, "loss": 0.4803, "step": 5159 }, { "epoch": 2.0218325512472215, "grad_norm": 0.4588896369546433, "learning_rate": 4.892790981693668e-06, "loss": 0.5003, "step": 5160 }, { "epoch": 2.0222277105458137, "grad_norm": 0.4497347107376531, "learning_rate": 4.892745588460606e-06, "loss": 0.4777, "step": 5161 }, { "epoch": 2.022622869844406, "grad_norm": 0.4547275239393893, "learning_rate": 4.892700185830291e-06, "loss": 0.4908, "step": 5162 }, { "epoch": 2.023018029142998, "grad_norm": 0.4776327997808457, "learning_rate": 4.892654773802904e-06, "loss": 0.5075, "step": 5163 }, { "epoch": 2.0234131884415905, "grad_norm": 0.45100034823235746, "learning_rate": 4.892609352378621e-06, "loss": 0.4954, "step": 5164 }, { "epoch": 2.0238083477401827, "grad_norm": 0.453949322209561, "learning_rate": 4.8925639215576215e-06, "loss": 0.4988, "step": 5165 }, { "epoch": 2.024203507038775, "grad_norm": 0.4485757581479012, "learning_rate": 4.8925184813400835e-06, "loss": 0.4954, "step": 5166 }, { "epoch": 2.024598666337367, "grad_norm": 0.4523702947118703, "learning_rate": 4.892473031726187e-06, "loss": 0.5083, "step": 5167 }, { "epoch": 2.0249938256359594, "grad_norm": 0.4510797356282732, "learning_rate": 4.8924275727161075e-06, "loss": 0.4964, "step": 5168 }, { "epoch": 2.0253889849345517, "grad_norm": 0.464165953716701, "learning_rate": 4.892382104310026e-06, "loss": 0.506, "step": 5169 }, { "epoch": 2.025784144233144, "grad_norm": 0.4598347669774403, "learning_rate": 4.892336626508121e-06, "loss": 0.5139, "step": 5170 }, { "epoch": 2.026179303531736, "grad_norm": 0.44063169455818113, "learning_rate": 4.89229113931057e-06, "loss": 0.5005, "step": 5171 }, { "epoch": 2.0265744628303284, "grad_norm": 0.4520710240225769, "learning_rate": 4.892245642717551e-06, "loss": 0.4917, "step": 5172 }, { "epoch": 2.0269696221289206, "grad_norm": 0.4558674076863556, "learning_rate": 4.8922001367292445e-06, "loss": 0.487, "step": 5173 }, { "epoch": 2.027364781427513, "grad_norm": 0.4637585590513638, "learning_rate": 4.892154621345829e-06, "loss": 0.5094, "step": 5174 }, { "epoch": 2.027759940726105, "grad_norm": 0.45275964455192436, "learning_rate": 4.8921090965674825e-06, "loss": 0.4866, "step": 5175 }, { "epoch": 2.0281551000246973, "grad_norm": 0.4544576816718463, "learning_rate": 4.892063562394384e-06, "loss": 0.5135, "step": 5176 }, { "epoch": 2.0285502593232896, "grad_norm": 0.4713873273127142, "learning_rate": 4.892018018826712e-06, "loss": 0.4952, "step": 5177 }, { "epoch": 2.028945418621882, "grad_norm": 0.44993919688256234, "learning_rate": 4.8919724658646465e-06, "loss": 0.4896, "step": 5178 }, { "epoch": 2.029340577920474, "grad_norm": 0.4689922683938695, "learning_rate": 4.891926903508365e-06, "loss": 0.4828, "step": 5179 }, { "epoch": 2.0297357372190663, "grad_norm": 0.4838250197458008, "learning_rate": 4.891881331758047e-06, "loss": 0.5096, "step": 5180 }, { "epoch": 2.0301308965176585, "grad_norm": 0.4675284522861168, "learning_rate": 4.891835750613872e-06, "loss": 0.4982, "step": 5181 }, { "epoch": 2.030526055816251, "grad_norm": 0.45385770764223, "learning_rate": 4.891790160076018e-06, "loss": 0.5001, "step": 5182 }, { "epoch": 2.030921215114843, "grad_norm": 0.4600669620147579, "learning_rate": 4.8917445601446656e-06, "loss": 0.4839, "step": 5183 }, { "epoch": 2.0313163744134353, "grad_norm": 0.4530041700317838, "learning_rate": 4.891698950819992e-06, "loss": 0.501, "step": 5184 }, { "epoch": 2.0317115337120275, "grad_norm": 0.47142109619231753, "learning_rate": 4.891653332102177e-06, "loss": 0.5274, "step": 5185 }, { "epoch": 2.0321066930106197, "grad_norm": 0.46742031828739516, "learning_rate": 4.891607703991401e-06, "loss": 0.5046, "step": 5186 }, { "epoch": 2.032501852309212, "grad_norm": 0.7198025805905561, "learning_rate": 4.891562066487842e-06, "loss": 0.4781, "step": 5187 }, { "epoch": 2.0328970116078042, "grad_norm": 0.47277610272117676, "learning_rate": 4.891516419591679e-06, "loss": 0.4994, "step": 5188 }, { "epoch": 2.0332921709063965, "grad_norm": 0.49484180211119877, "learning_rate": 4.891470763303092e-06, "loss": 0.4852, "step": 5189 }, { "epoch": 2.0336873302049887, "grad_norm": 0.4859621511332183, "learning_rate": 4.89142509762226e-06, "loss": 0.5134, "step": 5190 }, { "epoch": 2.034082489503581, "grad_norm": 0.4577229468236649, "learning_rate": 4.891379422549361e-06, "loss": 0.4986, "step": 5191 }, { "epoch": 2.034477648802173, "grad_norm": 0.47417760848062124, "learning_rate": 4.891333738084578e-06, "loss": 0.511, "step": 5192 }, { "epoch": 2.0348728081007654, "grad_norm": 0.48539047796098145, "learning_rate": 4.891288044228088e-06, "loss": 0.496, "step": 5193 }, { "epoch": 2.0352679673993577, "grad_norm": 0.46849711925970705, "learning_rate": 4.891242340980069e-06, "loss": 0.5081, "step": 5194 }, { "epoch": 2.03566312669795, "grad_norm": 0.4690072112312264, "learning_rate": 4.891196628340703e-06, "loss": 0.5097, "step": 5195 }, { "epoch": 2.036058285996542, "grad_norm": 0.46808264537794086, "learning_rate": 4.8911509063101685e-06, "loss": 0.488, "step": 5196 }, { "epoch": 2.0364534452951344, "grad_norm": 0.5519283415442977, "learning_rate": 4.891105174888645e-06, "loss": 0.5065, "step": 5197 }, { "epoch": 2.0368486045937266, "grad_norm": 0.46362954842671233, "learning_rate": 4.891059434076313e-06, "loss": 0.4878, "step": 5198 }, { "epoch": 2.0372437638923193, "grad_norm": 0.5173109880832085, "learning_rate": 4.891013683873351e-06, "loss": 0.4982, "step": 5199 }, { "epoch": 2.0376389231909116, "grad_norm": 0.46175244060924875, "learning_rate": 4.890967924279939e-06, "loss": 0.4923, "step": 5200 }, { "epoch": 2.038034082489504, "grad_norm": 0.44259574184550005, "learning_rate": 4.8909221552962574e-06, "loss": 0.4824, "step": 5201 }, { "epoch": 2.038429241788096, "grad_norm": 0.4586966341918503, "learning_rate": 4.890876376922486e-06, "loss": 0.4779, "step": 5202 }, { "epoch": 2.0388244010866883, "grad_norm": 0.46102142747514496, "learning_rate": 4.890830589158802e-06, "loss": 0.49, "step": 5203 }, { "epoch": 2.0392195603852805, "grad_norm": 0.471145434062063, "learning_rate": 4.8907847920053885e-06, "loss": 0.5156, "step": 5204 }, { "epoch": 2.0396147196838728, "grad_norm": 0.4549769045467522, "learning_rate": 4.890738985462424e-06, "loss": 0.4871, "step": 5205 }, { "epoch": 2.040009878982465, "grad_norm": 0.44669007469835803, "learning_rate": 4.890693169530088e-06, "loss": 0.48, "step": 5206 }, { "epoch": 2.0404050382810572, "grad_norm": 0.4617411167739797, "learning_rate": 4.890647344208562e-06, "loss": 0.4818, "step": 5207 }, { "epoch": 2.0408001975796495, "grad_norm": 0.4689178727542056, "learning_rate": 4.8906015094980246e-06, "loss": 0.4983, "step": 5208 }, { "epoch": 2.0411953568782417, "grad_norm": 0.45516172538535027, "learning_rate": 4.890555665398656e-06, "loss": 0.4964, "step": 5209 }, { "epoch": 2.041590516176834, "grad_norm": 0.4391222692490777, "learning_rate": 4.890509811910637e-06, "loss": 0.472, "step": 5210 }, { "epoch": 2.041985675475426, "grad_norm": 0.4739508445179121, "learning_rate": 4.890463949034145e-06, "loss": 0.5124, "step": 5211 }, { "epoch": 2.0423808347740184, "grad_norm": 0.4597047754530406, "learning_rate": 4.890418076769364e-06, "loss": 0.4901, "step": 5212 }, { "epoch": 2.0427759940726107, "grad_norm": 0.4400780410171843, "learning_rate": 4.890372195116472e-06, "loss": 0.4955, "step": 5213 }, { "epoch": 2.043171153371203, "grad_norm": 0.4525419825437649, "learning_rate": 4.890326304075649e-06, "loss": 0.4837, "step": 5214 }, { "epoch": 2.043566312669795, "grad_norm": 0.45871098059276433, "learning_rate": 4.890280403647076e-06, "loss": 0.4951, "step": 5215 }, { "epoch": 2.0439614719683874, "grad_norm": 0.4643059677550961, "learning_rate": 4.890234493830933e-06, "loss": 0.503, "step": 5216 }, { "epoch": 2.0443566312669796, "grad_norm": 0.4499775130020552, "learning_rate": 4.8901885746274e-06, "loss": 0.484, "step": 5217 }, { "epoch": 2.044751790565572, "grad_norm": 0.45820087196407705, "learning_rate": 4.890142646036659e-06, "loss": 0.504, "step": 5218 }, { "epoch": 2.045146949864164, "grad_norm": 0.4532918197234839, "learning_rate": 4.890096708058888e-06, "loss": 0.4833, "step": 5219 }, { "epoch": 2.0455421091627564, "grad_norm": 0.4596033870554187, "learning_rate": 4.890050760694268e-06, "loss": 0.4818, "step": 5220 }, { "epoch": 2.0459372684613486, "grad_norm": 0.5275495863257932, "learning_rate": 4.890004803942982e-06, "loss": 0.5084, "step": 5221 }, { "epoch": 2.046332427759941, "grad_norm": 0.44983675009756247, "learning_rate": 4.889958837805207e-06, "loss": 0.4839, "step": 5222 }, { "epoch": 2.046727587058533, "grad_norm": 0.46932242931178536, "learning_rate": 4.889912862281124e-06, "loss": 0.4891, "step": 5223 }, { "epoch": 2.0471227463571253, "grad_norm": 0.4580616059487756, "learning_rate": 4.889866877370915e-06, "loss": 0.4697, "step": 5224 }, { "epoch": 2.0475179056557176, "grad_norm": 0.4543027878441082, "learning_rate": 4.8898208830747615e-06, "loss": 0.4971, "step": 5225 }, { "epoch": 2.04791306495431, "grad_norm": 0.45234992583478045, "learning_rate": 4.889774879392841e-06, "loss": 0.5034, "step": 5226 }, { "epoch": 2.048308224252902, "grad_norm": 0.45898065072529526, "learning_rate": 4.889728866325337e-06, "loss": 0.5033, "step": 5227 }, { "epoch": 2.0487033835514943, "grad_norm": 0.4601022979706201, "learning_rate": 4.889682843872429e-06, "loss": 0.4865, "step": 5228 }, { "epoch": 2.0490985428500865, "grad_norm": 0.44681491251197947, "learning_rate": 4.889636812034298e-06, "loss": 0.5066, "step": 5229 }, { "epoch": 2.0494937021486788, "grad_norm": 0.4506415649381756, "learning_rate": 4.889590770811125e-06, "loss": 0.4907, "step": 5230 }, { "epoch": 2.049888861447271, "grad_norm": 0.45120214151761606, "learning_rate": 4.88954472020309e-06, "loss": 0.5062, "step": 5231 }, { "epoch": 2.0502840207458632, "grad_norm": 0.45154401506062164, "learning_rate": 4.8894986602103735e-06, "loss": 0.4961, "step": 5232 }, { "epoch": 2.0506791800444555, "grad_norm": 0.4511811539872512, "learning_rate": 4.889452590833158e-06, "loss": 0.4858, "step": 5233 }, { "epoch": 2.0510743393430477, "grad_norm": 0.44882241957643904, "learning_rate": 4.8894065120716235e-06, "loss": 0.5002, "step": 5234 }, { "epoch": 2.05146949864164, "grad_norm": 0.44681664863949433, "learning_rate": 4.889360423925952e-06, "loss": 0.4889, "step": 5235 }, { "epoch": 2.051864657940232, "grad_norm": 0.45495838986619974, "learning_rate": 4.889314326396323e-06, "loss": 0.4913, "step": 5236 }, { "epoch": 2.0522598172388244, "grad_norm": 0.45058039719748044, "learning_rate": 4.889268219482918e-06, "loss": 0.4974, "step": 5237 }, { "epoch": 2.0526549765374167, "grad_norm": 0.4673937972973294, "learning_rate": 4.889222103185919e-06, "loss": 0.4963, "step": 5238 }, { "epoch": 2.053050135836009, "grad_norm": 0.4650737282011602, "learning_rate": 4.889175977505505e-06, "loss": 0.4903, "step": 5239 }, { "epoch": 2.053445295134601, "grad_norm": 0.45116470306677475, "learning_rate": 4.88912984244186e-06, "loss": 0.4992, "step": 5240 }, { "epoch": 2.0538404544331934, "grad_norm": 0.5298176727549352, "learning_rate": 4.889083697995163e-06, "loss": 0.4919, "step": 5241 }, { "epoch": 2.0542356137317856, "grad_norm": 0.45177097015636897, "learning_rate": 4.889037544165596e-06, "loss": 0.4904, "step": 5242 }, { "epoch": 2.054630773030378, "grad_norm": 0.5119412810802527, "learning_rate": 4.8889913809533404e-06, "loss": 0.4968, "step": 5243 }, { "epoch": 2.05502593232897, "grad_norm": 0.4616855361620579, "learning_rate": 4.888945208358577e-06, "loss": 0.5005, "step": 5244 }, { "epoch": 2.0554210916275624, "grad_norm": 0.4672209154136625, "learning_rate": 4.888899026381487e-06, "loss": 0.5107, "step": 5245 }, { "epoch": 2.0558162509261546, "grad_norm": 0.4528232853036076, "learning_rate": 4.888852835022253e-06, "loss": 0.4762, "step": 5246 }, { "epoch": 2.056211410224747, "grad_norm": 0.4558490462001704, "learning_rate": 4.8888066342810555e-06, "loss": 0.491, "step": 5247 }, { "epoch": 2.056606569523339, "grad_norm": 0.4527416296938898, "learning_rate": 4.888760424158077e-06, "loss": 0.4922, "step": 5248 }, { "epoch": 2.0570017288219313, "grad_norm": 0.46237981652455606, "learning_rate": 4.8887142046534975e-06, "loss": 0.5024, "step": 5249 }, { "epoch": 2.0573968881205236, "grad_norm": 0.4637558739077874, "learning_rate": 4.888667975767499e-06, "loss": 0.4894, "step": 5250 }, { "epoch": 2.057792047419116, "grad_norm": 0.4744951570899205, "learning_rate": 4.888621737500262e-06, "loss": 0.4908, "step": 5251 }, { "epoch": 2.058187206717708, "grad_norm": 0.4584214020537685, "learning_rate": 4.888575489851971e-06, "loss": 0.5037, "step": 5252 }, { "epoch": 2.0585823660163003, "grad_norm": 0.4727044592007818, "learning_rate": 4.888529232822805e-06, "loss": 0.4776, "step": 5253 }, { "epoch": 2.0589775253148925, "grad_norm": 0.4632461314737739, "learning_rate": 4.888482966412947e-06, "loss": 0.4921, "step": 5254 }, { "epoch": 2.0593726846134848, "grad_norm": 0.46365343578610424, "learning_rate": 4.888436690622578e-06, "loss": 0.4982, "step": 5255 }, { "epoch": 2.059767843912077, "grad_norm": 0.4927502347234346, "learning_rate": 4.8883904054518805e-06, "loss": 0.5025, "step": 5256 }, { "epoch": 2.0601630032106693, "grad_norm": 0.45431480591589724, "learning_rate": 4.888344110901035e-06, "loss": 0.4862, "step": 5257 }, { "epoch": 2.0605581625092615, "grad_norm": 0.44639395998441367, "learning_rate": 4.888297806970225e-06, "loss": 0.5002, "step": 5258 }, { "epoch": 2.0609533218078537, "grad_norm": 0.4581725919289095, "learning_rate": 4.888251493659631e-06, "loss": 0.4763, "step": 5259 }, { "epoch": 2.061348481106446, "grad_norm": 0.455160272080671, "learning_rate": 4.888205170969435e-06, "loss": 0.499, "step": 5260 }, { "epoch": 2.061743640405038, "grad_norm": 0.4643189428275059, "learning_rate": 4.888158838899819e-06, "loss": 0.4932, "step": 5261 }, { "epoch": 2.0621387997036305, "grad_norm": 0.4463638667832112, "learning_rate": 4.888112497450966e-06, "loss": 0.5011, "step": 5262 }, { "epoch": 2.0625339590022227, "grad_norm": 0.44491650271349903, "learning_rate": 4.888066146623058e-06, "loss": 0.4968, "step": 5263 }, { "epoch": 2.062929118300815, "grad_norm": 0.4741916119379606, "learning_rate": 4.888019786416275e-06, "loss": 0.5109, "step": 5264 }, { "epoch": 2.063324277599407, "grad_norm": 0.46077105210136354, "learning_rate": 4.887973416830801e-06, "loss": 0.4847, "step": 5265 }, { "epoch": 2.0637194368979994, "grad_norm": 0.5849360991852471, "learning_rate": 4.887927037866817e-06, "loss": 0.4926, "step": 5266 }, { "epoch": 2.0641145961965917, "grad_norm": 0.4633200832882633, "learning_rate": 4.8878806495245055e-06, "loss": 0.4919, "step": 5267 }, { "epoch": 2.064509755495184, "grad_norm": 0.46236301178780204, "learning_rate": 4.887834251804049e-06, "loss": 0.4954, "step": 5268 }, { "epoch": 2.064904914793776, "grad_norm": 0.457629583658407, "learning_rate": 4.8877878447056305e-06, "loss": 0.4923, "step": 5269 }, { "epoch": 2.0653000740923684, "grad_norm": 0.4673120651485547, "learning_rate": 4.88774142822943e-06, "loss": 0.4808, "step": 5270 }, { "epoch": 2.0656952333909606, "grad_norm": 0.4568906136006192, "learning_rate": 4.887695002375631e-06, "loss": 0.485, "step": 5271 }, { "epoch": 2.066090392689553, "grad_norm": 0.4734779549670619, "learning_rate": 4.8876485671444175e-06, "loss": 0.4925, "step": 5272 }, { "epoch": 2.066485551988145, "grad_norm": 0.47832808820634337, "learning_rate": 4.887602122535969e-06, "loss": 0.4957, "step": 5273 }, { "epoch": 2.0668807112867373, "grad_norm": 0.4749938197663773, "learning_rate": 4.887555668550469e-06, "loss": 0.5011, "step": 5274 }, { "epoch": 2.0672758705853296, "grad_norm": 0.46181910473113197, "learning_rate": 4.887509205188101e-06, "loss": 0.4954, "step": 5275 }, { "epoch": 2.067671029883922, "grad_norm": 0.4476900675547803, "learning_rate": 4.887462732449046e-06, "loss": 0.5033, "step": 5276 }, { "epoch": 2.068066189182514, "grad_norm": 0.4540259110444361, "learning_rate": 4.887416250333487e-06, "loss": 0.4902, "step": 5277 }, { "epoch": 2.0684613484811063, "grad_norm": 0.448149981706839, "learning_rate": 4.8873697588416075e-06, "loss": 0.4998, "step": 5278 }, { "epoch": 2.0688565077796985, "grad_norm": 0.44190241176056827, "learning_rate": 4.887323257973589e-06, "loss": 0.4946, "step": 5279 }, { "epoch": 2.069251667078291, "grad_norm": 0.4462613061115609, "learning_rate": 4.887276747729614e-06, "loss": 0.4817, "step": 5280 }, { "epoch": 2.069646826376883, "grad_norm": 0.45013660019262763, "learning_rate": 4.887230228109866e-06, "loss": 0.4897, "step": 5281 }, { "epoch": 2.0700419856754753, "grad_norm": 0.4482753274443419, "learning_rate": 4.887183699114526e-06, "loss": 0.5006, "step": 5282 }, { "epoch": 2.0704371449740675, "grad_norm": 0.46170470070203723, "learning_rate": 4.88713716074378e-06, "loss": 0.5052, "step": 5283 }, { "epoch": 2.0708323042726597, "grad_norm": 0.4669933900589111, "learning_rate": 4.887090612997808e-06, "loss": 0.4917, "step": 5284 }, { "epoch": 2.071227463571252, "grad_norm": 0.483843226668854, "learning_rate": 4.887044055876793e-06, "loss": 0.5046, "step": 5285 }, { "epoch": 2.071622622869844, "grad_norm": 0.46714368758553043, "learning_rate": 4.886997489380919e-06, "loss": 0.489, "step": 5286 }, { "epoch": 2.0720177821684365, "grad_norm": 0.44469009126903564, "learning_rate": 4.886950913510368e-06, "loss": 0.4925, "step": 5287 }, { "epoch": 2.0724129414670287, "grad_norm": 0.46045303664787235, "learning_rate": 4.886904328265323e-06, "loss": 0.4839, "step": 5288 }, { "epoch": 2.072808100765621, "grad_norm": 0.4601616045035097, "learning_rate": 4.886857733645968e-06, "loss": 0.4876, "step": 5289 }, { "epoch": 2.073203260064213, "grad_norm": 0.48030791644212956, "learning_rate": 4.886811129652484e-06, "loss": 0.5098, "step": 5290 }, { "epoch": 2.0735984193628054, "grad_norm": 0.4499859205882746, "learning_rate": 4.886764516285057e-06, "loss": 0.4892, "step": 5291 }, { "epoch": 2.0739935786613977, "grad_norm": 0.46426479744305693, "learning_rate": 4.886717893543868e-06, "loss": 0.4841, "step": 5292 }, { "epoch": 2.07438873795999, "grad_norm": 0.4559454042901413, "learning_rate": 4.886671261429099e-06, "loss": 0.488, "step": 5293 }, { "epoch": 2.074783897258582, "grad_norm": 0.4563383108320409, "learning_rate": 4.8866246199409354e-06, "loss": 0.4792, "step": 5294 }, { "epoch": 2.0751790565571744, "grad_norm": 0.4641041054840762, "learning_rate": 4.886577969079559e-06, "loss": 0.4883, "step": 5295 }, { "epoch": 2.075574215855767, "grad_norm": 0.47969805202822735, "learning_rate": 4.8865313088451544e-06, "loss": 0.5032, "step": 5296 }, { "epoch": 2.0759693751543593, "grad_norm": 0.46863300818021786, "learning_rate": 4.886484639237903e-06, "loss": 0.498, "step": 5297 }, { "epoch": 2.0763645344529515, "grad_norm": 0.4657095895273564, "learning_rate": 4.88643796025799e-06, "loss": 0.4925, "step": 5298 }, { "epoch": 2.076759693751544, "grad_norm": 0.6267052883363237, "learning_rate": 4.886391271905597e-06, "loss": 0.5167, "step": 5299 }, { "epoch": 2.077154853050136, "grad_norm": 0.4773221517600891, "learning_rate": 4.886344574180909e-06, "loss": 0.4994, "step": 5300 }, { "epoch": 2.0775500123487283, "grad_norm": 0.48647171806501055, "learning_rate": 4.886297867084109e-06, "loss": 0.4895, "step": 5301 }, { "epoch": 2.0779451716473205, "grad_norm": 0.47734592420468774, "learning_rate": 4.886251150615379e-06, "loss": 0.4964, "step": 5302 }, { "epoch": 2.0783403309459128, "grad_norm": 0.4822474079732969, "learning_rate": 4.886204424774904e-06, "loss": 0.5145, "step": 5303 }, { "epoch": 2.078735490244505, "grad_norm": 0.4648912162923145, "learning_rate": 4.886157689562866e-06, "loss": 0.5063, "step": 5304 }, { "epoch": 2.0791306495430972, "grad_norm": 0.45932677809466166, "learning_rate": 4.886110944979451e-06, "loss": 0.495, "step": 5305 }, { "epoch": 2.0795258088416895, "grad_norm": 0.47706737183878567, "learning_rate": 4.88606419102484e-06, "loss": 0.5103, "step": 5306 }, { "epoch": 2.0799209681402817, "grad_norm": 0.4532623596914375, "learning_rate": 4.886017427699218e-06, "loss": 0.4873, "step": 5307 }, { "epoch": 2.080316127438874, "grad_norm": 0.4483128492884054, "learning_rate": 4.885970655002768e-06, "loss": 0.5029, "step": 5308 }, { "epoch": 2.080711286737466, "grad_norm": 0.46648648387318337, "learning_rate": 4.885923872935675e-06, "loss": 0.4932, "step": 5309 }, { "epoch": 2.0811064460360584, "grad_norm": 0.470504146656533, "learning_rate": 4.885877081498122e-06, "loss": 0.4986, "step": 5310 }, { "epoch": 2.0815016053346507, "grad_norm": 0.46815656364735725, "learning_rate": 4.8858302806902925e-06, "loss": 0.5053, "step": 5311 }, { "epoch": 2.081896764633243, "grad_norm": 0.47676947419158733, "learning_rate": 4.88578347051237e-06, "loss": 0.5048, "step": 5312 }, { "epoch": 2.082291923931835, "grad_norm": 0.4551948922256932, "learning_rate": 4.885736650964539e-06, "loss": 0.4846, "step": 5313 }, { "epoch": 2.0826870832304274, "grad_norm": 0.4689297776862242, "learning_rate": 4.885689822046983e-06, "loss": 0.5138, "step": 5314 }, { "epoch": 2.0830822425290196, "grad_norm": 0.5266371128832054, "learning_rate": 4.885642983759885e-06, "loss": 0.502, "step": 5315 }, { "epoch": 2.083477401827612, "grad_norm": 0.45232336122965455, "learning_rate": 4.885596136103432e-06, "loss": 0.4926, "step": 5316 }, { "epoch": 2.083872561126204, "grad_norm": 0.45815345336313995, "learning_rate": 4.885549279077805e-06, "loss": 0.4706, "step": 5317 }, { "epoch": 2.0842677204247964, "grad_norm": 0.4508115268547949, "learning_rate": 4.885502412683189e-06, "loss": 0.5158, "step": 5318 }, { "epoch": 2.0846628797233886, "grad_norm": 0.4506873596094119, "learning_rate": 4.885455536919767e-06, "loss": 0.4826, "step": 5319 }, { "epoch": 2.085058039021981, "grad_norm": 0.47613431415557844, "learning_rate": 4.885408651787725e-06, "loss": 0.5037, "step": 5320 }, { "epoch": 2.085453198320573, "grad_norm": 0.4603502263078611, "learning_rate": 4.885361757287247e-06, "loss": 0.4886, "step": 5321 }, { "epoch": 2.0858483576191653, "grad_norm": 0.46036610887217155, "learning_rate": 4.8853148534185165e-06, "loss": 0.4953, "step": 5322 }, { "epoch": 2.0862435169177576, "grad_norm": 0.4529241365840035, "learning_rate": 4.885267940181717e-06, "loss": 0.4953, "step": 5323 }, { "epoch": 2.08663867621635, "grad_norm": 0.48675482273393644, "learning_rate": 4.885221017577033e-06, "loss": 0.5134, "step": 5324 }, { "epoch": 2.087033835514942, "grad_norm": 0.45569874226039914, "learning_rate": 4.88517408560465e-06, "loss": 0.5097, "step": 5325 }, { "epoch": 2.0874289948135343, "grad_norm": 0.4673253599053655, "learning_rate": 4.885127144264752e-06, "loss": 0.5054, "step": 5326 }, { "epoch": 2.0878241541121265, "grad_norm": 0.4531561763675708, "learning_rate": 4.885080193557522e-06, "loss": 0.5016, "step": 5327 }, { "epoch": 2.0882193134107188, "grad_norm": 0.4872090735746867, "learning_rate": 4.885033233483146e-06, "loss": 0.5176, "step": 5328 }, { "epoch": 2.088614472709311, "grad_norm": 0.4513837913446904, "learning_rate": 4.884986264041808e-06, "loss": 0.469, "step": 5329 }, { "epoch": 2.0890096320079032, "grad_norm": 0.4590797104168329, "learning_rate": 4.884939285233691e-06, "loss": 0.504, "step": 5330 }, { "epoch": 2.0894047913064955, "grad_norm": 0.4627972424027732, "learning_rate": 4.884892297058981e-06, "loss": 0.4901, "step": 5331 }, { "epoch": 2.0897999506050877, "grad_norm": 0.44688340629975803, "learning_rate": 4.884845299517863e-06, "loss": 0.4877, "step": 5332 }, { "epoch": 2.09019510990368, "grad_norm": 0.4611503172302672, "learning_rate": 4.88479829261052e-06, "loss": 0.4921, "step": 5333 }, { "epoch": 2.090590269202272, "grad_norm": 0.46829455437768785, "learning_rate": 4.884751276337138e-06, "loss": 0.5113, "step": 5334 }, { "epoch": 2.0909854285008644, "grad_norm": 0.4473009959944776, "learning_rate": 4.8847042506979e-06, "loss": 0.4904, "step": 5335 }, { "epoch": 2.0913805877994567, "grad_norm": 0.4604208483059509, "learning_rate": 4.8846572156929936e-06, "loss": 0.5003, "step": 5336 }, { "epoch": 2.091775747098049, "grad_norm": 0.45767779762723915, "learning_rate": 4.8846101713226005e-06, "loss": 0.4935, "step": 5337 }, { "epoch": 2.092170906396641, "grad_norm": 0.4615944313852972, "learning_rate": 4.884563117586907e-06, "loss": 0.4979, "step": 5338 }, { "epoch": 2.0925660656952334, "grad_norm": 0.4810789677245922, "learning_rate": 4.884516054486097e-06, "loss": 0.5252, "step": 5339 }, { "epoch": 2.0929612249938256, "grad_norm": 0.4573861349101158, "learning_rate": 4.884468982020357e-06, "loss": 0.5129, "step": 5340 }, { "epoch": 2.093356384292418, "grad_norm": 0.4528184175451582, "learning_rate": 4.88442190018987e-06, "loss": 0.5138, "step": 5341 }, { "epoch": 2.09375154359101, "grad_norm": 0.4566897018760855, "learning_rate": 4.884374808994822e-06, "loss": 0.5044, "step": 5342 }, { "epoch": 2.0941467028896024, "grad_norm": 0.4516180831443502, "learning_rate": 4.884327708435397e-06, "loss": 0.4991, "step": 5343 }, { "epoch": 2.0945418621881946, "grad_norm": 0.44852554152023116, "learning_rate": 4.884280598511781e-06, "loss": 0.5032, "step": 5344 }, { "epoch": 2.094937021486787, "grad_norm": 0.45516969248231826, "learning_rate": 4.8842334792241586e-06, "loss": 0.4983, "step": 5345 }, { "epoch": 2.095332180785379, "grad_norm": 0.46631444223982815, "learning_rate": 4.884186350572715e-06, "loss": 0.502, "step": 5346 }, { "epoch": 2.0957273400839713, "grad_norm": 0.47398673780754186, "learning_rate": 4.884139212557635e-06, "loss": 0.4953, "step": 5347 }, { "epoch": 2.0961224993825636, "grad_norm": 0.47579743471894775, "learning_rate": 4.884092065179103e-06, "loss": 0.5056, "step": 5348 }, { "epoch": 2.096517658681156, "grad_norm": 0.48515241623044925, "learning_rate": 4.884044908437306e-06, "loss": 0.5059, "step": 5349 }, { "epoch": 2.096912817979748, "grad_norm": 0.4652147279266291, "learning_rate": 4.883997742332429e-06, "loss": 0.4976, "step": 5350 }, { "epoch": 2.0973079772783403, "grad_norm": 0.4453104642842236, "learning_rate": 4.883950566864656e-06, "loss": 0.5074, "step": 5351 }, { "epoch": 2.0977031365769325, "grad_norm": 0.45143539446002273, "learning_rate": 4.883903382034172e-06, "loss": 0.4835, "step": 5352 }, { "epoch": 2.0980982958755248, "grad_norm": 0.4628598192673955, "learning_rate": 4.883856187841164e-06, "loss": 0.5055, "step": 5353 }, { "epoch": 2.098493455174117, "grad_norm": 0.4564188127449865, "learning_rate": 4.883808984285816e-06, "loss": 0.4958, "step": 5354 }, { "epoch": 2.0988886144727092, "grad_norm": 0.45860123136973613, "learning_rate": 4.8837617713683146e-06, "loss": 0.4906, "step": 5355 }, { "epoch": 2.0992837737713015, "grad_norm": 0.4732141389245651, "learning_rate": 4.883714549088844e-06, "loss": 0.493, "step": 5356 }, { "epoch": 2.0996789330698937, "grad_norm": 0.4611681662392404, "learning_rate": 4.8836673174475894e-06, "loss": 0.5004, "step": 5357 }, { "epoch": 2.100074092368486, "grad_norm": 0.4779009985374861, "learning_rate": 4.883620076444738e-06, "loss": 0.4939, "step": 5358 }, { "epoch": 2.100469251667078, "grad_norm": 0.4644243183886433, "learning_rate": 4.883572826080474e-06, "loss": 0.4901, "step": 5359 }, { "epoch": 2.1008644109656704, "grad_norm": 0.4730815340398359, "learning_rate": 4.883525566354983e-06, "loss": 0.4929, "step": 5360 }, { "epoch": 2.1012595702642627, "grad_norm": 0.4645212217941286, "learning_rate": 4.883478297268451e-06, "loss": 0.485, "step": 5361 }, { "epoch": 2.101654729562855, "grad_norm": 0.5883162574204609, "learning_rate": 4.883431018821064e-06, "loss": 0.4936, "step": 5362 }, { "epoch": 2.102049888861447, "grad_norm": 0.44813227041664244, "learning_rate": 4.883383731013007e-06, "loss": 0.4999, "step": 5363 }, { "epoch": 2.1024450481600394, "grad_norm": 0.4693538528184338, "learning_rate": 4.883336433844465e-06, "loss": 0.5014, "step": 5364 }, { "epoch": 2.1028402074586316, "grad_norm": 0.4613356297326133, "learning_rate": 4.883289127315627e-06, "loss": 0.4986, "step": 5365 }, { "epoch": 2.103235366757224, "grad_norm": 0.47462410453708387, "learning_rate": 4.883241811426675e-06, "loss": 0.4964, "step": 5366 }, { "epoch": 2.103630526055816, "grad_norm": 0.4585830157369119, "learning_rate": 4.883194486177796e-06, "loss": 0.5305, "step": 5367 }, { "epoch": 2.1040256853544084, "grad_norm": 0.5374049651711528, "learning_rate": 4.883147151569178e-06, "loss": 0.5013, "step": 5368 }, { "epoch": 2.1044208446530006, "grad_norm": 0.4505696100137808, "learning_rate": 4.883099807601003e-06, "loss": 0.4905, "step": 5369 }, { "epoch": 2.104816003951593, "grad_norm": 0.472748675203154, "learning_rate": 4.88305245427346e-06, "loss": 0.5043, "step": 5370 }, { "epoch": 2.105211163250185, "grad_norm": 0.4703878721715555, "learning_rate": 4.883005091586734e-06, "loss": 0.484, "step": 5371 }, { "epoch": 2.1056063225487773, "grad_norm": 0.4556588761245561, "learning_rate": 4.882957719541011e-06, "loss": 0.5031, "step": 5372 }, { "epoch": 2.1060014818473696, "grad_norm": 0.44797746752549056, "learning_rate": 4.882910338136478e-06, "loss": 0.4944, "step": 5373 }, { "epoch": 2.106396641145962, "grad_norm": 0.4886233397662977, "learning_rate": 4.882862947373318e-06, "loss": 0.5049, "step": 5374 }, { "epoch": 2.106791800444554, "grad_norm": 0.4669275499791975, "learning_rate": 4.882815547251721e-06, "loss": 0.5095, "step": 5375 }, { "epoch": 2.1071869597431463, "grad_norm": 0.4797695708025504, "learning_rate": 4.8827681377718715e-06, "loss": 0.5128, "step": 5376 }, { "epoch": 2.1075821190417385, "grad_norm": 0.45320448299389626, "learning_rate": 4.8827207189339545e-06, "loss": 0.4986, "step": 5377 }, { "epoch": 2.1079772783403308, "grad_norm": 0.44937616960762794, "learning_rate": 4.882673290738158e-06, "loss": 0.499, "step": 5378 }, { "epoch": 2.108372437638923, "grad_norm": 0.4609581797871518, "learning_rate": 4.8826258531846686e-06, "loss": 0.5022, "step": 5379 }, { "epoch": 2.1087675969375153, "grad_norm": 0.4582795163905733, "learning_rate": 4.882578406273671e-06, "loss": 0.4887, "step": 5380 }, { "epoch": 2.1091627562361075, "grad_norm": 0.45000378134977503, "learning_rate": 4.882530950005351e-06, "loss": 0.5033, "step": 5381 }, { "epoch": 2.1095579155346997, "grad_norm": 0.4535208499466116, "learning_rate": 4.882483484379898e-06, "loss": 0.4893, "step": 5382 }, { "epoch": 2.109953074833292, "grad_norm": 0.4976229552655962, "learning_rate": 4.8824360093974945e-06, "loss": 0.4984, "step": 5383 }, { "epoch": 2.110348234131884, "grad_norm": 0.4747654674353325, "learning_rate": 4.88238852505833e-06, "loss": 0.5173, "step": 5384 }, { "epoch": 2.1107433934304765, "grad_norm": 0.45542034736390435, "learning_rate": 4.88234103136259e-06, "loss": 0.4833, "step": 5385 }, { "epoch": 2.1111385527290687, "grad_norm": 0.46024000204519655, "learning_rate": 4.882293528310462e-06, "loss": 0.5103, "step": 5386 }, { "epoch": 2.111533712027661, "grad_norm": 0.4874077584347664, "learning_rate": 4.882246015902131e-06, "loss": 0.5052, "step": 5387 }, { "epoch": 2.1119288713262536, "grad_norm": 0.44865374080295306, "learning_rate": 4.882198494137785e-06, "loss": 0.4952, "step": 5388 }, { "epoch": 2.112324030624846, "grad_norm": 0.46109394452242536, "learning_rate": 4.882150963017609e-06, "loss": 0.5005, "step": 5389 }, { "epoch": 2.112719189923438, "grad_norm": 0.45930292764238206, "learning_rate": 4.88210342254179e-06, "loss": 0.4739, "step": 5390 }, { "epoch": 2.1131143492220303, "grad_norm": 0.5451627115880612, "learning_rate": 4.882055872710516e-06, "loss": 0.5029, "step": 5391 }, { "epoch": 2.1135095085206226, "grad_norm": 0.45461534307695234, "learning_rate": 4.882008313523973e-06, "loss": 0.4902, "step": 5392 }, { "epoch": 2.113904667819215, "grad_norm": 0.4700402964260748, "learning_rate": 4.881960744982348e-06, "loss": 0.5006, "step": 5393 }, { "epoch": 2.114299827117807, "grad_norm": 0.4558104260192114, "learning_rate": 4.881913167085826e-06, "loss": 0.4926, "step": 5394 }, { "epoch": 2.1146949864163993, "grad_norm": 0.46103896695677804, "learning_rate": 4.881865579834598e-06, "loss": 0.5009, "step": 5395 }, { "epoch": 2.1150901457149915, "grad_norm": 0.46008105522327936, "learning_rate": 4.881817983228847e-06, "loss": 0.5072, "step": 5396 }, { "epoch": 2.115485305013584, "grad_norm": 0.459804457470388, "learning_rate": 4.881770377268761e-06, "loss": 0.4966, "step": 5397 }, { "epoch": 2.115880464312176, "grad_norm": 0.4564873017201415, "learning_rate": 4.8817227619545274e-06, "loss": 0.5065, "step": 5398 }, { "epoch": 2.1162756236107683, "grad_norm": 0.4572790123320898, "learning_rate": 4.881675137286334e-06, "loss": 0.4917, "step": 5399 }, { "epoch": 2.1166707829093605, "grad_norm": 0.4583018544454832, "learning_rate": 4.881627503264365e-06, "loss": 0.5122, "step": 5400 }, { "epoch": 2.1170659422079527, "grad_norm": 0.45766918038131965, "learning_rate": 4.881579859888811e-06, "loss": 0.5008, "step": 5401 }, { "epoch": 2.117461101506545, "grad_norm": 0.47089609794076337, "learning_rate": 4.881532207159857e-06, "loss": 0.5111, "step": 5402 }, { "epoch": 2.1178562608051372, "grad_norm": 0.48698638605698286, "learning_rate": 4.881484545077691e-06, "loss": 0.4955, "step": 5403 }, { "epoch": 2.1182514201037295, "grad_norm": 0.47367199549589856, "learning_rate": 4.881436873642499e-06, "loss": 0.4843, "step": 5404 }, { "epoch": 2.1186465794023217, "grad_norm": 0.4548370429625071, "learning_rate": 4.881389192854469e-06, "loss": 0.5109, "step": 5405 }, { "epoch": 2.119041738700914, "grad_norm": 0.46207813913976026, "learning_rate": 4.881341502713789e-06, "loss": 0.5102, "step": 5406 }, { "epoch": 2.119436897999506, "grad_norm": 0.4665523026201837, "learning_rate": 4.881293803220646e-06, "loss": 0.4937, "step": 5407 }, { "epoch": 2.1198320572980984, "grad_norm": 0.44584389144997866, "learning_rate": 4.881246094375226e-06, "loss": 0.4929, "step": 5408 }, { "epoch": 2.1202272165966907, "grad_norm": 0.45411441910971817, "learning_rate": 4.881198376177717e-06, "loss": 0.49, "step": 5409 }, { "epoch": 2.120622375895283, "grad_norm": 0.45916208922818735, "learning_rate": 4.8811506486283075e-06, "loss": 0.4891, "step": 5410 }, { "epoch": 2.121017535193875, "grad_norm": 0.45426066117787084, "learning_rate": 4.881102911727184e-06, "loss": 0.4934, "step": 5411 }, { "epoch": 2.1214126944924674, "grad_norm": 0.4676617535350159, "learning_rate": 4.881055165474535e-06, "loss": 0.5035, "step": 5412 }, { "epoch": 2.1218078537910596, "grad_norm": 0.47051348318804287, "learning_rate": 4.881007409870546e-06, "loss": 0.5064, "step": 5413 }, { "epoch": 2.122203013089652, "grad_norm": 0.4496266026428253, "learning_rate": 4.880959644915406e-06, "loss": 0.5047, "step": 5414 }, { "epoch": 2.122598172388244, "grad_norm": 0.4595779522195174, "learning_rate": 4.880911870609302e-06, "loss": 0.4893, "step": 5415 }, { "epoch": 2.1229933316868363, "grad_norm": 0.4569941314020582, "learning_rate": 4.880864086952423e-06, "loss": 0.4922, "step": 5416 }, { "epoch": 2.1233884909854286, "grad_norm": 0.469415079482959, "learning_rate": 4.880816293944955e-06, "loss": 0.4938, "step": 5417 }, { "epoch": 2.123783650284021, "grad_norm": 0.4447409912431963, "learning_rate": 4.880768491587085e-06, "loss": 0.5183, "step": 5418 }, { "epoch": 2.124178809582613, "grad_norm": 0.45255535919526285, "learning_rate": 4.880720679879004e-06, "loss": 0.4958, "step": 5419 }, { "epoch": 2.1245739688812053, "grad_norm": 0.4614305972818154, "learning_rate": 4.880672858820897e-06, "loss": 0.4974, "step": 5420 }, { "epoch": 2.1249691281797976, "grad_norm": 0.47210760737931706, "learning_rate": 4.880625028412952e-06, "loss": 0.4918, "step": 5421 }, { "epoch": 2.12536428747839, "grad_norm": 0.46822573176234256, "learning_rate": 4.880577188655359e-06, "loss": 0.5063, "step": 5422 }, { "epoch": 2.125759446776982, "grad_norm": 0.480592867802166, "learning_rate": 4.880529339548303e-06, "loss": 0.4923, "step": 5423 }, { "epoch": 2.1261546060755743, "grad_norm": 0.45661426326462, "learning_rate": 4.880481481091974e-06, "loss": 0.5092, "step": 5424 }, { "epoch": 2.1265497653741665, "grad_norm": 0.4704280977766564, "learning_rate": 4.8804336132865595e-06, "loss": 0.5105, "step": 5425 }, { "epoch": 2.1269449246727588, "grad_norm": 0.4618387396924572, "learning_rate": 4.880385736132246e-06, "loss": 0.4752, "step": 5426 }, { "epoch": 2.127340083971351, "grad_norm": 0.45751503112924735, "learning_rate": 4.8803378496292244e-06, "loss": 0.4965, "step": 5427 }, { "epoch": 2.1277352432699432, "grad_norm": 0.4641374268101188, "learning_rate": 4.88028995377768e-06, "loss": 0.5184, "step": 5428 }, { "epoch": 2.1281304025685355, "grad_norm": 0.46565955389095914, "learning_rate": 4.880242048577802e-06, "loss": 0.5042, "step": 5429 }, { "epoch": 2.1285255618671277, "grad_norm": 0.45584303689164063, "learning_rate": 4.8801941340297795e-06, "loss": 0.5004, "step": 5430 }, { "epoch": 2.12892072116572, "grad_norm": 0.4710850087326468, "learning_rate": 4.8801462101338e-06, "loss": 0.5192, "step": 5431 }, { "epoch": 2.129315880464312, "grad_norm": 0.44061458101376594, "learning_rate": 4.88009827689005e-06, "loss": 0.4933, "step": 5432 }, { "epoch": 2.1297110397629044, "grad_norm": 0.4519911211493473, "learning_rate": 4.88005033429872e-06, "loss": 0.4949, "step": 5433 }, { "epoch": 2.1301061990614967, "grad_norm": 0.4504066589256489, "learning_rate": 4.880002382359998e-06, "loss": 0.4791, "step": 5434 }, { "epoch": 2.130501358360089, "grad_norm": 0.45764033049204544, "learning_rate": 4.879954421074071e-06, "loss": 0.4903, "step": 5435 }, { "epoch": 2.130896517658681, "grad_norm": 0.4706731789649233, "learning_rate": 4.879906450441129e-06, "loss": 0.5127, "step": 5436 }, { "epoch": 2.1312916769572734, "grad_norm": 0.44961446232384666, "learning_rate": 4.8798584704613585e-06, "loss": 0.4987, "step": 5437 }, { "epoch": 2.1316868362558656, "grad_norm": 0.47280770131697647, "learning_rate": 4.87981048113495e-06, "loss": 0.5161, "step": 5438 }, { "epoch": 2.132081995554458, "grad_norm": 0.46339098157068087, "learning_rate": 4.879762482462091e-06, "loss": 0.4894, "step": 5439 }, { "epoch": 2.13247715485305, "grad_norm": 0.4487372634389615, "learning_rate": 4.87971447444297e-06, "loss": 0.5221, "step": 5440 }, { "epoch": 2.1328723141516424, "grad_norm": 0.4562917626396976, "learning_rate": 4.879666457077775e-06, "loss": 0.524, "step": 5441 }, { "epoch": 2.1332674734502346, "grad_norm": 0.4821307353976656, "learning_rate": 4.879618430366696e-06, "loss": 0.5079, "step": 5442 }, { "epoch": 2.133662632748827, "grad_norm": 0.5353325642318681, "learning_rate": 4.879570394309921e-06, "loss": 0.5145, "step": 5443 }, { "epoch": 2.134057792047419, "grad_norm": 0.47763771900604635, "learning_rate": 4.879522348907637e-06, "loss": 0.4835, "step": 5444 }, { "epoch": 2.1344529513460113, "grad_norm": 0.44913009790356634, "learning_rate": 4.879474294160035e-06, "loss": 0.4757, "step": 5445 }, { "epoch": 2.1348481106446036, "grad_norm": 0.4597607997906684, "learning_rate": 4.879426230067303e-06, "loss": 0.5005, "step": 5446 }, { "epoch": 2.135243269943196, "grad_norm": 0.4623134190589581, "learning_rate": 4.8793781566296294e-06, "loss": 0.5043, "step": 5447 }, { "epoch": 2.135638429241788, "grad_norm": 0.4506998220430185, "learning_rate": 4.8793300738472025e-06, "loss": 0.475, "step": 5448 }, { "epoch": 2.1360335885403803, "grad_norm": 0.45507178878371285, "learning_rate": 4.879281981720213e-06, "loss": 0.498, "step": 5449 }, { "epoch": 2.1364287478389725, "grad_norm": 0.46575574764050215, "learning_rate": 4.879233880248848e-06, "loss": 0.5143, "step": 5450 }, { "epoch": 2.1368239071375648, "grad_norm": 0.4468946988002143, "learning_rate": 4.879185769433298e-06, "loss": 0.5058, "step": 5451 }, { "epoch": 2.137219066436157, "grad_norm": 0.45705479305728886, "learning_rate": 4.87913764927375e-06, "loss": 0.4844, "step": 5452 }, { "epoch": 2.1376142257347492, "grad_norm": 0.4535460076744396, "learning_rate": 4.8790895197703945e-06, "loss": 0.5187, "step": 5453 }, { "epoch": 2.1380093850333415, "grad_norm": 0.4508238974301958, "learning_rate": 4.879041380923421e-06, "loss": 0.498, "step": 5454 }, { "epoch": 2.1384045443319337, "grad_norm": 0.47611124879857625, "learning_rate": 4.878993232733016e-06, "loss": 0.5015, "step": 5455 }, { "epoch": 2.138799703630526, "grad_norm": 0.47623598181038684, "learning_rate": 4.8789450751993705e-06, "loss": 0.4918, "step": 5456 }, { "epoch": 2.139194862929118, "grad_norm": 0.4463328063702259, "learning_rate": 4.878896908322673e-06, "loss": 0.4949, "step": 5457 }, { "epoch": 2.1395900222277104, "grad_norm": 0.4688848998545471, "learning_rate": 4.878848732103114e-06, "loss": 0.4784, "step": 5458 }, { "epoch": 2.1399851815263027, "grad_norm": 0.44121522041937494, "learning_rate": 4.878800546540881e-06, "loss": 0.4849, "step": 5459 }, { "epoch": 2.140380340824895, "grad_norm": 0.46258456174979196, "learning_rate": 4.878752351636164e-06, "loss": 0.498, "step": 5460 }, { "epoch": 2.140775500123487, "grad_norm": 0.4515779539663671, "learning_rate": 4.878704147389153e-06, "loss": 0.5277, "step": 5461 }, { "epoch": 2.1411706594220794, "grad_norm": 0.44675655713159357, "learning_rate": 4.878655933800036e-06, "loss": 0.4781, "step": 5462 }, { "epoch": 2.1415658187206716, "grad_norm": 0.4787595797738257, "learning_rate": 4.878607710869002e-06, "loss": 0.5159, "step": 5463 }, { "epoch": 2.141960978019264, "grad_norm": 0.46495746367564106, "learning_rate": 4.878559478596242e-06, "loss": 0.5156, "step": 5464 }, { "epoch": 2.142356137317856, "grad_norm": 0.4502923794920052, "learning_rate": 4.8785112369819455e-06, "loss": 0.4891, "step": 5465 }, { "epoch": 2.1427512966164484, "grad_norm": 0.451977669133556, "learning_rate": 4.8784629860263e-06, "loss": 0.5063, "step": 5466 }, { "epoch": 2.1431464559150406, "grad_norm": 0.45073347613474773, "learning_rate": 4.878414725729497e-06, "loss": 0.5037, "step": 5467 }, { "epoch": 2.143541615213633, "grad_norm": 0.4739115032234395, "learning_rate": 4.878366456091724e-06, "loss": 0.4945, "step": 5468 }, { "epoch": 2.143936774512225, "grad_norm": 0.46171831924852413, "learning_rate": 4.8783181771131735e-06, "loss": 0.5097, "step": 5469 }, { "epoch": 2.1443319338108173, "grad_norm": 0.4657774448437735, "learning_rate": 4.878269888794032e-06, "loss": 0.4919, "step": 5470 }, { "epoch": 2.1447270931094096, "grad_norm": 0.47080691090966, "learning_rate": 4.878221591134491e-06, "loss": 0.5119, "step": 5471 }, { "epoch": 2.145122252408002, "grad_norm": 0.4513544487027582, "learning_rate": 4.8781732841347395e-06, "loss": 0.4851, "step": 5472 }, { "epoch": 2.145517411706594, "grad_norm": 0.47195481464231376, "learning_rate": 4.878124967794968e-06, "loss": 0.5161, "step": 5473 }, { "epoch": 2.1459125710051863, "grad_norm": 0.4609154414666813, "learning_rate": 4.878076642115366e-06, "loss": 0.4985, "step": 5474 }, { "epoch": 2.1463077303037785, "grad_norm": 0.46676548499669845, "learning_rate": 4.878028307096122e-06, "loss": 0.4724, "step": 5475 }, { "epoch": 2.1467028896023708, "grad_norm": 0.4611191108433873, "learning_rate": 4.8779799627374265e-06, "loss": 0.4943, "step": 5476 }, { "epoch": 2.147098048900963, "grad_norm": 0.4521869519269894, "learning_rate": 4.877931609039471e-06, "loss": 0.4997, "step": 5477 }, { "epoch": 2.1474932081995552, "grad_norm": 0.505573034174829, "learning_rate": 4.877883246002444e-06, "loss": 0.5042, "step": 5478 }, { "epoch": 2.1478883674981475, "grad_norm": 0.47546280424045617, "learning_rate": 4.877834873626535e-06, "loss": 0.5167, "step": 5479 }, { "epoch": 2.1482835267967397, "grad_norm": 0.4634538760180319, "learning_rate": 4.877786491911935e-06, "loss": 0.4926, "step": 5480 }, { "epoch": 2.148678686095332, "grad_norm": 0.4677531886753567, "learning_rate": 4.877738100858832e-06, "loss": 0.4891, "step": 5481 }, { "epoch": 2.149073845393924, "grad_norm": 0.4690745882384834, "learning_rate": 4.877689700467419e-06, "loss": 0.5207, "step": 5482 }, { "epoch": 2.1494690046925164, "grad_norm": 0.4692422873547366, "learning_rate": 4.8776412907378845e-06, "loss": 0.4979, "step": 5483 }, { "epoch": 2.1498641639911087, "grad_norm": 0.45669923066085627, "learning_rate": 4.877592871670419e-06, "loss": 0.5017, "step": 5484 }, { "epoch": 2.150259323289701, "grad_norm": 0.4647143592549349, "learning_rate": 4.877544443265212e-06, "loss": 0.5213, "step": 5485 }, { "epoch": 2.150654482588293, "grad_norm": 0.4487735037385304, "learning_rate": 4.877496005522454e-06, "loss": 0.4956, "step": 5486 }, { "epoch": 2.151049641886886, "grad_norm": 0.4732217043292161, "learning_rate": 4.877447558442335e-06, "loss": 0.5098, "step": 5487 }, { "epoch": 2.151444801185478, "grad_norm": 0.4868586481311084, "learning_rate": 4.877399102025046e-06, "loss": 0.4968, "step": 5488 }, { "epoch": 2.1518399604840703, "grad_norm": 0.4679716331191469, "learning_rate": 4.877350636270778e-06, "loss": 0.5025, "step": 5489 }, { "epoch": 2.1522351197826626, "grad_norm": 0.455600959345513, "learning_rate": 4.87730216117972e-06, "loss": 0.5006, "step": 5490 }, { "epoch": 2.152630279081255, "grad_norm": 0.4729311871353748, "learning_rate": 4.877253676752062e-06, "loss": 0.4905, "step": 5491 }, { "epoch": 2.153025438379847, "grad_norm": 0.47191264067689354, "learning_rate": 4.877205182987995e-06, "loss": 0.5033, "step": 5492 }, { "epoch": 2.1534205976784393, "grad_norm": 0.4734254855624861, "learning_rate": 4.87715667988771e-06, "loss": 0.5113, "step": 5493 }, { "epoch": 2.1538157569770315, "grad_norm": 0.46092003008011734, "learning_rate": 4.8771081674513965e-06, "loss": 0.4878, "step": 5494 }, { "epoch": 2.1542109162756238, "grad_norm": 0.45908545472439594, "learning_rate": 4.877059645679246e-06, "loss": 0.4934, "step": 5495 }, { "epoch": 2.154606075574216, "grad_norm": 0.4468868294393995, "learning_rate": 4.877011114571449e-06, "loss": 0.4947, "step": 5496 }, { "epoch": 2.1550012348728083, "grad_norm": 0.444489465663762, "learning_rate": 4.876962574128196e-06, "loss": 0.4967, "step": 5497 }, { "epoch": 2.1553963941714005, "grad_norm": 0.4393378102668866, "learning_rate": 4.876914024349676e-06, "loss": 0.4844, "step": 5498 }, { "epoch": 2.1557915534699927, "grad_norm": 0.4640805678204216, "learning_rate": 4.876865465236082e-06, "loss": 0.5285, "step": 5499 }, { "epoch": 2.156186712768585, "grad_norm": 0.5289230739266989, "learning_rate": 4.876816896787603e-06, "loss": 0.4989, "step": 5500 }, { "epoch": 2.156581872067177, "grad_norm": 0.5419539291904234, "learning_rate": 4.876768319004431e-06, "loss": 0.5113, "step": 5501 }, { "epoch": 2.1569770313657695, "grad_norm": 0.4517913697063531, "learning_rate": 4.876719731886757e-06, "loss": 0.4963, "step": 5502 }, { "epoch": 2.1573721906643617, "grad_norm": 0.4504437690654093, "learning_rate": 4.87667113543477e-06, "loss": 0.4958, "step": 5503 }, { "epoch": 2.157767349962954, "grad_norm": 0.4619724381275875, "learning_rate": 4.876622529648663e-06, "loss": 0.4998, "step": 5504 }, { "epoch": 2.158162509261546, "grad_norm": 0.46378800765710737, "learning_rate": 4.876573914528625e-06, "loss": 0.5158, "step": 5505 }, { "epoch": 2.1585576685601384, "grad_norm": 0.45305071404161723, "learning_rate": 4.876525290074848e-06, "loss": 0.4932, "step": 5506 }, { "epoch": 2.1589528278587307, "grad_norm": 0.46744932250978694, "learning_rate": 4.8764766562875235e-06, "loss": 0.5075, "step": 5507 }, { "epoch": 2.159347987157323, "grad_norm": 0.44330667766097176, "learning_rate": 4.87642801316684e-06, "loss": 0.4809, "step": 5508 }, { "epoch": 2.159743146455915, "grad_norm": 0.46354232388964045, "learning_rate": 4.876379360712993e-06, "loss": 0.5029, "step": 5509 }, { "epoch": 2.1601383057545074, "grad_norm": 0.46977655177705924, "learning_rate": 4.876330698926169e-06, "loss": 0.5001, "step": 5510 }, { "epoch": 2.1605334650530996, "grad_norm": 0.45685117233330014, "learning_rate": 4.876282027806561e-06, "loss": 0.5151, "step": 5511 }, { "epoch": 2.160928624351692, "grad_norm": 0.47320943013293004, "learning_rate": 4.87623334735436e-06, "loss": 0.5153, "step": 5512 }, { "epoch": 2.161323783650284, "grad_norm": 0.45887136394693434, "learning_rate": 4.876184657569759e-06, "loss": 0.4955, "step": 5513 }, { "epoch": 2.1617189429488763, "grad_norm": 0.45614045191471664, "learning_rate": 4.876135958452946e-06, "loss": 0.5055, "step": 5514 }, { "epoch": 2.1621141022474686, "grad_norm": 0.4545945601618243, "learning_rate": 4.876087250004114e-06, "loss": 0.5069, "step": 5515 }, { "epoch": 2.162509261546061, "grad_norm": 0.447681420448537, "learning_rate": 4.876038532223454e-06, "loss": 0.4999, "step": 5516 }, { "epoch": 2.162904420844653, "grad_norm": 0.47058053249246107, "learning_rate": 4.875989805111158e-06, "loss": 0.4918, "step": 5517 }, { "epoch": 2.1632995801432453, "grad_norm": 0.7930113043114789, "learning_rate": 4.875941068667417e-06, "loss": 0.4778, "step": 5518 }, { "epoch": 2.1636947394418375, "grad_norm": 0.445244286794454, "learning_rate": 4.875892322892421e-06, "loss": 0.4965, "step": 5519 }, { "epoch": 2.16408989874043, "grad_norm": 0.4577281165835478, "learning_rate": 4.875843567786364e-06, "loss": 0.4979, "step": 5520 }, { "epoch": 2.164485058039022, "grad_norm": 0.464223962296654, "learning_rate": 4.8757948033494365e-06, "loss": 0.4995, "step": 5521 }, { "epoch": 2.1648802173376143, "grad_norm": 0.44936416883238395, "learning_rate": 4.875746029581828e-06, "loss": 0.5189, "step": 5522 }, { "epoch": 2.1652753766362065, "grad_norm": 0.45656574932816285, "learning_rate": 4.875697246483733e-06, "loss": 0.4947, "step": 5523 }, { "epoch": 2.1656705359347987, "grad_norm": 0.45384101923535536, "learning_rate": 4.875648454055341e-06, "loss": 0.5006, "step": 5524 }, { "epoch": 2.166065695233391, "grad_norm": 0.5008396943055093, "learning_rate": 4.875599652296845e-06, "loss": 0.4967, "step": 5525 }, { "epoch": 2.1664608545319832, "grad_norm": 0.45982000765099335, "learning_rate": 4.8755508412084364e-06, "loss": 0.4969, "step": 5526 }, { "epoch": 2.1668560138305755, "grad_norm": 0.45510607157877675, "learning_rate": 4.875502020790306e-06, "loss": 0.5089, "step": 5527 }, { "epoch": 2.1672511731291677, "grad_norm": 0.45718988116180564, "learning_rate": 4.875453191042646e-06, "loss": 0.5141, "step": 5528 }, { "epoch": 2.16764633242776, "grad_norm": 0.46235453264969417, "learning_rate": 4.875404351965648e-06, "loss": 0.4939, "step": 5529 }, { "epoch": 2.168041491726352, "grad_norm": 0.45617899670326717, "learning_rate": 4.875355503559506e-06, "loss": 0.4993, "step": 5530 }, { "epoch": 2.1684366510249444, "grad_norm": 0.47662452130332766, "learning_rate": 4.875306645824408e-06, "loss": 0.5141, "step": 5531 }, { "epoch": 2.1688318103235367, "grad_norm": 0.4622948877225635, "learning_rate": 4.875257778760549e-06, "loss": 0.515, "step": 5532 }, { "epoch": 2.169226969622129, "grad_norm": 0.4842991640779124, "learning_rate": 4.8752089023681195e-06, "loss": 0.5125, "step": 5533 }, { "epoch": 2.169622128920721, "grad_norm": 0.48514678815733153, "learning_rate": 4.875160016647311e-06, "loss": 0.5048, "step": 5534 }, { "epoch": 2.1700172882193134, "grad_norm": 0.45733286495961495, "learning_rate": 4.875111121598317e-06, "loss": 0.5015, "step": 5535 }, { "epoch": 2.1704124475179056, "grad_norm": 0.45364214188982693, "learning_rate": 4.875062217221329e-06, "loss": 0.5115, "step": 5536 }, { "epoch": 2.170807606816498, "grad_norm": 0.4421312637696224, "learning_rate": 4.875013303516538e-06, "loss": 0.4895, "step": 5537 }, { "epoch": 2.17120276611509, "grad_norm": 0.4908166354246267, "learning_rate": 4.874964380484138e-06, "loss": 0.4812, "step": 5538 }, { "epoch": 2.1715979254136824, "grad_norm": 0.4616856713589806, "learning_rate": 4.874915448124319e-06, "loss": 0.4974, "step": 5539 }, { "epoch": 2.1719930847122746, "grad_norm": 0.4646457406849082, "learning_rate": 4.874866506437275e-06, "loss": 0.5034, "step": 5540 }, { "epoch": 2.172388244010867, "grad_norm": 0.45047160391577984, "learning_rate": 4.874817555423196e-06, "loss": 0.4877, "step": 5541 }, { "epoch": 2.172783403309459, "grad_norm": 0.4500708056755459, "learning_rate": 4.874768595082277e-06, "loss": 0.4918, "step": 5542 }, { "epoch": 2.1731785626080513, "grad_norm": 0.47456977571950365, "learning_rate": 4.874719625414709e-06, "loss": 0.4933, "step": 5543 }, { "epoch": 2.1735737219066436, "grad_norm": 0.46200205295141555, "learning_rate": 4.874670646420684e-06, "loss": 0.5142, "step": 5544 }, { "epoch": 2.173968881205236, "grad_norm": 0.4734243176175764, "learning_rate": 4.874621658100395e-06, "loss": 0.5208, "step": 5545 }, { "epoch": 2.174364040503828, "grad_norm": 0.44815049758957626, "learning_rate": 4.874572660454034e-06, "loss": 0.4968, "step": 5546 }, { "epoch": 2.1747591998024203, "grad_norm": 0.4614058105293498, "learning_rate": 4.874523653481793e-06, "loss": 0.5072, "step": 5547 }, { "epoch": 2.1751543591010125, "grad_norm": 0.44451885664415813, "learning_rate": 4.874474637183866e-06, "loss": 0.5043, "step": 5548 }, { "epoch": 2.1755495183996048, "grad_norm": 0.4591881542386408, "learning_rate": 4.874425611560444e-06, "loss": 0.4854, "step": 5549 }, { "epoch": 2.175944677698197, "grad_norm": 0.47533595495611164, "learning_rate": 4.874376576611719e-06, "loss": 0.5194, "step": 5550 }, { "epoch": 2.1763398369967892, "grad_norm": 0.46610351057678207, "learning_rate": 4.874327532337886e-06, "loss": 0.5002, "step": 5551 }, { "epoch": 2.1767349962953815, "grad_norm": 0.4633556164186955, "learning_rate": 4.8742784787391355e-06, "loss": 0.5062, "step": 5552 }, { "epoch": 2.1771301555939737, "grad_norm": 0.4676833644787226, "learning_rate": 4.874229415815661e-06, "loss": 0.5077, "step": 5553 }, { "epoch": 2.177525314892566, "grad_norm": 0.4861231482509832, "learning_rate": 4.874180343567655e-06, "loss": 0.5149, "step": 5554 }, { "epoch": 2.177920474191158, "grad_norm": 0.47393974173121844, "learning_rate": 4.8741312619953106e-06, "loss": 0.4931, "step": 5555 }, { "epoch": 2.1783156334897504, "grad_norm": 0.46438049892681393, "learning_rate": 4.87408217109882e-06, "loss": 0.4922, "step": 5556 }, { "epoch": 2.1787107927883427, "grad_norm": 0.480552674800388, "learning_rate": 4.874033070878377e-06, "loss": 0.5093, "step": 5557 }, { "epoch": 2.179105952086935, "grad_norm": 0.49032805311209543, "learning_rate": 4.873983961334172e-06, "loss": 0.5129, "step": 5558 }, { "epoch": 2.179501111385527, "grad_norm": 0.46500235243264837, "learning_rate": 4.873934842466401e-06, "loss": 0.4974, "step": 5559 }, { "epoch": 2.1798962706841194, "grad_norm": 0.4601347793294418, "learning_rate": 4.873885714275255e-06, "loss": 0.5178, "step": 5560 }, { "epoch": 2.1802914299827116, "grad_norm": 0.4601391528297951, "learning_rate": 4.873836576760927e-06, "loss": 0.501, "step": 5561 }, { "epoch": 2.180686589281304, "grad_norm": 0.5067694421292206, "learning_rate": 4.873787429923611e-06, "loss": 0.484, "step": 5562 }, { "epoch": 2.181081748579896, "grad_norm": 0.4663242902766598, "learning_rate": 4.8737382737635e-06, "loss": 0.5084, "step": 5563 }, { "epoch": 2.1814769078784884, "grad_norm": 0.4553398223962099, "learning_rate": 4.873689108280786e-06, "loss": 0.4965, "step": 5564 }, { "epoch": 2.1818720671770806, "grad_norm": 0.529541497892225, "learning_rate": 4.873639933475662e-06, "loss": 0.4955, "step": 5565 }, { "epoch": 2.182267226475673, "grad_norm": 0.4713945957848912, "learning_rate": 4.8735907493483216e-06, "loss": 0.5047, "step": 5566 }, { "epoch": 2.182662385774265, "grad_norm": 0.4572708785437191, "learning_rate": 4.873541555898959e-06, "loss": 0.4803, "step": 5567 }, { "epoch": 2.1830575450728573, "grad_norm": 0.45611106467714163, "learning_rate": 4.873492353127765e-06, "loss": 0.4936, "step": 5568 }, { "epoch": 2.1834527043714496, "grad_norm": 0.4595792613339848, "learning_rate": 4.873443141034936e-06, "loss": 0.4874, "step": 5569 }, { "epoch": 2.183847863670042, "grad_norm": 0.4640866973139749, "learning_rate": 4.873393919620663e-06, "loss": 0.4924, "step": 5570 }, { "epoch": 2.184243022968634, "grad_norm": 0.46111098813030116, "learning_rate": 4.873344688885139e-06, "loss": 0.5057, "step": 5571 }, { "epoch": 2.1846381822672263, "grad_norm": 0.46999556887546373, "learning_rate": 4.873295448828559e-06, "loss": 0.5093, "step": 5572 }, { "epoch": 2.1850333415658185, "grad_norm": 0.4718054070690503, "learning_rate": 4.873246199451116e-06, "loss": 0.4879, "step": 5573 }, { "epoch": 2.1854285008644108, "grad_norm": 0.4710239940304868, "learning_rate": 4.873196940753002e-06, "loss": 0.4993, "step": 5574 }, { "epoch": 2.1858236601630034, "grad_norm": 0.49030453082387426, "learning_rate": 4.873147672734412e-06, "loss": 0.5107, "step": 5575 }, { "epoch": 2.1862188194615957, "grad_norm": 0.46406412622436066, "learning_rate": 4.873098395395539e-06, "loss": 0.5081, "step": 5576 }, { "epoch": 2.186613978760188, "grad_norm": 0.4628191661905533, "learning_rate": 4.873049108736577e-06, "loss": 0.5031, "step": 5577 }, { "epoch": 2.18700913805878, "grad_norm": 0.44331929710296053, "learning_rate": 4.872999812757718e-06, "loss": 0.5046, "step": 5578 }, { "epoch": 2.1874042973573724, "grad_norm": 0.446907560016826, "learning_rate": 4.872950507459158e-06, "loss": 0.5187, "step": 5579 }, { "epoch": 2.1877994566559646, "grad_norm": 0.45847831566326597, "learning_rate": 4.872901192841089e-06, "loss": 0.4828, "step": 5580 }, { "epoch": 2.188194615954557, "grad_norm": 0.4487958830148146, "learning_rate": 4.872851868903704e-06, "loss": 0.5003, "step": 5581 }, { "epoch": 2.188589775253149, "grad_norm": 0.4535135827385076, "learning_rate": 4.872802535647199e-06, "loss": 0.5003, "step": 5582 }, { "epoch": 2.1889849345517414, "grad_norm": 0.4375345693693656, "learning_rate": 4.872753193071766e-06, "loss": 0.4881, "step": 5583 }, { "epoch": 2.1893800938503336, "grad_norm": 0.4641343320330823, "learning_rate": 4.872703841177599e-06, "loss": 0.4941, "step": 5584 }, { "epoch": 2.189775253148926, "grad_norm": 0.45644605766947677, "learning_rate": 4.872654479964892e-06, "loss": 0.5066, "step": 5585 }, { "epoch": 2.190170412447518, "grad_norm": 0.46839020316567503, "learning_rate": 4.87260510943384e-06, "loss": 0.5116, "step": 5586 }, { "epoch": 2.1905655717461103, "grad_norm": 0.4731473500021345, "learning_rate": 4.872555729584635e-06, "loss": 0.5096, "step": 5587 }, { "epoch": 2.1909607310447026, "grad_norm": 0.4433081776671587, "learning_rate": 4.872506340417471e-06, "loss": 0.4933, "step": 5588 }, { "epoch": 2.191355890343295, "grad_norm": 0.48768292944297426, "learning_rate": 4.872456941932544e-06, "loss": 0.507, "step": 5589 }, { "epoch": 2.191751049641887, "grad_norm": 0.4436677280755508, "learning_rate": 4.872407534130047e-06, "loss": 0.5024, "step": 5590 }, { "epoch": 2.1921462089404793, "grad_norm": 0.4536188851299345, "learning_rate": 4.8723581170101734e-06, "loss": 0.4982, "step": 5591 }, { "epoch": 2.1925413682390715, "grad_norm": 0.44412822568870913, "learning_rate": 4.872308690573118e-06, "loss": 0.4957, "step": 5592 }, { "epoch": 2.1929365275376638, "grad_norm": 0.6642864573262021, "learning_rate": 4.872259254819073e-06, "loss": 0.5231, "step": 5593 }, { "epoch": 2.193331686836256, "grad_norm": 0.46456261247267744, "learning_rate": 4.872209809748236e-06, "loss": 0.5085, "step": 5594 }, { "epoch": 2.1937268461348483, "grad_norm": 0.45989758175778567, "learning_rate": 4.872160355360798e-06, "loss": 0.4912, "step": 5595 }, { "epoch": 2.1941220054334405, "grad_norm": 0.47722595433382775, "learning_rate": 4.8721108916569555e-06, "loss": 0.5026, "step": 5596 }, { "epoch": 2.1945171647320327, "grad_norm": 0.4588929552955127, "learning_rate": 4.872061418636902e-06, "loss": 0.4941, "step": 5597 }, { "epoch": 2.194912324030625, "grad_norm": 0.43968483295091, "learning_rate": 4.872011936300831e-06, "loss": 0.5046, "step": 5598 }, { "epoch": 2.195307483329217, "grad_norm": 0.44462587632009504, "learning_rate": 4.871962444648938e-06, "loss": 0.4819, "step": 5599 }, { "epoch": 2.1957026426278095, "grad_norm": 0.45683508501671893, "learning_rate": 4.871912943681416e-06, "loss": 0.504, "step": 5600 }, { "epoch": 2.1960978019264017, "grad_norm": 0.4446678291894044, "learning_rate": 4.87186343339846e-06, "loss": 0.4973, "step": 5601 }, { "epoch": 2.196492961224994, "grad_norm": 0.4736599203705845, "learning_rate": 4.871813913800266e-06, "loss": 0.5027, "step": 5602 }, { "epoch": 2.196888120523586, "grad_norm": 0.45846116866212805, "learning_rate": 4.8717643848870265e-06, "loss": 0.5032, "step": 5603 }, { "epoch": 2.1972832798221784, "grad_norm": 0.5481111316900571, "learning_rate": 4.871714846658937e-06, "loss": 0.5066, "step": 5604 }, { "epoch": 2.1976784391207707, "grad_norm": 0.45324845161744187, "learning_rate": 4.871665299116192e-06, "loss": 0.496, "step": 5605 }, { "epoch": 2.198073598419363, "grad_norm": 0.46469652330820316, "learning_rate": 4.871615742258985e-06, "loss": 0.5095, "step": 5606 }, { "epoch": 2.198468757717955, "grad_norm": 0.5196428348937057, "learning_rate": 4.871566176087512e-06, "loss": 0.4955, "step": 5607 }, { "epoch": 2.1988639170165474, "grad_norm": 0.47622508477020703, "learning_rate": 4.871516600601968e-06, "loss": 0.5222, "step": 5608 }, { "epoch": 2.1992590763151396, "grad_norm": 0.4644820525166222, "learning_rate": 4.871467015802545e-06, "loss": 0.5046, "step": 5609 }, { "epoch": 2.199654235613732, "grad_norm": 0.4563914565316861, "learning_rate": 4.871417421689442e-06, "loss": 0.502, "step": 5610 }, { "epoch": 2.200049394912324, "grad_norm": 0.4521533603619674, "learning_rate": 4.871367818262849e-06, "loss": 0.4989, "step": 5611 }, { "epoch": 2.2004445542109163, "grad_norm": 0.4679852547890778, "learning_rate": 4.871318205522965e-06, "loss": 0.5271, "step": 5612 }, { "epoch": 2.2008397135095086, "grad_norm": 0.44239899933907284, "learning_rate": 4.871268583469982e-06, "loss": 0.4922, "step": 5613 }, { "epoch": 2.201234872808101, "grad_norm": 0.46044033765149545, "learning_rate": 4.8712189521040955e-06, "loss": 0.505, "step": 5614 }, { "epoch": 2.201630032106693, "grad_norm": 0.4509076270729074, "learning_rate": 4.871169311425501e-06, "loss": 0.4841, "step": 5615 }, { "epoch": 2.2020251914052853, "grad_norm": 0.46744577639760204, "learning_rate": 4.871119661434395e-06, "loss": 0.4923, "step": 5616 }, { "epoch": 2.2024203507038775, "grad_norm": 0.46269441156741004, "learning_rate": 4.871070002130968e-06, "loss": 0.5057, "step": 5617 }, { "epoch": 2.20281551000247, "grad_norm": 0.4615270330282239, "learning_rate": 4.871020333515421e-06, "loss": 0.4931, "step": 5618 }, { "epoch": 2.203210669301062, "grad_norm": 0.45781453501465114, "learning_rate": 4.870970655587943e-06, "loss": 0.4993, "step": 5619 }, { "epoch": 2.2036058285996543, "grad_norm": 0.46448710878947935, "learning_rate": 4.870920968348734e-06, "loss": 0.513, "step": 5620 }, { "epoch": 2.2040009878982465, "grad_norm": 0.45923364426870794, "learning_rate": 4.870871271797986e-06, "loss": 0.4827, "step": 5621 }, { "epoch": 2.2043961471968387, "grad_norm": 0.4663678964949085, "learning_rate": 4.870821565935896e-06, "loss": 0.487, "step": 5622 }, { "epoch": 2.204791306495431, "grad_norm": 0.4652080789551877, "learning_rate": 4.870771850762658e-06, "loss": 0.5101, "step": 5623 }, { "epoch": 2.205186465794023, "grad_norm": 0.7697658307753772, "learning_rate": 4.870722126278468e-06, "loss": 0.4702, "step": 5624 }, { "epoch": 2.2055816250926155, "grad_norm": 0.46255357306164907, "learning_rate": 4.870672392483521e-06, "loss": 0.5035, "step": 5625 }, { "epoch": 2.2059767843912077, "grad_norm": 0.49091601504343524, "learning_rate": 4.870622649378012e-06, "loss": 0.5232, "step": 5626 }, { "epoch": 2.2063719436898, "grad_norm": 0.46764422378827775, "learning_rate": 4.870572896962138e-06, "loss": 0.4822, "step": 5627 }, { "epoch": 2.206767102988392, "grad_norm": 0.4580646300847785, "learning_rate": 4.870523135236092e-06, "loss": 0.516, "step": 5628 }, { "epoch": 2.2071622622869844, "grad_norm": 0.4716946592361151, "learning_rate": 4.8704733642000714e-06, "loss": 0.5058, "step": 5629 }, { "epoch": 2.2075574215855767, "grad_norm": 0.4610497435658121, "learning_rate": 4.8704235838542705e-06, "loss": 0.5128, "step": 5630 }, { "epoch": 2.207952580884169, "grad_norm": 0.46580653784035464, "learning_rate": 4.870373794198885e-06, "loss": 0.5168, "step": 5631 }, { "epoch": 2.208347740182761, "grad_norm": 0.45033186180479673, "learning_rate": 4.870323995234109e-06, "loss": 0.4908, "step": 5632 }, { "epoch": 2.2087428994813534, "grad_norm": 0.4623037931279241, "learning_rate": 4.870274186960142e-06, "loss": 0.5007, "step": 5633 }, { "epoch": 2.2091380587799456, "grad_norm": 0.4543745068546596, "learning_rate": 4.870224369377176e-06, "loss": 0.4904, "step": 5634 }, { "epoch": 2.209533218078538, "grad_norm": 0.4525710119538143, "learning_rate": 4.87017454248541e-06, "loss": 0.4789, "step": 5635 }, { "epoch": 2.20992837737713, "grad_norm": 0.475347791482502, "learning_rate": 4.870124706285036e-06, "loss": 0.5116, "step": 5636 }, { "epoch": 2.2103235366757223, "grad_norm": 0.4668581607053355, "learning_rate": 4.8700748607762515e-06, "loss": 0.4948, "step": 5637 }, { "epoch": 2.2107186959743146, "grad_norm": 0.4562173473097316, "learning_rate": 4.870025005959252e-06, "loss": 0.5139, "step": 5638 }, { "epoch": 2.211113855272907, "grad_norm": 0.4638745581699392, "learning_rate": 4.869975141834234e-06, "loss": 0.5042, "step": 5639 }, { "epoch": 2.211509014571499, "grad_norm": 0.45690466092589344, "learning_rate": 4.869925268401392e-06, "loss": 0.5122, "step": 5640 }, { "epoch": 2.2119041738700913, "grad_norm": 0.46129161407797814, "learning_rate": 4.869875385660923e-06, "loss": 0.5034, "step": 5641 }, { "epoch": 2.2122993331686835, "grad_norm": 0.45249931427852574, "learning_rate": 4.869825493613023e-06, "loss": 0.4944, "step": 5642 }, { "epoch": 2.212694492467276, "grad_norm": 0.4717240333098036, "learning_rate": 4.869775592257887e-06, "loss": 0.5079, "step": 5643 }, { "epoch": 2.213089651765868, "grad_norm": 0.45646269978521453, "learning_rate": 4.869725681595712e-06, "loss": 0.5086, "step": 5644 }, { "epoch": 2.2134848110644603, "grad_norm": 0.4660158700586118, "learning_rate": 4.869675761626693e-06, "loss": 0.5024, "step": 5645 }, { "epoch": 2.2138799703630525, "grad_norm": 0.46107902427465175, "learning_rate": 4.869625832351026e-06, "loss": 0.4842, "step": 5646 }, { "epoch": 2.2142751296616447, "grad_norm": 0.4610113532446741, "learning_rate": 4.869575893768909e-06, "loss": 0.5025, "step": 5647 }, { "epoch": 2.214670288960237, "grad_norm": 0.46330124836282083, "learning_rate": 4.869525945880536e-06, "loss": 0.5011, "step": 5648 }, { "epoch": 2.2150654482588292, "grad_norm": 0.44825650922902094, "learning_rate": 4.869475988686105e-06, "loss": 0.5132, "step": 5649 }, { "epoch": 2.2154606075574215, "grad_norm": 0.438523700300251, "learning_rate": 4.8694260221858095e-06, "loss": 0.4888, "step": 5650 }, { "epoch": 2.2158557668560137, "grad_norm": 0.446477475948747, "learning_rate": 4.869376046379848e-06, "loss": 0.5027, "step": 5651 }, { "epoch": 2.216250926154606, "grad_norm": 0.47359530183611437, "learning_rate": 4.869326061268416e-06, "loss": 0.5057, "step": 5652 }, { "epoch": 2.216646085453198, "grad_norm": 0.45346141292171366, "learning_rate": 4.869276066851711e-06, "loss": 0.5238, "step": 5653 }, { "epoch": 2.2170412447517904, "grad_norm": 0.4697574903847061, "learning_rate": 4.869226063129926e-06, "loss": 0.5157, "step": 5654 }, { "epoch": 2.2174364040503827, "grad_norm": 0.4992277490563748, "learning_rate": 4.869176050103262e-06, "loss": 0.5312, "step": 5655 }, { "epoch": 2.217831563348975, "grad_norm": 0.4463421488924404, "learning_rate": 4.869126027771912e-06, "loss": 0.4857, "step": 5656 }, { "epoch": 2.218226722647567, "grad_norm": 0.445536527726352, "learning_rate": 4.8690759961360736e-06, "loss": 0.511, "step": 5657 }, { "epoch": 2.2186218819461594, "grad_norm": 0.4608081468830687, "learning_rate": 4.869025955195944e-06, "loss": 0.4942, "step": 5658 }, { "epoch": 2.2190170412447516, "grad_norm": 0.471512152814528, "learning_rate": 4.868975904951718e-06, "loss": 0.5061, "step": 5659 }, { "epoch": 2.219412200543344, "grad_norm": 0.4550754501913592, "learning_rate": 4.868925845403594e-06, "loss": 0.4992, "step": 5660 }, { "epoch": 2.219807359841936, "grad_norm": 0.456877328887177, "learning_rate": 4.868875776551767e-06, "loss": 0.5084, "step": 5661 }, { "epoch": 2.2202025191405284, "grad_norm": 0.44849181279718897, "learning_rate": 4.868825698396435e-06, "loss": 0.4798, "step": 5662 }, { "epoch": 2.2205976784391206, "grad_norm": 0.4478184080732161, "learning_rate": 4.8687756109377935e-06, "loss": 0.4878, "step": 5663 }, { "epoch": 2.220992837737713, "grad_norm": 0.4558088044427646, "learning_rate": 4.86872551417604e-06, "loss": 0.5042, "step": 5664 }, { "epoch": 2.221387997036305, "grad_norm": 0.45237458187762586, "learning_rate": 4.8686754081113715e-06, "loss": 0.4871, "step": 5665 }, { "epoch": 2.2217831563348973, "grad_norm": 0.4344840693507297, "learning_rate": 4.868625292743985e-06, "loss": 0.4937, "step": 5666 }, { "epoch": 2.2221783156334896, "grad_norm": 0.45680335357644025, "learning_rate": 4.868575168074075e-06, "loss": 0.5051, "step": 5667 }, { "epoch": 2.222573474932082, "grad_norm": 0.4786605604468794, "learning_rate": 4.8685250341018405e-06, "loss": 0.5145, "step": 5668 }, { "epoch": 2.222968634230674, "grad_norm": 0.45133256657617904, "learning_rate": 4.868474890827479e-06, "loss": 0.4852, "step": 5669 }, { "epoch": 2.2233637935292663, "grad_norm": 0.45874816511999167, "learning_rate": 4.8684247382511855e-06, "loss": 0.506, "step": 5670 }, { "epoch": 2.2237589528278585, "grad_norm": 0.47105938523376145, "learning_rate": 4.868374576373157e-06, "loss": 0.5193, "step": 5671 }, { "epoch": 2.2241541121264508, "grad_norm": 0.4499761946748633, "learning_rate": 4.868324405193593e-06, "loss": 0.4957, "step": 5672 }, { "epoch": 2.224549271425043, "grad_norm": 0.44396274883575054, "learning_rate": 4.868274224712688e-06, "loss": 0.4968, "step": 5673 }, { "epoch": 2.2249444307236352, "grad_norm": 0.45650883917884044, "learning_rate": 4.86822403493064e-06, "loss": 0.4963, "step": 5674 }, { "epoch": 2.2253395900222275, "grad_norm": 0.45717604881834667, "learning_rate": 4.868173835847646e-06, "loss": 0.5214, "step": 5675 }, { "epoch": 2.22573474932082, "grad_norm": 0.4587246120001337, "learning_rate": 4.8681236274639024e-06, "loss": 0.4964, "step": 5676 }, { "epoch": 2.2261299086194124, "grad_norm": 0.46577038706127155, "learning_rate": 4.868073409779609e-06, "loss": 0.501, "step": 5677 }, { "epoch": 2.2265250679180046, "grad_norm": 0.45292880378261396, "learning_rate": 4.86802318279496e-06, "loss": 0.5076, "step": 5678 }, { "epoch": 2.226920227216597, "grad_norm": 0.46823804273718916, "learning_rate": 4.867972946510154e-06, "loss": 0.4996, "step": 5679 }, { "epoch": 2.227315386515189, "grad_norm": 0.4651990124613078, "learning_rate": 4.867922700925388e-06, "loss": 0.4958, "step": 5680 }, { "epoch": 2.2277105458137814, "grad_norm": 0.48024213114162967, "learning_rate": 4.86787244604086e-06, "loss": 0.5189, "step": 5681 }, { "epoch": 2.2281057051123736, "grad_norm": 0.5081055107145902, "learning_rate": 4.867822181856766e-06, "loss": 0.4902, "step": 5682 }, { "epoch": 2.228500864410966, "grad_norm": 0.4430423080870507, "learning_rate": 4.867771908373306e-06, "loss": 0.5037, "step": 5683 }, { "epoch": 2.228896023709558, "grad_norm": 0.46192941764536816, "learning_rate": 4.867721625590674e-06, "loss": 0.5109, "step": 5684 }, { "epoch": 2.2292911830081503, "grad_norm": 0.47441393038250607, "learning_rate": 4.8676713335090694e-06, "loss": 0.4973, "step": 5685 }, { "epoch": 2.2296863423067426, "grad_norm": 0.4466027577701095, "learning_rate": 4.867621032128691e-06, "loss": 0.4853, "step": 5686 }, { "epoch": 2.230081501605335, "grad_norm": 0.46435968422699964, "learning_rate": 4.867570721449734e-06, "loss": 0.5097, "step": 5687 }, { "epoch": 2.230476660903927, "grad_norm": 0.45475109267398633, "learning_rate": 4.867520401472396e-06, "loss": 0.4937, "step": 5688 }, { "epoch": 2.2308718202025193, "grad_norm": 0.46084767398038173, "learning_rate": 4.867470072196876e-06, "loss": 0.4913, "step": 5689 }, { "epoch": 2.2312669795011115, "grad_norm": 0.45201456818615127, "learning_rate": 4.867419733623372e-06, "loss": 0.494, "step": 5690 }, { "epoch": 2.2316621387997038, "grad_norm": 0.4669002434804088, "learning_rate": 4.86736938575208e-06, "loss": 0.5061, "step": 5691 }, { "epoch": 2.232057298098296, "grad_norm": 0.5274175443355472, "learning_rate": 4.867319028583199e-06, "loss": 0.4944, "step": 5692 }, { "epoch": 2.2324524573968882, "grad_norm": 0.45829216977076004, "learning_rate": 4.867268662116926e-06, "loss": 0.5064, "step": 5693 }, { "epoch": 2.2328476166954805, "grad_norm": 0.45758070298208187, "learning_rate": 4.86721828635346e-06, "loss": 0.49, "step": 5694 }, { "epoch": 2.2332427759940727, "grad_norm": 0.47928993400834125, "learning_rate": 4.867167901292997e-06, "loss": 0.5075, "step": 5695 }, { "epoch": 2.233637935292665, "grad_norm": 0.46840348523008235, "learning_rate": 4.867117506935737e-06, "loss": 0.517, "step": 5696 }, { "epoch": 2.234033094591257, "grad_norm": 0.48509443499209903, "learning_rate": 4.867067103281876e-06, "loss": 0.5159, "step": 5697 }, { "epoch": 2.2344282538898494, "grad_norm": 0.5618447420828913, "learning_rate": 4.867016690331613e-06, "loss": 0.498, "step": 5698 }, { "epoch": 2.2348234131884417, "grad_norm": 0.46216383707180914, "learning_rate": 4.866966268085146e-06, "loss": 0.5139, "step": 5699 }, { "epoch": 2.235218572487034, "grad_norm": 0.45344618660738734, "learning_rate": 4.866915836542672e-06, "loss": 0.4922, "step": 5700 }, { "epoch": 2.235613731785626, "grad_norm": 0.4679880087576633, "learning_rate": 4.866865395704391e-06, "loss": 0.4877, "step": 5701 }, { "epoch": 2.2360088910842184, "grad_norm": 0.4683019810711741, "learning_rate": 4.8668149455705e-06, "loss": 0.4969, "step": 5702 }, { "epoch": 2.2364040503828106, "grad_norm": 0.44030516087498117, "learning_rate": 4.866764486141195e-06, "loss": 0.5104, "step": 5703 }, { "epoch": 2.236799209681403, "grad_norm": 0.4568263053340173, "learning_rate": 4.866714017416678e-06, "loss": 0.4835, "step": 5704 }, { "epoch": 2.237194368979995, "grad_norm": 0.4855558978372768, "learning_rate": 4.866663539397145e-06, "loss": 0.5128, "step": 5705 }, { "epoch": 2.2375895282785874, "grad_norm": 0.4698552204107376, "learning_rate": 4.866613052082795e-06, "loss": 0.4989, "step": 5706 }, { "epoch": 2.2379846875771796, "grad_norm": 0.4573675129961296, "learning_rate": 4.866562555473826e-06, "loss": 0.5163, "step": 5707 }, { "epoch": 2.238379846875772, "grad_norm": 0.4951264964599398, "learning_rate": 4.866512049570437e-06, "loss": 0.5161, "step": 5708 }, { "epoch": 2.238775006174364, "grad_norm": 0.4687182481039132, "learning_rate": 4.866461534372825e-06, "loss": 0.5193, "step": 5709 }, { "epoch": 2.2391701654729563, "grad_norm": 0.4524062924624281, "learning_rate": 4.866411009881189e-06, "loss": 0.5038, "step": 5710 }, { "epoch": 2.2395653247715486, "grad_norm": 0.44991629928323695, "learning_rate": 4.866360476095727e-06, "loss": 0.4756, "step": 5711 }, { "epoch": 2.239960484070141, "grad_norm": 0.45306535987909613, "learning_rate": 4.866309933016639e-06, "loss": 0.5075, "step": 5712 }, { "epoch": 2.240355643368733, "grad_norm": 0.4582923597497876, "learning_rate": 4.866259380644122e-06, "loss": 0.4876, "step": 5713 }, { "epoch": 2.2407508026673253, "grad_norm": 0.4756588236793085, "learning_rate": 4.866208818978375e-06, "loss": 0.5107, "step": 5714 }, { "epoch": 2.2411459619659175, "grad_norm": 0.4463905269080376, "learning_rate": 4.866158248019597e-06, "loss": 0.4959, "step": 5715 }, { "epoch": 2.2415411212645098, "grad_norm": 0.46283025201138467, "learning_rate": 4.866107667767986e-06, "loss": 0.5118, "step": 5716 }, { "epoch": 2.241936280563102, "grad_norm": 0.45145005862470505, "learning_rate": 4.866057078223741e-06, "loss": 0.5036, "step": 5717 }, { "epoch": 2.2423314398616943, "grad_norm": 0.4496155378592595, "learning_rate": 4.86600647938706e-06, "loss": 0.4873, "step": 5718 }, { "epoch": 2.2427265991602865, "grad_norm": 0.46541226899418187, "learning_rate": 4.865955871258142e-06, "loss": 0.4932, "step": 5719 }, { "epoch": 2.2431217584588787, "grad_norm": 0.46041587527729605, "learning_rate": 4.865905253837187e-06, "loss": 0.4907, "step": 5720 }, { "epoch": 2.243516917757471, "grad_norm": 0.45128087864299693, "learning_rate": 4.865854627124392e-06, "loss": 0.5083, "step": 5721 }, { "epoch": 2.243912077056063, "grad_norm": 0.4597446068090033, "learning_rate": 4.8658039911199575e-06, "loss": 0.4876, "step": 5722 }, { "epoch": 2.2443072363546555, "grad_norm": 0.4531642673031476, "learning_rate": 4.8657533458240814e-06, "loss": 0.5167, "step": 5723 }, { "epoch": 2.2447023956532477, "grad_norm": 0.4436869230047204, "learning_rate": 4.865702691236962e-06, "loss": 0.4931, "step": 5724 }, { "epoch": 2.24509755495184, "grad_norm": 0.4651798857195074, "learning_rate": 4.865652027358799e-06, "loss": 0.5097, "step": 5725 }, { "epoch": 2.245492714250432, "grad_norm": 0.45772796873736676, "learning_rate": 4.865601354189791e-06, "loss": 0.4916, "step": 5726 }, { "epoch": 2.2458878735490244, "grad_norm": 0.46051001258089186, "learning_rate": 4.865550671730139e-06, "loss": 0.505, "step": 5727 }, { "epoch": 2.2462830328476167, "grad_norm": 0.46185516258558523, "learning_rate": 4.8654999799800394e-06, "loss": 0.489, "step": 5728 }, { "epoch": 2.246678192146209, "grad_norm": 0.47864882828994293, "learning_rate": 4.865449278939693e-06, "loss": 0.528, "step": 5729 }, { "epoch": 2.247073351444801, "grad_norm": 0.49285960618658853, "learning_rate": 4.865398568609297e-06, "loss": 0.5121, "step": 5730 }, { "epoch": 2.2474685107433934, "grad_norm": 0.470443068321539, "learning_rate": 4.865347848989052e-06, "loss": 0.5095, "step": 5731 }, { "epoch": 2.2478636700419856, "grad_norm": 0.4560396366626315, "learning_rate": 4.865297120079157e-06, "loss": 0.4911, "step": 5732 }, { "epoch": 2.248258829340578, "grad_norm": 0.4510249709001742, "learning_rate": 4.8652463818798115e-06, "loss": 0.491, "step": 5733 }, { "epoch": 2.24865398863917, "grad_norm": 0.4646681611937035, "learning_rate": 4.8651956343912145e-06, "loss": 0.4974, "step": 5734 }, { "epoch": 2.2490491479377623, "grad_norm": 0.45710106589102445, "learning_rate": 4.8651448776135655e-06, "loss": 0.4957, "step": 5735 }, { "epoch": 2.2494443072363546, "grad_norm": 0.4432218553818469, "learning_rate": 4.8650941115470636e-06, "loss": 0.5001, "step": 5736 }, { "epoch": 2.249839466534947, "grad_norm": 0.4461130216348829, "learning_rate": 4.865043336191908e-06, "loss": 0.4993, "step": 5737 }, { "epoch": 2.250234625833539, "grad_norm": 0.4765715644527521, "learning_rate": 4.864992551548298e-06, "loss": 0.4984, "step": 5738 }, { "epoch": 2.2506297851321313, "grad_norm": 0.44134482235233946, "learning_rate": 4.864941757616434e-06, "loss": 0.4966, "step": 5739 }, { "epoch": 2.2510249444307235, "grad_norm": 0.4565907937298236, "learning_rate": 4.864890954396514e-06, "loss": 0.5124, "step": 5740 }, { "epoch": 2.251420103729316, "grad_norm": 0.4445014385916519, "learning_rate": 4.8648401418887385e-06, "loss": 0.5036, "step": 5741 }, { "epoch": 2.251815263027908, "grad_norm": 0.5046116665567862, "learning_rate": 4.864789320093307e-06, "loss": 0.4885, "step": 5742 }, { "epoch": 2.2522104223265003, "grad_norm": 0.45743791687221036, "learning_rate": 4.86473848901042e-06, "loss": 0.5157, "step": 5743 }, { "epoch": 2.2526055816250925, "grad_norm": 0.45713971041275187, "learning_rate": 4.864687648640275e-06, "loss": 0.5209, "step": 5744 }, { "epoch": 2.2530007409236847, "grad_norm": 0.45916966205747045, "learning_rate": 4.864636798983073e-06, "loss": 0.5218, "step": 5745 }, { "epoch": 2.253395900222277, "grad_norm": 0.4645094613852396, "learning_rate": 4.864585940039014e-06, "loss": 0.4919, "step": 5746 }, { "epoch": 2.2537910595208692, "grad_norm": 0.4611192692789078, "learning_rate": 4.864535071808298e-06, "loss": 0.5181, "step": 5747 }, { "epoch": 2.2541862188194615, "grad_norm": 0.462528547985611, "learning_rate": 4.8644841942911225e-06, "loss": 0.5048, "step": 5748 }, { "epoch": 2.2545813781180537, "grad_norm": 0.4622175816401159, "learning_rate": 4.8644333074876896e-06, "loss": 0.5138, "step": 5749 }, { "epoch": 2.254976537416646, "grad_norm": 0.4802227886602715, "learning_rate": 4.864382411398198e-06, "loss": 0.4925, "step": 5750 }, { "epoch": 2.255371696715238, "grad_norm": 0.4542300433876316, "learning_rate": 4.864331506022848e-06, "loss": 0.5204, "step": 5751 }, { "epoch": 2.2557668560138304, "grad_norm": 0.46121165508412626, "learning_rate": 4.86428059136184e-06, "loss": 0.489, "step": 5752 }, { "epoch": 2.2561620153124227, "grad_norm": 0.45065089171005923, "learning_rate": 4.864229667415373e-06, "loss": 0.5082, "step": 5753 }, { "epoch": 2.256557174611015, "grad_norm": 0.4664108459131774, "learning_rate": 4.864178734183649e-06, "loss": 0.5203, "step": 5754 }, { "epoch": 2.256952333909607, "grad_norm": 0.46599726286903415, "learning_rate": 4.864127791666865e-06, "loss": 0.5044, "step": 5755 }, { "epoch": 2.2573474932081994, "grad_norm": 0.4579348177710053, "learning_rate": 4.864076839865223e-06, "loss": 0.4966, "step": 5756 }, { "epoch": 2.2577426525067916, "grad_norm": 0.4710610355784568, "learning_rate": 4.864025878778923e-06, "loss": 0.5039, "step": 5757 }, { "epoch": 2.258137811805384, "grad_norm": 0.5098836289434676, "learning_rate": 4.863974908408164e-06, "loss": 0.4864, "step": 5758 }, { "epoch": 2.258532971103976, "grad_norm": 0.444685462826527, "learning_rate": 4.863923928753148e-06, "loss": 0.4929, "step": 5759 }, { "epoch": 2.2589281304025683, "grad_norm": 0.45233517234419085, "learning_rate": 4.8638729398140735e-06, "loss": 0.5061, "step": 5760 }, { "epoch": 2.2593232897011606, "grad_norm": 0.465371345730281, "learning_rate": 4.863821941591142e-06, "loss": 0.4998, "step": 5761 }, { "epoch": 2.2597184489997533, "grad_norm": 0.4562128947163187, "learning_rate": 4.863770934084553e-06, "loss": 0.5238, "step": 5762 }, { "epoch": 2.2601136082983455, "grad_norm": 0.45638820275354225, "learning_rate": 4.863719917294507e-06, "loss": 0.5035, "step": 5763 }, { "epoch": 2.2605087675969378, "grad_norm": 0.47757782000563964, "learning_rate": 4.863668891221206e-06, "loss": 0.5079, "step": 5764 }, { "epoch": 2.26090392689553, "grad_norm": 0.4662080915894657, "learning_rate": 4.863617855864847e-06, "loss": 0.5124, "step": 5765 }, { "epoch": 2.2612990861941222, "grad_norm": 0.47892291257771435, "learning_rate": 4.863566811225634e-06, "loss": 0.5184, "step": 5766 }, { "epoch": 2.2616942454927145, "grad_norm": 0.45977277292864505, "learning_rate": 4.863515757303764e-06, "loss": 0.4941, "step": 5767 }, { "epoch": 2.2620894047913067, "grad_norm": 0.46916509655903266, "learning_rate": 4.863464694099441e-06, "loss": 0.5056, "step": 5768 }, { "epoch": 2.262484564089899, "grad_norm": 0.4504962671579038, "learning_rate": 4.863413621612862e-06, "loss": 0.5062, "step": 5769 }, { "epoch": 2.262879723388491, "grad_norm": 0.44867824093263253, "learning_rate": 4.863362539844231e-06, "loss": 0.4993, "step": 5770 }, { "epoch": 2.2632748826870834, "grad_norm": 0.4670694822124429, "learning_rate": 4.863311448793747e-06, "loss": 0.5106, "step": 5771 }, { "epoch": 2.2636700419856757, "grad_norm": 0.4632432969652908, "learning_rate": 4.8632603484616095e-06, "loss": 0.5138, "step": 5772 }, { "epoch": 2.264065201284268, "grad_norm": 0.5486722092316492, "learning_rate": 4.8632092388480216e-06, "loss": 0.4994, "step": 5773 }, { "epoch": 2.26446036058286, "grad_norm": 0.46134894866499143, "learning_rate": 4.863158119953182e-06, "loss": 0.5041, "step": 5774 }, { "epoch": 2.2648555198814524, "grad_norm": 0.4645211586085868, "learning_rate": 4.863106991777293e-06, "loss": 0.5062, "step": 5775 }, { "epoch": 2.2652506791800446, "grad_norm": 0.46537662412865477, "learning_rate": 4.863055854320554e-06, "loss": 0.4913, "step": 5776 }, { "epoch": 2.265645838478637, "grad_norm": 0.4463910826863991, "learning_rate": 4.863004707583167e-06, "loss": 0.5051, "step": 5777 }, { "epoch": 2.266040997777229, "grad_norm": 0.4461844055881152, "learning_rate": 4.862953551565332e-06, "loss": 0.4879, "step": 5778 }, { "epoch": 2.2664361570758214, "grad_norm": 0.4671496378324098, "learning_rate": 4.862902386267251e-06, "loss": 0.4962, "step": 5779 }, { "epoch": 2.2668313163744136, "grad_norm": 0.47268040765077685, "learning_rate": 4.862851211689124e-06, "loss": 0.5075, "step": 5780 }, { "epoch": 2.267226475673006, "grad_norm": 0.4482777628793822, "learning_rate": 4.8628000278311515e-06, "loss": 0.5135, "step": 5781 }, { "epoch": 2.267621634971598, "grad_norm": 0.4589821173101706, "learning_rate": 4.862748834693536e-06, "loss": 0.4909, "step": 5782 }, { "epoch": 2.2680167942701903, "grad_norm": 0.4670920261314897, "learning_rate": 4.862697632276477e-06, "loss": 0.5232, "step": 5783 }, { "epoch": 2.2684119535687826, "grad_norm": 0.4421128362683695, "learning_rate": 4.862646420580178e-06, "loss": 0.4887, "step": 5784 }, { "epoch": 2.268807112867375, "grad_norm": 0.44505858305719176, "learning_rate": 4.862595199604837e-06, "loss": 0.5003, "step": 5785 }, { "epoch": 2.269202272165967, "grad_norm": 0.49135116256536704, "learning_rate": 4.862543969350657e-06, "loss": 0.4949, "step": 5786 }, { "epoch": 2.2695974314645593, "grad_norm": 0.46918700677450953, "learning_rate": 4.86249272981784e-06, "loss": 0.4885, "step": 5787 }, { "epoch": 2.2699925907631515, "grad_norm": 0.4485662070030722, "learning_rate": 4.862441481006586e-06, "loss": 0.5049, "step": 5788 }, { "epoch": 2.2703877500617438, "grad_norm": 0.43803428307843834, "learning_rate": 4.862390222917095e-06, "loss": 0.4808, "step": 5789 }, { "epoch": 2.270782909360336, "grad_norm": 0.46807207385887406, "learning_rate": 4.86233895554957e-06, "loss": 0.5187, "step": 5790 }, { "epoch": 2.2711780686589282, "grad_norm": 0.44793935494961473, "learning_rate": 4.862287678904213e-06, "loss": 0.4933, "step": 5791 }, { "epoch": 2.2715732279575205, "grad_norm": 0.46535343446328026, "learning_rate": 4.862236392981225e-06, "loss": 0.5276, "step": 5792 }, { "epoch": 2.2719683872561127, "grad_norm": 0.4698550219155634, "learning_rate": 4.8621850977808046e-06, "loss": 0.4872, "step": 5793 }, { "epoch": 2.272363546554705, "grad_norm": 0.4682879416283325, "learning_rate": 4.862133793303157e-06, "loss": 0.4903, "step": 5794 }, { "epoch": 2.272758705853297, "grad_norm": 0.48270114542131143, "learning_rate": 4.862082479548482e-06, "loss": 0.4993, "step": 5795 }, { "epoch": 2.2731538651518894, "grad_norm": 0.5493323187094451, "learning_rate": 4.862031156516982e-06, "loss": 0.4965, "step": 5796 }, { "epoch": 2.2735490244504817, "grad_norm": 0.4516215367368103, "learning_rate": 4.861979824208857e-06, "loss": 0.5034, "step": 5797 }, { "epoch": 2.273944183749074, "grad_norm": 0.4538027339697062, "learning_rate": 4.86192848262431e-06, "loss": 0.5006, "step": 5798 }, { "epoch": 2.274339343047666, "grad_norm": 0.4616229995385204, "learning_rate": 4.861877131763542e-06, "loss": 0.5013, "step": 5799 }, { "epoch": 2.2747345023462584, "grad_norm": 0.46227058646164154, "learning_rate": 4.861825771626755e-06, "loss": 0.4987, "step": 5800 }, { "epoch": 2.2751296616448506, "grad_norm": 0.45156002524949446, "learning_rate": 4.86177440221415e-06, "loss": 0.4905, "step": 5801 }, { "epoch": 2.275524820943443, "grad_norm": 0.45398486859479054, "learning_rate": 4.861723023525929e-06, "loss": 0.5127, "step": 5802 }, { "epoch": 2.275919980242035, "grad_norm": 0.4546600141282232, "learning_rate": 4.861671635562295e-06, "loss": 0.512, "step": 5803 }, { "epoch": 2.2763151395406274, "grad_norm": 0.4420173118969808, "learning_rate": 4.861620238323449e-06, "loss": 0.508, "step": 5804 }, { "epoch": 2.2767102988392196, "grad_norm": 0.4481000176106849, "learning_rate": 4.861568831809592e-06, "loss": 0.505, "step": 5805 }, { "epoch": 2.277105458137812, "grad_norm": 0.48239910462261965, "learning_rate": 4.861517416020928e-06, "loss": 0.5103, "step": 5806 }, { "epoch": 2.277500617436404, "grad_norm": 0.47340057062183605, "learning_rate": 4.861465990957656e-06, "loss": 0.4927, "step": 5807 }, { "epoch": 2.2778957767349963, "grad_norm": 0.4552226094608049, "learning_rate": 4.86141455661998e-06, "loss": 0.4982, "step": 5808 }, { "epoch": 2.2782909360335886, "grad_norm": 0.580540080884017, "learning_rate": 4.861363113008102e-06, "loss": 0.4945, "step": 5809 }, { "epoch": 2.278686095332181, "grad_norm": 0.4563722531620255, "learning_rate": 4.861311660122223e-06, "loss": 0.491, "step": 5810 }, { "epoch": 2.279081254630773, "grad_norm": 0.44479481415363753, "learning_rate": 4.861260197962546e-06, "loss": 0.4733, "step": 5811 }, { "epoch": 2.2794764139293653, "grad_norm": 0.46120540937626275, "learning_rate": 4.861208726529273e-06, "loss": 0.4971, "step": 5812 }, { "epoch": 2.2798715732279575, "grad_norm": 0.46393191062150557, "learning_rate": 4.861157245822605e-06, "loss": 0.5004, "step": 5813 }, { "epoch": 2.2802667325265498, "grad_norm": 0.4515647504745636, "learning_rate": 4.861105755842747e-06, "loss": 0.4777, "step": 5814 }, { "epoch": 2.280661891825142, "grad_norm": 0.438205927734934, "learning_rate": 4.8610542565898975e-06, "loss": 0.4916, "step": 5815 }, { "epoch": 2.2810570511237342, "grad_norm": 0.457098274611986, "learning_rate": 4.861002748064261e-06, "loss": 0.5009, "step": 5816 }, { "epoch": 2.2814522104223265, "grad_norm": 0.4615257787626016, "learning_rate": 4.86095123026604e-06, "loss": 0.4986, "step": 5817 }, { "epoch": 2.2818473697209187, "grad_norm": 0.45965059976307776, "learning_rate": 4.860899703195435e-06, "loss": 0.4986, "step": 5818 }, { "epoch": 2.282242529019511, "grad_norm": 0.46417580461089847, "learning_rate": 4.860848166852651e-06, "loss": 0.4937, "step": 5819 }, { "epoch": 2.282637688318103, "grad_norm": 0.47161773166956317, "learning_rate": 4.860796621237888e-06, "loss": 0.4979, "step": 5820 }, { "epoch": 2.2830328476166954, "grad_norm": 0.4660616983606871, "learning_rate": 4.86074506635135e-06, "loss": 0.509, "step": 5821 }, { "epoch": 2.2834280069152877, "grad_norm": 0.4832205275826543, "learning_rate": 4.860693502193239e-06, "loss": 0.516, "step": 5822 }, { "epoch": 2.28382316621388, "grad_norm": 0.49641441825377863, "learning_rate": 4.860641928763757e-06, "loss": 0.5138, "step": 5823 }, { "epoch": 2.284218325512472, "grad_norm": 0.4591396621913851, "learning_rate": 4.860590346063107e-06, "loss": 0.4956, "step": 5824 }, { "epoch": 2.2846134848110644, "grad_norm": 0.46808791796018534, "learning_rate": 4.8605387540914915e-06, "loss": 0.4979, "step": 5825 }, { "epoch": 2.2850086441096567, "grad_norm": 0.44616669574023354, "learning_rate": 4.8604871528491135e-06, "loss": 0.4809, "step": 5826 }, { "epoch": 2.285403803408249, "grad_norm": 0.46293514962772786, "learning_rate": 4.860435542336175e-06, "loss": 0.5058, "step": 5827 }, { "epoch": 2.285798962706841, "grad_norm": 0.47952265408731376, "learning_rate": 4.86038392255288e-06, "loss": 0.4998, "step": 5828 }, { "epoch": 2.2861941220054334, "grad_norm": 0.4585566892611743, "learning_rate": 4.8603322934994284e-06, "loss": 0.5039, "step": 5829 }, { "epoch": 2.2865892813040256, "grad_norm": 0.45394838026461104, "learning_rate": 4.860280655176026e-06, "loss": 0.5084, "step": 5830 }, { "epoch": 2.286984440602618, "grad_norm": 0.4556930344196025, "learning_rate": 4.860229007582874e-06, "loss": 0.512, "step": 5831 }, { "epoch": 2.28737959990121, "grad_norm": 0.5160699721279627, "learning_rate": 4.860177350720176e-06, "loss": 0.4992, "step": 5832 }, { "epoch": 2.2877747591998023, "grad_norm": 0.4683352173737334, "learning_rate": 4.860125684588135e-06, "loss": 0.4984, "step": 5833 }, { "epoch": 2.2881699184983946, "grad_norm": 0.44948200378048164, "learning_rate": 4.860074009186952e-06, "loss": 0.5026, "step": 5834 }, { "epoch": 2.288565077796987, "grad_norm": 0.4462063995104546, "learning_rate": 4.8600223245168325e-06, "loss": 0.4989, "step": 5835 }, { "epoch": 2.288960237095579, "grad_norm": 0.4769414323903259, "learning_rate": 4.8599706305779785e-06, "loss": 0.5109, "step": 5836 }, { "epoch": 2.2893553963941713, "grad_norm": 0.4838732772951883, "learning_rate": 4.8599189273705926e-06, "loss": 0.4982, "step": 5837 }, { "epoch": 2.2897505556927635, "grad_norm": 0.47336977014120124, "learning_rate": 4.859867214894878e-06, "loss": 0.501, "step": 5838 }, { "epoch": 2.2901457149913558, "grad_norm": 0.4708351136561112, "learning_rate": 4.8598154931510385e-06, "loss": 0.5142, "step": 5839 }, { "epoch": 2.290540874289948, "grad_norm": 0.4335493511834357, "learning_rate": 4.859763762139276e-06, "loss": 0.4897, "step": 5840 }, { "epoch": 2.2909360335885403, "grad_norm": 0.4551238002159216, "learning_rate": 4.859712021859795e-06, "loss": 0.506, "step": 5841 }, { "epoch": 2.2913311928871325, "grad_norm": 0.46419964327160607, "learning_rate": 4.8596602723127975e-06, "loss": 0.4924, "step": 5842 }, { "epoch": 2.2917263521857247, "grad_norm": 0.4504273710533772, "learning_rate": 4.859608513498488e-06, "loss": 0.5141, "step": 5843 }, { "epoch": 2.292121511484317, "grad_norm": 0.4592991376285661, "learning_rate": 4.859556745417068e-06, "loss": 0.5224, "step": 5844 }, { "epoch": 2.292516670782909, "grad_norm": 0.45863529913627665, "learning_rate": 4.859504968068743e-06, "loss": 0.5019, "step": 5845 }, { "epoch": 2.2929118300815015, "grad_norm": 0.46302406671745044, "learning_rate": 4.859453181453715e-06, "loss": 0.5123, "step": 5846 }, { "epoch": 2.2933069893800937, "grad_norm": 0.5210997355300256, "learning_rate": 4.8594013855721875e-06, "loss": 0.5157, "step": 5847 }, { "epoch": 2.293702148678686, "grad_norm": 0.4698909559181017, "learning_rate": 4.859349580424364e-06, "loss": 0.5115, "step": 5848 }, { "epoch": 2.294097307977278, "grad_norm": 0.4753355992191938, "learning_rate": 4.859297766010448e-06, "loss": 0.508, "step": 5849 }, { "epoch": 2.2944924672758704, "grad_norm": 0.44406133299641976, "learning_rate": 4.859245942330643e-06, "loss": 0.5031, "step": 5850 }, { "epoch": 2.2948876265744627, "grad_norm": 0.46793872675388737, "learning_rate": 4.859194109385152e-06, "loss": 0.5028, "step": 5851 }, { "epoch": 2.295282785873055, "grad_norm": 0.456450080256325, "learning_rate": 4.85914226717418e-06, "loss": 0.4996, "step": 5852 }, { "epoch": 2.295677945171647, "grad_norm": 0.4678454186698638, "learning_rate": 4.85909041569793e-06, "loss": 0.4944, "step": 5853 }, { "epoch": 2.2960731044702394, "grad_norm": 0.4558794558467581, "learning_rate": 4.8590385549566046e-06, "loss": 0.5009, "step": 5854 }, { "epoch": 2.2964682637688316, "grad_norm": 0.45402991768060114, "learning_rate": 4.858986684950408e-06, "loss": 0.5311, "step": 5855 }, { "epoch": 2.296863423067424, "grad_norm": 0.46642376112323874, "learning_rate": 4.858934805679545e-06, "loss": 0.4982, "step": 5856 }, { "epoch": 2.297258582366016, "grad_norm": 0.4582563144966381, "learning_rate": 4.858882917144218e-06, "loss": 0.4966, "step": 5857 }, { "epoch": 2.2976537416646083, "grad_norm": 0.44640576325625786, "learning_rate": 4.858831019344632e-06, "loss": 0.5014, "step": 5858 }, { "epoch": 2.2980489009632006, "grad_norm": 0.45330919927536356, "learning_rate": 4.858779112280989e-06, "loss": 0.4981, "step": 5859 }, { "epoch": 2.298444060261793, "grad_norm": 0.4575939467608079, "learning_rate": 4.858727195953495e-06, "loss": 0.5004, "step": 5860 }, { "epoch": 2.298839219560385, "grad_norm": 0.45894810163579486, "learning_rate": 4.858675270362352e-06, "loss": 0.5005, "step": 5861 }, { "epoch": 2.2992343788589773, "grad_norm": 0.4667448223237067, "learning_rate": 4.858623335507765e-06, "loss": 0.5131, "step": 5862 }, { "epoch": 2.2996295381575695, "grad_norm": 0.4466389707322938, "learning_rate": 4.858571391389938e-06, "loss": 0.5065, "step": 5863 }, { "epoch": 2.300024697456162, "grad_norm": 0.46052845698144745, "learning_rate": 4.858519438009075e-06, "loss": 0.4981, "step": 5864 }, { "epoch": 2.300419856754754, "grad_norm": 0.4521693219071108, "learning_rate": 4.8584674753653795e-06, "loss": 0.4932, "step": 5865 }, { "epoch": 2.3008150160533463, "grad_norm": 0.4747140612853695, "learning_rate": 4.858415503459056e-06, "loss": 0.5212, "step": 5866 }, { "epoch": 2.3012101753519385, "grad_norm": 0.45520487283506444, "learning_rate": 4.858363522290308e-06, "loss": 0.5063, "step": 5867 }, { "epoch": 2.301605334650531, "grad_norm": 0.4551319537165788, "learning_rate": 4.858311531859341e-06, "loss": 0.488, "step": 5868 }, { "epoch": 2.3020004939491234, "grad_norm": 0.4794988475592662, "learning_rate": 4.858259532166358e-06, "loss": 0.5088, "step": 5869 }, { "epoch": 2.3023956532477157, "grad_norm": 0.4581559846294469, "learning_rate": 4.858207523211563e-06, "loss": 0.5049, "step": 5870 }, { "epoch": 2.302790812546308, "grad_norm": 0.4616610380469891, "learning_rate": 4.858155504995162e-06, "loss": 0.4891, "step": 5871 }, { "epoch": 2.3031859718449, "grad_norm": 0.46422859211150336, "learning_rate": 4.8581034775173575e-06, "loss": 0.512, "step": 5872 }, { "epoch": 2.3035811311434924, "grad_norm": 0.4693415161744398, "learning_rate": 4.858051440778354e-06, "loss": 0.5236, "step": 5873 }, { "epoch": 2.3039762904420846, "grad_norm": 0.45548329510092883, "learning_rate": 4.857999394778357e-06, "loss": 0.5045, "step": 5874 }, { "epoch": 2.304371449740677, "grad_norm": 0.4568687640660631, "learning_rate": 4.857947339517571e-06, "loss": 0.5055, "step": 5875 }, { "epoch": 2.304766609039269, "grad_norm": 0.46035875823989625, "learning_rate": 4.857895274996198e-06, "loss": 0.4936, "step": 5876 }, { "epoch": 2.3051617683378613, "grad_norm": 0.4852045992439535, "learning_rate": 4.857843201214445e-06, "loss": 0.515, "step": 5877 }, { "epoch": 2.3055569276364536, "grad_norm": 0.4595559022314449, "learning_rate": 4.857791118172515e-06, "loss": 0.4981, "step": 5878 }, { "epoch": 2.305952086935046, "grad_norm": 0.4647597601816075, "learning_rate": 4.857739025870614e-06, "loss": 0.4914, "step": 5879 }, { "epoch": 2.306347246233638, "grad_norm": 0.4745172978982465, "learning_rate": 4.857686924308946e-06, "loss": 0.4986, "step": 5880 }, { "epoch": 2.3067424055322303, "grad_norm": 0.4738840639632119, "learning_rate": 4.857634813487715e-06, "loss": 0.5071, "step": 5881 }, { "epoch": 2.3071375648308226, "grad_norm": 0.4528727368292258, "learning_rate": 4.857582693407126e-06, "loss": 0.4974, "step": 5882 }, { "epoch": 2.307532724129415, "grad_norm": 0.46653799947338975, "learning_rate": 4.857530564067383e-06, "loss": 0.4883, "step": 5883 }, { "epoch": 2.307927883428007, "grad_norm": 0.46309044360177304, "learning_rate": 4.857478425468693e-06, "loss": 0.4971, "step": 5884 }, { "epoch": 2.3083230427265993, "grad_norm": 0.46207981907616164, "learning_rate": 4.857426277611258e-06, "loss": 0.512, "step": 5885 }, { "epoch": 2.3087182020251915, "grad_norm": 0.46314752976399587, "learning_rate": 4.857374120495285e-06, "loss": 0.5063, "step": 5886 }, { "epoch": 2.3091133613237838, "grad_norm": 0.4649842929739362, "learning_rate": 4.857321954120977e-06, "loss": 0.5114, "step": 5887 }, { "epoch": 2.309508520622376, "grad_norm": 0.5535102504683219, "learning_rate": 4.857269778488541e-06, "loss": 0.5134, "step": 5888 }, { "epoch": 2.3099036799209682, "grad_norm": 0.4423259127513932, "learning_rate": 4.85721759359818e-06, "loss": 0.4975, "step": 5889 }, { "epoch": 2.3102988392195605, "grad_norm": 0.44102253287067894, "learning_rate": 4.8571653994501e-06, "loss": 0.487, "step": 5890 }, { "epoch": 2.3106939985181527, "grad_norm": 0.4528723880487849, "learning_rate": 4.857113196044505e-06, "loss": 0.5174, "step": 5891 }, { "epoch": 2.311089157816745, "grad_norm": 0.4594030254108174, "learning_rate": 4.857060983381601e-06, "loss": 0.5041, "step": 5892 }, { "epoch": 2.311484317115337, "grad_norm": 0.4688774998319715, "learning_rate": 4.857008761461593e-06, "loss": 0.5058, "step": 5893 }, { "epoch": 2.3118794764139294, "grad_norm": 0.4649598927489743, "learning_rate": 4.856956530284686e-06, "loss": 0.5022, "step": 5894 }, { "epoch": 2.3122746357125217, "grad_norm": 0.4498229566988718, "learning_rate": 4.856904289851084e-06, "loss": 0.5122, "step": 5895 }, { "epoch": 2.312669795011114, "grad_norm": 0.4683470822663338, "learning_rate": 4.856852040160994e-06, "loss": 0.5005, "step": 5896 }, { "epoch": 2.313064954309706, "grad_norm": 0.4911558695679091, "learning_rate": 4.856799781214621e-06, "loss": 0.5253, "step": 5897 }, { "epoch": 2.3134601136082984, "grad_norm": 0.464409082612125, "learning_rate": 4.856747513012168e-06, "loss": 0.5019, "step": 5898 }, { "epoch": 2.3138552729068906, "grad_norm": 0.4579973840895186, "learning_rate": 4.856695235553843e-06, "loss": 0.4942, "step": 5899 }, { "epoch": 2.314250432205483, "grad_norm": 0.4478579726762588, "learning_rate": 4.85664294883985e-06, "loss": 0.4892, "step": 5900 }, { "epoch": 2.314645591504075, "grad_norm": 0.4693973591026146, "learning_rate": 4.856590652870395e-06, "loss": 0.5006, "step": 5901 }, { "epoch": 2.3150407508026674, "grad_norm": 0.5045358380289714, "learning_rate": 4.856538347645681e-06, "loss": 0.5154, "step": 5902 }, { "epoch": 2.3154359101012596, "grad_norm": 0.4649328917632037, "learning_rate": 4.856486033165917e-06, "loss": 0.4955, "step": 5903 }, { "epoch": 2.315831069399852, "grad_norm": 0.46867742048507893, "learning_rate": 4.856433709431307e-06, "loss": 0.5016, "step": 5904 }, { "epoch": 2.316226228698444, "grad_norm": 0.4520581706582322, "learning_rate": 4.8563813764420555e-06, "loss": 0.5125, "step": 5905 }, { "epoch": 2.3166213879970363, "grad_norm": 0.45026206385228246, "learning_rate": 4.856329034198368e-06, "loss": 0.4939, "step": 5906 }, { "epoch": 2.3170165472956286, "grad_norm": 0.45030736202082045, "learning_rate": 4.8562766827004525e-06, "loss": 0.5242, "step": 5907 }, { "epoch": 2.317411706594221, "grad_norm": 0.4469905948107838, "learning_rate": 4.856224321948512e-06, "loss": 0.5073, "step": 5908 }, { "epoch": 2.317806865892813, "grad_norm": 0.45894383299198227, "learning_rate": 4.856171951942754e-06, "loss": 0.5101, "step": 5909 }, { "epoch": 2.3182020251914053, "grad_norm": 0.4395899572240971, "learning_rate": 4.856119572683383e-06, "loss": 0.4837, "step": 5910 }, { "epoch": 2.3185971844899975, "grad_norm": 0.44751988426963857, "learning_rate": 4.856067184170604e-06, "loss": 0.5126, "step": 5911 }, { "epoch": 2.3189923437885898, "grad_norm": 0.45656584660916594, "learning_rate": 4.856014786404625e-06, "loss": 0.4979, "step": 5912 }, { "epoch": 2.319387503087182, "grad_norm": 0.4506209307224017, "learning_rate": 4.8559623793856505e-06, "loss": 0.4942, "step": 5913 }, { "epoch": 2.3197826623857742, "grad_norm": 0.4461798611971729, "learning_rate": 4.855909963113886e-06, "loss": 0.4802, "step": 5914 }, { "epoch": 2.3201778216843665, "grad_norm": 0.46130265251689806, "learning_rate": 4.8558575375895375e-06, "loss": 0.5252, "step": 5915 }, { "epoch": 2.3205729809829587, "grad_norm": 0.45320094317982595, "learning_rate": 4.855805102812811e-06, "loss": 0.5089, "step": 5916 }, { "epoch": 2.320968140281551, "grad_norm": 0.4508029872376069, "learning_rate": 4.855752658783914e-06, "loss": 0.5247, "step": 5917 }, { "epoch": 2.321363299580143, "grad_norm": 0.4448389675565296, "learning_rate": 4.85570020550305e-06, "loss": 0.4821, "step": 5918 }, { "epoch": 2.3217584588787354, "grad_norm": 0.46839616082852714, "learning_rate": 4.8556477429704265e-06, "loss": 0.4997, "step": 5919 }, { "epoch": 2.3221536181773277, "grad_norm": 0.45777076533689726, "learning_rate": 4.855595271186249e-06, "loss": 0.4868, "step": 5920 }, { "epoch": 2.32254877747592, "grad_norm": 0.4606460723597928, "learning_rate": 4.855542790150723e-06, "loss": 0.4996, "step": 5921 }, { "epoch": 2.322943936774512, "grad_norm": 0.474039852550981, "learning_rate": 4.855490299864055e-06, "loss": 0.5024, "step": 5922 }, { "epoch": 2.3233390960731044, "grad_norm": 0.45026962258097275, "learning_rate": 4.8554378003264525e-06, "loss": 0.4911, "step": 5923 }, { "epoch": 2.3237342553716966, "grad_norm": 0.47088446275779033, "learning_rate": 4.85538529153812e-06, "loss": 0.5054, "step": 5924 }, { "epoch": 2.324129414670289, "grad_norm": 0.4520409677257949, "learning_rate": 4.855332773499265e-06, "loss": 0.5, "step": 5925 }, { "epoch": 2.324524573968881, "grad_norm": 0.5002895000010738, "learning_rate": 4.855280246210093e-06, "loss": 0.5013, "step": 5926 }, { "epoch": 2.3249197332674734, "grad_norm": 0.43977050907935966, "learning_rate": 4.8552277096708104e-06, "loss": 0.4955, "step": 5927 }, { "epoch": 2.3253148925660656, "grad_norm": 0.44665214038970635, "learning_rate": 4.855175163881623e-06, "loss": 0.491, "step": 5928 }, { "epoch": 2.325710051864658, "grad_norm": 0.4543680288607131, "learning_rate": 4.855122608842738e-06, "loss": 0.5021, "step": 5929 }, { "epoch": 2.32610521116325, "grad_norm": 0.47105886533075386, "learning_rate": 4.855070044554361e-06, "loss": 0.4931, "step": 5930 }, { "epoch": 2.3265003704618423, "grad_norm": 0.4542183871966276, "learning_rate": 4.8550174710167e-06, "loss": 0.4898, "step": 5931 }, { "epoch": 2.3268955297604346, "grad_norm": 0.44138796987873863, "learning_rate": 4.854964888229959e-06, "loss": 0.4778, "step": 5932 }, { "epoch": 2.327290689059027, "grad_norm": 0.4453768384169529, "learning_rate": 4.854912296194347e-06, "loss": 0.4952, "step": 5933 }, { "epoch": 2.327685848357619, "grad_norm": 0.46292907268573785, "learning_rate": 4.854859694910069e-06, "loss": 0.5194, "step": 5934 }, { "epoch": 2.3280810076562113, "grad_norm": 0.5289798354022537, "learning_rate": 4.854807084377332e-06, "loss": 0.4982, "step": 5935 }, { "epoch": 2.3284761669548035, "grad_norm": 0.4844679893735134, "learning_rate": 4.854754464596344e-06, "loss": 0.5083, "step": 5936 }, { "epoch": 2.3288713262533958, "grad_norm": 0.46477016057992765, "learning_rate": 4.854701835567309e-06, "loss": 0.5174, "step": 5937 }, { "epoch": 2.329266485551988, "grad_norm": 0.4434767608118626, "learning_rate": 4.8546491972904354e-06, "loss": 0.4904, "step": 5938 }, { "epoch": 2.3296616448505802, "grad_norm": 0.4481318939794802, "learning_rate": 4.854596549765929e-06, "loss": 0.5045, "step": 5939 }, { "epoch": 2.3300568041491725, "grad_norm": 0.4534393794450779, "learning_rate": 4.8545438929939985e-06, "loss": 0.4957, "step": 5940 }, { "epoch": 2.3304519634477647, "grad_norm": 0.4687124392558075, "learning_rate": 4.854491226974848e-06, "loss": 0.5176, "step": 5941 }, { "epoch": 2.330847122746357, "grad_norm": 0.6355213440329489, "learning_rate": 4.854438551708686e-06, "loss": 0.5251, "step": 5942 }, { "epoch": 2.331242282044949, "grad_norm": 0.45957482273724554, "learning_rate": 4.854385867195719e-06, "loss": 0.4918, "step": 5943 }, { "epoch": 2.3316374413435415, "grad_norm": 0.46063015196640106, "learning_rate": 4.854333173436154e-06, "loss": 0.5072, "step": 5944 }, { "epoch": 2.3320326006421337, "grad_norm": 0.46027939590902706, "learning_rate": 4.854280470430199e-06, "loss": 0.5212, "step": 5945 }, { "epoch": 2.332427759940726, "grad_norm": 0.4697655644185264, "learning_rate": 4.854227758178058e-06, "loss": 0.4959, "step": 5946 }, { "epoch": 2.332822919239318, "grad_norm": 0.45342411422226236, "learning_rate": 4.854175036679941e-06, "loss": 0.5064, "step": 5947 }, { "epoch": 2.3332180785379104, "grad_norm": 0.4534823543771811, "learning_rate": 4.854122305936054e-06, "loss": 0.5074, "step": 5948 }, { "epoch": 2.3336132378365027, "grad_norm": 0.46415220608251223, "learning_rate": 4.8540695659466045e-06, "loss": 0.4908, "step": 5949 }, { "epoch": 2.334008397135095, "grad_norm": 0.4555359616647736, "learning_rate": 4.854016816711799e-06, "loss": 0.5075, "step": 5950 }, { "epoch": 2.3344035564336876, "grad_norm": 0.4602201174143169, "learning_rate": 4.853964058231844e-06, "loss": 0.5022, "step": 5951 }, { "epoch": 2.33479871573228, "grad_norm": 0.445668129634111, "learning_rate": 4.853911290506949e-06, "loss": 0.4977, "step": 5952 }, { "epoch": 2.335193875030872, "grad_norm": 0.4677802262684022, "learning_rate": 4.853858513537319e-06, "loss": 0.5155, "step": 5953 }, { "epoch": 2.3355890343294643, "grad_norm": 0.46379592618135573, "learning_rate": 4.853805727323162e-06, "loss": 0.5065, "step": 5954 }, { "epoch": 2.3359841936280565, "grad_norm": 0.4438035631679444, "learning_rate": 4.853752931864685e-06, "loss": 0.4997, "step": 5955 }, { "epoch": 2.336379352926649, "grad_norm": 0.46352707418602923, "learning_rate": 4.853700127162097e-06, "loss": 0.499, "step": 5956 }, { "epoch": 2.336774512225241, "grad_norm": 0.4428618122246438, "learning_rate": 4.8536473132156025e-06, "loss": 0.4866, "step": 5957 }, { "epoch": 2.3371696715238333, "grad_norm": 0.4521576676325371, "learning_rate": 4.8535944900254115e-06, "loss": 0.5002, "step": 5958 }, { "epoch": 2.3375648308224255, "grad_norm": 0.4626467646021148, "learning_rate": 4.853541657591731e-06, "loss": 0.5019, "step": 5959 }, { "epoch": 2.3379599901210177, "grad_norm": 0.46120148479836487, "learning_rate": 4.853488815914768e-06, "loss": 0.5007, "step": 5960 }, { "epoch": 2.33835514941961, "grad_norm": 0.4506914517703311, "learning_rate": 4.85343596499473e-06, "loss": 0.4884, "step": 5961 }, { "epoch": 2.338750308718202, "grad_norm": 0.43940905649704776, "learning_rate": 4.853383104831823e-06, "loss": 0.4956, "step": 5962 }, { "epoch": 2.3391454680167945, "grad_norm": 0.46981121876573256, "learning_rate": 4.853330235426258e-06, "loss": 0.5144, "step": 5963 }, { "epoch": 2.3395406273153867, "grad_norm": 0.4910013553279312, "learning_rate": 4.85327735677824e-06, "loss": 0.5202, "step": 5964 }, { "epoch": 2.339935786613979, "grad_norm": 0.45060719865992904, "learning_rate": 4.853224468887978e-06, "loss": 0.502, "step": 5965 }, { "epoch": 2.340330945912571, "grad_norm": 0.4553193621696011, "learning_rate": 4.853171571755679e-06, "loss": 0.4781, "step": 5966 }, { "epoch": 2.3407261052111634, "grad_norm": 0.4515357883492096, "learning_rate": 4.853118665381551e-06, "loss": 0.4948, "step": 5967 }, { "epoch": 2.3411212645097557, "grad_norm": 0.45077739345730894, "learning_rate": 4.853065749765802e-06, "loss": 0.494, "step": 5968 }, { "epoch": 2.341516423808348, "grad_norm": 0.6697241027454435, "learning_rate": 4.853012824908639e-06, "loss": 0.5153, "step": 5969 }, { "epoch": 2.34191158310694, "grad_norm": 0.46052939599652937, "learning_rate": 4.852959890810271e-06, "loss": 0.4899, "step": 5970 }, { "epoch": 2.3423067424055324, "grad_norm": 0.4629470441078412, "learning_rate": 4.852906947470905e-06, "loss": 0.5122, "step": 5971 }, { "epoch": 2.3427019017041246, "grad_norm": 0.45554485923089544, "learning_rate": 4.8528539948907495e-06, "loss": 0.5118, "step": 5972 }, { "epoch": 2.343097061002717, "grad_norm": 0.46703177457997613, "learning_rate": 4.8528010330700125e-06, "loss": 0.4909, "step": 5973 }, { "epoch": 2.343492220301309, "grad_norm": 0.4592646549588022, "learning_rate": 4.852748062008901e-06, "loss": 0.4904, "step": 5974 }, { "epoch": 2.3438873795999013, "grad_norm": 0.474115303348079, "learning_rate": 4.8526950817076244e-06, "loss": 0.5038, "step": 5975 }, { "epoch": 2.3442825388984936, "grad_norm": 0.48149869168465126, "learning_rate": 4.85264209216639e-06, "loss": 0.5108, "step": 5976 }, { "epoch": 2.344677698197086, "grad_norm": 0.45837357450934973, "learning_rate": 4.852589093385406e-06, "loss": 0.5158, "step": 5977 }, { "epoch": 2.345072857495678, "grad_norm": 0.44593351395167496, "learning_rate": 4.852536085364881e-06, "loss": 0.4967, "step": 5978 }, { "epoch": 2.3454680167942703, "grad_norm": 0.4531644610848413, "learning_rate": 4.852483068105022e-06, "loss": 0.4981, "step": 5979 }, { "epoch": 2.3458631760928625, "grad_norm": 0.4595627732654401, "learning_rate": 4.852430041606039e-06, "loss": 0.4888, "step": 5980 }, { "epoch": 2.346258335391455, "grad_norm": 0.4581903361601766, "learning_rate": 4.852377005868138e-06, "loss": 0.4822, "step": 5981 }, { "epoch": 2.346653494690047, "grad_norm": 0.4483989827690783, "learning_rate": 4.85232396089153e-06, "loss": 0.5124, "step": 5982 }, { "epoch": 2.3470486539886393, "grad_norm": 0.45908454030513607, "learning_rate": 4.8522709066764204e-06, "loss": 0.5086, "step": 5983 }, { "epoch": 2.3474438132872315, "grad_norm": 0.4583400146391861, "learning_rate": 4.85221784322302e-06, "loss": 0.5112, "step": 5984 }, { "epoch": 2.3478389725858237, "grad_norm": 0.45763389698982093, "learning_rate": 4.852164770531536e-06, "loss": 0.5045, "step": 5985 }, { "epoch": 2.348234131884416, "grad_norm": 0.460537227961999, "learning_rate": 4.852111688602177e-06, "loss": 0.5215, "step": 5986 }, { "epoch": 2.3486292911830082, "grad_norm": 0.46679943581021927, "learning_rate": 4.852058597435152e-06, "loss": 0.5116, "step": 5987 }, { "epoch": 2.3490244504816005, "grad_norm": 0.44817207334568054, "learning_rate": 4.852005497030669e-06, "loss": 0.5015, "step": 5988 }, { "epoch": 2.3494196097801927, "grad_norm": 0.4489419200289153, "learning_rate": 4.851952387388936e-06, "loss": 0.5115, "step": 5989 }, { "epoch": 2.349814769078785, "grad_norm": 0.45473940760337633, "learning_rate": 4.851899268510163e-06, "loss": 0.4971, "step": 5990 }, { "epoch": 2.350209928377377, "grad_norm": 0.4595345160766586, "learning_rate": 4.851846140394557e-06, "loss": 0.5165, "step": 5991 }, { "epoch": 2.3506050876759694, "grad_norm": 0.45078645033514275, "learning_rate": 4.851793003042328e-06, "loss": 0.4801, "step": 5992 }, { "epoch": 2.3510002469745617, "grad_norm": 0.46083912000399313, "learning_rate": 4.851739856453685e-06, "loss": 0.506, "step": 5993 }, { "epoch": 2.351395406273154, "grad_norm": 0.456612483620041, "learning_rate": 4.851686700628834e-06, "loss": 0.5031, "step": 5994 }, { "epoch": 2.351790565571746, "grad_norm": 0.4617876715575406, "learning_rate": 4.851633535567987e-06, "loss": 0.5115, "step": 5995 }, { "epoch": 2.3521857248703384, "grad_norm": 0.5326660248295265, "learning_rate": 4.851580361271351e-06, "loss": 0.5117, "step": 5996 }, { "epoch": 2.3525808841689306, "grad_norm": 0.4613788668226646, "learning_rate": 4.851527177739135e-06, "loss": 0.5209, "step": 5997 }, { "epoch": 2.352976043467523, "grad_norm": 0.44443451469751716, "learning_rate": 4.851473984971549e-06, "loss": 0.4932, "step": 5998 }, { "epoch": 2.353371202766115, "grad_norm": 0.45353874492848084, "learning_rate": 4.851420782968801e-06, "loss": 0.512, "step": 5999 }, { "epoch": 2.3537663620647074, "grad_norm": 0.45136448249781186, "learning_rate": 4.8513675717311e-06, "loss": 0.4939, "step": 6000 }, { "epoch": 2.3541615213632996, "grad_norm": 0.4516803569929796, "learning_rate": 4.851314351258654e-06, "loss": 0.5113, "step": 6001 }, { "epoch": 2.354556680661892, "grad_norm": 0.4561308566297529, "learning_rate": 4.851261121551674e-06, "loss": 0.4967, "step": 6002 }, { "epoch": 2.354951839960484, "grad_norm": 0.5260342783696013, "learning_rate": 4.8512078826103675e-06, "loss": 0.4982, "step": 6003 }, { "epoch": 2.3553469992590763, "grad_norm": 0.47897271506812045, "learning_rate": 4.8511546344349444e-06, "loss": 0.5193, "step": 6004 }, { "epoch": 2.3557421585576686, "grad_norm": 0.4638833627455354, "learning_rate": 4.851101377025614e-06, "loss": 0.4941, "step": 6005 }, { "epoch": 2.356137317856261, "grad_norm": 0.4427664156009943, "learning_rate": 4.8510481103825845e-06, "loss": 0.4878, "step": 6006 }, { "epoch": 2.356532477154853, "grad_norm": 0.44716700666783465, "learning_rate": 4.850994834506065e-06, "loss": 0.4942, "step": 6007 }, { "epoch": 2.3569276364534453, "grad_norm": 0.45290206397069754, "learning_rate": 4.850941549396267e-06, "loss": 0.504, "step": 6008 }, { "epoch": 2.3573227957520375, "grad_norm": 0.4624224635836012, "learning_rate": 4.850888255053398e-06, "loss": 0.5128, "step": 6009 }, { "epoch": 2.3577179550506298, "grad_norm": 0.4685159236259062, "learning_rate": 4.850834951477666e-06, "loss": 0.5056, "step": 6010 }, { "epoch": 2.358113114349222, "grad_norm": 0.4637190744227209, "learning_rate": 4.850781638669283e-06, "loss": 0.4789, "step": 6011 }, { "epoch": 2.3585082736478142, "grad_norm": 0.4519731150582127, "learning_rate": 4.850728316628457e-06, "loss": 0.5129, "step": 6012 }, { "epoch": 2.3589034329464065, "grad_norm": 0.4538513437911314, "learning_rate": 4.8506749853553974e-06, "loss": 0.4994, "step": 6013 }, { "epoch": 2.3592985922449987, "grad_norm": 0.46979489205654373, "learning_rate": 4.850621644850314e-06, "loss": 0.5151, "step": 6014 }, { "epoch": 2.359693751543591, "grad_norm": 0.45746167900887663, "learning_rate": 4.850568295113416e-06, "loss": 0.5168, "step": 6015 }, { "epoch": 2.360088910842183, "grad_norm": 0.4575481082495061, "learning_rate": 4.850514936144913e-06, "loss": 0.5072, "step": 6016 }, { "epoch": 2.3604840701407754, "grad_norm": 0.45975740355910427, "learning_rate": 4.850461567945015e-06, "loss": 0.5, "step": 6017 }, { "epoch": 2.3608792294393677, "grad_norm": 0.4748694051073642, "learning_rate": 4.850408190513931e-06, "loss": 0.5029, "step": 6018 }, { "epoch": 2.36127438873796, "grad_norm": 0.4914980276998342, "learning_rate": 4.850354803851871e-06, "loss": 0.5254, "step": 6019 }, { "epoch": 2.361669548036552, "grad_norm": 0.4800284534033041, "learning_rate": 4.850301407959045e-06, "loss": 0.5161, "step": 6020 }, { "epoch": 2.3620647073351444, "grad_norm": 0.4462205733041414, "learning_rate": 4.8502480028356615e-06, "loss": 0.5042, "step": 6021 }, { "epoch": 2.3624598666337366, "grad_norm": 0.4709204676815143, "learning_rate": 4.850194588481931e-06, "loss": 0.502, "step": 6022 }, { "epoch": 2.362855025932329, "grad_norm": 0.4568772714716113, "learning_rate": 4.8501411648980635e-06, "loss": 0.4653, "step": 6023 }, { "epoch": 2.363250185230921, "grad_norm": 0.4547015608724961, "learning_rate": 4.850087732084269e-06, "loss": 0.5, "step": 6024 }, { "epoch": 2.3636453445295134, "grad_norm": 0.480326169527228, "learning_rate": 4.850034290040756e-06, "loss": 0.5118, "step": 6025 }, { "epoch": 2.3640405038281056, "grad_norm": 0.4598744970305476, "learning_rate": 4.849980838767736e-06, "loss": 0.498, "step": 6026 }, { "epoch": 2.364435663126698, "grad_norm": 0.4600510631142906, "learning_rate": 4.849927378265418e-06, "loss": 0.4725, "step": 6027 }, { "epoch": 2.36483082242529, "grad_norm": 0.4633225031808518, "learning_rate": 4.8498739085340125e-06, "loss": 0.5029, "step": 6028 }, { "epoch": 2.3652259817238823, "grad_norm": 0.4497673903824722, "learning_rate": 4.849820429573729e-06, "loss": 0.5061, "step": 6029 }, { "epoch": 2.3656211410224746, "grad_norm": 0.45283626682133, "learning_rate": 4.849766941384777e-06, "loss": 0.5035, "step": 6030 }, { "epoch": 2.366016300321067, "grad_norm": 0.45318111987548476, "learning_rate": 4.8497134439673685e-06, "loss": 0.5059, "step": 6031 }, { "epoch": 2.366411459619659, "grad_norm": 0.45785207671037326, "learning_rate": 4.849659937321713e-06, "loss": 0.527, "step": 6032 }, { "epoch": 2.3668066189182513, "grad_norm": 0.45427024013798384, "learning_rate": 4.849606421448018e-06, "loss": 0.5253, "step": 6033 }, { "epoch": 2.3672017782168435, "grad_norm": 0.45732742980462926, "learning_rate": 4.849552896346497e-06, "loss": 0.504, "step": 6034 }, { "epoch": 2.3675969375154358, "grad_norm": 0.5128918032034719, "learning_rate": 4.849499362017359e-06, "loss": 0.5012, "step": 6035 }, { "epoch": 2.367992096814028, "grad_norm": 0.46442492874120134, "learning_rate": 4.8494458184608135e-06, "loss": 0.4985, "step": 6036 }, { "epoch": 2.3683872561126202, "grad_norm": 0.44382409744050355, "learning_rate": 4.849392265677072e-06, "loss": 0.4984, "step": 6037 }, { "epoch": 2.3687824154112125, "grad_norm": 0.4417781251327167, "learning_rate": 4.8493387036663445e-06, "loss": 0.4904, "step": 6038 }, { "epoch": 2.3691775747098047, "grad_norm": 0.46452188611272743, "learning_rate": 4.84928513242884e-06, "loss": 0.5057, "step": 6039 }, { "epoch": 2.369572734008397, "grad_norm": 0.4939272964413806, "learning_rate": 4.849231551964771e-06, "loss": 0.5275, "step": 6040 }, { "epoch": 2.369967893306989, "grad_norm": 0.4656217747759951, "learning_rate": 4.849177962274348e-06, "loss": 0.4959, "step": 6041 }, { "epoch": 2.3703630526055814, "grad_norm": 0.45451303351162, "learning_rate": 4.8491243633577785e-06, "loss": 0.4827, "step": 6042 }, { "epoch": 2.3707582119041737, "grad_norm": 0.46682609497280303, "learning_rate": 4.849070755215276e-06, "loss": 0.4905, "step": 6043 }, { "epoch": 2.371153371202766, "grad_norm": 0.451485975019632, "learning_rate": 4.849017137847049e-06, "loss": 0.5045, "step": 6044 }, { "epoch": 2.371548530501358, "grad_norm": 0.4631255854420321, "learning_rate": 4.84896351125331e-06, "loss": 0.5326, "step": 6045 }, { "epoch": 2.3719436897999504, "grad_norm": 0.4571223855093204, "learning_rate": 4.848909875434269e-06, "loss": 0.4987, "step": 6046 }, { "epoch": 2.3723388490985426, "grad_norm": 0.4680677582589837, "learning_rate": 4.848856230390137e-06, "loss": 0.5158, "step": 6047 }, { "epoch": 2.372734008397135, "grad_norm": 0.4446257712595702, "learning_rate": 4.848802576121122e-06, "loss": 0.5033, "step": 6048 }, { "epoch": 2.373129167695727, "grad_norm": 0.4630473866895991, "learning_rate": 4.848748912627438e-06, "loss": 0.5042, "step": 6049 }, { "epoch": 2.3735243269943194, "grad_norm": 0.44778383563697305, "learning_rate": 4.848695239909295e-06, "loss": 0.4975, "step": 6050 }, { "epoch": 2.3739194862929116, "grad_norm": 0.4385248274134849, "learning_rate": 4.848641557966902e-06, "loss": 0.4859, "step": 6051 }, { "epoch": 2.374314645591504, "grad_norm": 0.4527513035838103, "learning_rate": 4.848587866800472e-06, "loss": 0.4834, "step": 6052 }, { "epoch": 2.374709804890096, "grad_norm": 0.45987272190820844, "learning_rate": 4.8485341664102146e-06, "loss": 0.4934, "step": 6053 }, { "epoch": 2.3751049641886883, "grad_norm": 0.44214190083964766, "learning_rate": 4.84848045679634e-06, "loss": 0.4863, "step": 6054 }, { "epoch": 2.3755001234872806, "grad_norm": 0.47023720851459105, "learning_rate": 4.848426737959062e-06, "loss": 0.5147, "step": 6055 }, { "epoch": 2.375895282785873, "grad_norm": 0.4536759734264281, "learning_rate": 4.848373009898589e-06, "loss": 0.5058, "step": 6056 }, { "epoch": 2.3762904420844655, "grad_norm": 0.46339461388038883, "learning_rate": 4.848319272615134e-06, "loss": 0.5292, "step": 6057 }, { "epoch": 2.3766856013830577, "grad_norm": 0.43958334885581213, "learning_rate": 4.848265526108906e-06, "loss": 0.485, "step": 6058 }, { "epoch": 2.37708076068165, "grad_norm": 0.4442145240300568, "learning_rate": 4.848211770380117e-06, "loss": 0.4961, "step": 6059 }, { "epoch": 2.377475919980242, "grad_norm": 0.47607365592400747, "learning_rate": 4.848158005428978e-06, "loss": 0.5151, "step": 6060 }, { "epoch": 2.3778710792788345, "grad_norm": 0.4512904547194257, "learning_rate": 4.8481042312557e-06, "loss": 0.4923, "step": 6061 }, { "epoch": 2.3782662385774267, "grad_norm": 0.44456571012970686, "learning_rate": 4.8480504478604946e-06, "loss": 0.5055, "step": 6062 }, { "epoch": 2.378661397876019, "grad_norm": 0.4560502078542866, "learning_rate": 4.847996655243572e-06, "loss": 0.4988, "step": 6063 }, { "epoch": 2.379056557174611, "grad_norm": 0.45340332096358, "learning_rate": 4.847942853405146e-06, "loss": 0.4981, "step": 6064 }, { "epoch": 2.3794517164732034, "grad_norm": 0.4581291968585293, "learning_rate": 4.847889042345425e-06, "loss": 0.4771, "step": 6065 }, { "epoch": 2.3798468757717957, "grad_norm": 0.459995603273202, "learning_rate": 4.8478352220646215e-06, "loss": 0.4929, "step": 6066 }, { "epoch": 2.380242035070388, "grad_norm": 0.4474153131280138, "learning_rate": 4.847781392562948e-06, "loss": 0.5069, "step": 6067 }, { "epoch": 2.38063719436898, "grad_norm": 0.4493358041327097, "learning_rate": 4.847727553840615e-06, "loss": 0.4881, "step": 6068 }, { "epoch": 2.3810323536675724, "grad_norm": 0.45233974550022343, "learning_rate": 4.847673705897832e-06, "loss": 0.5081, "step": 6069 }, { "epoch": 2.3814275129661646, "grad_norm": 0.5070469825872892, "learning_rate": 4.847619848734814e-06, "loss": 0.5149, "step": 6070 }, { "epoch": 2.381822672264757, "grad_norm": 0.4565568022870259, "learning_rate": 4.84756598235177e-06, "loss": 0.4859, "step": 6071 }, { "epoch": 2.382217831563349, "grad_norm": 0.4871291260740165, "learning_rate": 4.847512106748912e-06, "loss": 0.5239, "step": 6072 }, { "epoch": 2.3826129908619413, "grad_norm": 0.4504152728422771, "learning_rate": 4.847458221926453e-06, "loss": 0.5099, "step": 6073 }, { "epoch": 2.3830081501605336, "grad_norm": 0.4591280231930706, "learning_rate": 4.847404327884603e-06, "loss": 0.4842, "step": 6074 }, { "epoch": 2.383403309459126, "grad_norm": 0.45851366936986665, "learning_rate": 4.847350424623574e-06, "loss": 0.5088, "step": 6075 }, { "epoch": 2.383798468757718, "grad_norm": 0.4634869558586333, "learning_rate": 4.847296512143577e-06, "loss": 0.5131, "step": 6076 }, { "epoch": 2.3841936280563103, "grad_norm": 0.45304642062746575, "learning_rate": 4.847242590444826e-06, "loss": 0.4977, "step": 6077 }, { "epoch": 2.3845887873549025, "grad_norm": 0.4561346179180274, "learning_rate": 4.847188659527532e-06, "loss": 0.5047, "step": 6078 }, { "epoch": 2.384983946653495, "grad_norm": 0.4526743884370499, "learning_rate": 4.847134719391905e-06, "loss": 0.4914, "step": 6079 }, { "epoch": 2.385379105952087, "grad_norm": 0.46407896971341867, "learning_rate": 4.847080770038158e-06, "loss": 0.5089, "step": 6080 }, { "epoch": 2.3857742652506793, "grad_norm": 0.45258104010987427, "learning_rate": 4.847026811466504e-06, "loss": 0.5022, "step": 6081 }, { "epoch": 2.3861694245492715, "grad_norm": 0.461969126357832, "learning_rate": 4.846972843677153e-06, "loss": 0.497, "step": 6082 }, { "epoch": 2.3865645838478637, "grad_norm": 0.4492623434194907, "learning_rate": 4.846918866670318e-06, "loss": 0.4979, "step": 6083 }, { "epoch": 2.386959743146456, "grad_norm": 0.4571175682930661, "learning_rate": 4.846864880446211e-06, "loss": 0.5196, "step": 6084 }, { "epoch": 2.387354902445048, "grad_norm": 0.45913437019988257, "learning_rate": 4.8468108850050436e-06, "loss": 0.5046, "step": 6085 }, { "epoch": 2.3877500617436405, "grad_norm": 0.45074777593560994, "learning_rate": 4.846756880347029e-06, "loss": 0.4989, "step": 6086 }, { "epoch": 2.3881452210422327, "grad_norm": 0.44092077546541725, "learning_rate": 4.846702866472377e-06, "loss": 0.4794, "step": 6087 }, { "epoch": 2.388540380340825, "grad_norm": 0.46155684618109705, "learning_rate": 4.846648843381302e-06, "loss": 0.5089, "step": 6088 }, { "epoch": 2.388935539639417, "grad_norm": 0.46885706820416534, "learning_rate": 4.846594811074015e-06, "loss": 0.5171, "step": 6089 }, { "epoch": 2.3893306989380094, "grad_norm": 0.43931037237601184, "learning_rate": 4.846540769550728e-06, "loss": 0.4882, "step": 6090 }, { "epoch": 2.3897258582366017, "grad_norm": 0.4353274011341564, "learning_rate": 4.8464867188116545e-06, "loss": 0.482, "step": 6091 }, { "epoch": 2.390121017535194, "grad_norm": 0.47037433987558747, "learning_rate": 4.846432658857006e-06, "loss": 0.4994, "step": 6092 }, { "epoch": 2.390516176833786, "grad_norm": 0.4593847264977308, "learning_rate": 4.846378589686995e-06, "loss": 0.507, "step": 6093 }, { "epoch": 2.3909113361323784, "grad_norm": 0.4601985413381888, "learning_rate": 4.846324511301834e-06, "loss": 0.5023, "step": 6094 }, { "epoch": 2.3913064954309706, "grad_norm": 0.452640709655825, "learning_rate": 4.846270423701734e-06, "loss": 0.4995, "step": 6095 }, { "epoch": 2.391701654729563, "grad_norm": 0.45638264701889014, "learning_rate": 4.846216326886909e-06, "loss": 0.4959, "step": 6096 }, { "epoch": 2.392096814028155, "grad_norm": 0.4826711924842887, "learning_rate": 4.846162220857571e-06, "loss": 0.5177, "step": 6097 }, { "epoch": 2.3924919733267473, "grad_norm": 0.4888215683792986, "learning_rate": 4.846108105613932e-06, "loss": 0.5008, "step": 6098 }, { "epoch": 2.3928871326253396, "grad_norm": 0.46320608794522933, "learning_rate": 4.8460539811562055e-06, "loss": 0.5089, "step": 6099 }, { "epoch": 2.393282291923932, "grad_norm": 0.45725756454431987, "learning_rate": 4.845999847484604e-06, "loss": 0.5203, "step": 6100 }, { "epoch": 2.393677451222524, "grad_norm": 0.44991333274030526, "learning_rate": 4.84594570459934e-06, "loss": 0.4848, "step": 6101 }, { "epoch": 2.3940726105211163, "grad_norm": 0.4600972177592437, "learning_rate": 4.845891552500625e-06, "loss": 0.5093, "step": 6102 }, { "epoch": 2.3944677698197085, "grad_norm": 0.5860812755960613, "learning_rate": 4.8458373911886716e-06, "loss": 0.5069, "step": 6103 }, { "epoch": 2.394862929118301, "grad_norm": 0.4538835124457885, "learning_rate": 4.845783220663694e-06, "loss": 0.4874, "step": 6104 }, { "epoch": 2.395258088416893, "grad_norm": 0.45218392628786025, "learning_rate": 4.845729040925905e-06, "loss": 0.496, "step": 6105 }, { "epoch": 2.3956532477154853, "grad_norm": 0.4493156953117064, "learning_rate": 4.845674851975516e-06, "loss": 0.5225, "step": 6106 }, { "epoch": 2.3960484070140775, "grad_norm": 0.4721248920273722, "learning_rate": 4.845620653812742e-06, "loss": 0.503, "step": 6107 }, { "epoch": 2.3964435663126697, "grad_norm": 0.4702634244511809, "learning_rate": 4.845566446437793e-06, "loss": 0.509, "step": 6108 }, { "epoch": 2.396838725611262, "grad_norm": 0.4517371761872178, "learning_rate": 4.845512229850883e-06, "loss": 0.5005, "step": 6109 }, { "epoch": 2.3972338849098542, "grad_norm": 0.45784931465490497, "learning_rate": 4.845458004052226e-06, "loss": 0.5292, "step": 6110 }, { "epoch": 2.3976290442084465, "grad_norm": 0.47366039215477546, "learning_rate": 4.845403769042034e-06, "loss": 0.4974, "step": 6111 }, { "epoch": 2.3980242035070387, "grad_norm": 0.45191947098684676, "learning_rate": 4.8453495248205205e-06, "loss": 0.5141, "step": 6112 }, { "epoch": 2.398419362805631, "grad_norm": 0.4713607350755517, "learning_rate": 4.845295271387897e-06, "loss": 0.4976, "step": 6113 }, { "epoch": 2.398814522104223, "grad_norm": 0.4660999045638153, "learning_rate": 4.84524100874438e-06, "loss": 0.5113, "step": 6114 }, { "epoch": 2.3992096814028154, "grad_norm": 0.47015668850310877, "learning_rate": 4.845186736890179e-06, "loss": 0.4907, "step": 6115 }, { "epoch": 2.3996048407014077, "grad_norm": 0.46766792350712855, "learning_rate": 4.845132455825508e-06, "loss": 0.513, "step": 6116 }, { "epoch": 2.4, "grad_norm": 1.0836917520596936, "learning_rate": 4.8450781655505815e-06, "loss": 0.4935, "step": 6117 }, { "epoch": 2.400395159298592, "grad_norm": 0.46151727429931927, "learning_rate": 4.845023866065612e-06, "loss": 0.5011, "step": 6118 }, { "epoch": 2.4007903185971844, "grad_norm": 0.4605620408152997, "learning_rate": 4.844969557370813e-06, "loss": 0.5045, "step": 6119 }, { "epoch": 2.4011854778957766, "grad_norm": 0.45858291441150556, "learning_rate": 4.844915239466398e-06, "loss": 0.5035, "step": 6120 }, { "epoch": 2.401580637194369, "grad_norm": 0.4572405133813424, "learning_rate": 4.844860912352579e-06, "loss": 0.4987, "step": 6121 }, { "epoch": 2.401975796492961, "grad_norm": 0.4532962343436789, "learning_rate": 4.844806576029571e-06, "loss": 0.5168, "step": 6122 }, { "epoch": 2.4023709557915534, "grad_norm": 0.44424364135152433, "learning_rate": 4.844752230497586e-06, "loss": 0.4874, "step": 6123 }, { "epoch": 2.4027661150901456, "grad_norm": 0.45178951408739765, "learning_rate": 4.844697875756837e-06, "loss": 0.5028, "step": 6124 }, { "epoch": 2.403161274388738, "grad_norm": 0.46131443978892384, "learning_rate": 4.844643511807539e-06, "loss": 0.4997, "step": 6125 }, { "epoch": 2.40355643368733, "grad_norm": 0.44557753095639435, "learning_rate": 4.844589138649906e-06, "loss": 0.4961, "step": 6126 }, { "epoch": 2.4039515929859223, "grad_norm": 0.44660632251012705, "learning_rate": 4.84453475628415e-06, "loss": 0.5055, "step": 6127 }, { "epoch": 2.4043467522845146, "grad_norm": 0.46297338232365337, "learning_rate": 4.844480364710486e-06, "loss": 0.5106, "step": 6128 }, { "epoch": 2.404741911583107, "grad_norm": 0.4546931439519957, "learning_rate": 4.844425963929126e-06, "loss": 0.507, "step": 6129 }, { "epoch": 2.405137070881699, "grad_norm": 0.4393631016625778, "learning_rate": 4.844371553940284e-06, "loss": 0.5011, "step": 6130 }, { "epoch": 2.4055322301802913, "grad_norm": 0.44198752628335675, "learning_rate": 4.844317134744174e-06, "loss": 0.5045, "step": 6131 }, { "epoch": 2.4059273894788835, "grad_norm": 0.4529021301640311, "learning_rate": 4.844262706341011e-06, "loss": 0.5139, "step": 6132 }, { "epoch": 2.4063225487774758, "grad_norm": 0.46321469858495373, "learning_rate": 4.844208268731007e-06, "loss": 0.4949, "step": 6133 }, { "epoch": 2.406717708076068, "grad_norm": 0.5352727595143272, "learning_rate": 4.8441538219143765e-06, "loss": 0.5112, "step": 6134 }, { "epoch": 2.4071128673746602, "grad_norm": 0.45404512767050736, "learning_rate": 4.844099365891333e-06, "loss": 0.5071, "step": 6135 }, { "epoch": 2.4075080266732525, "grad_norm": 0.44661826142521477, "learning_rate": 4.844044900662091e-06, "loss": 0.4842, "step": 6136 }, { "epoch": 2.4079031859718447, "grad_norm": 0.4866476660219383, "learning_rate": 4.843990426226864e-06, "loss": 0.4978, "step": 6137 }, { "epoch": 2.408298345270437, "grad_norm": 0.48353551024417507, "learning_rate": 4.843935942585865e-06, "loss": 0.5364, "step": 6138 }, { "epoch": 2.408693504569029, "grad_norm": 0.4710485363082892, "learning_rate": 4.84388144973931e-06, "loss": 0.503, "step": 6139 }, { "epoch": 2.409088663867622, "grad_norm": 0.4574698914104126, "learning_rate": 4.843826947687412e-06, "loss": 0.5185, "step": 6140 }, { "epoch": 2.409483823166214, "grad_norm": 0.46016949246716193, "learning_rate": 4.843772436430384e-06, "loss": 0.5148, "step": 6141 }, { "epoch": 2.4098789824648064, "grad_norm": 0.4482322493892279, "learning_rate": 4.843717915968442e-06, "loss": 0.5046, "step": 6142 }, { "epoch": 2.4102741417633986, "grad_norm": 0.45116521545047983, "learning_rate": 4.843663386301799e-06, "loss": 0.515, "step": 6143 }, { "epoch": 2.410669301061991, "grad_norm": 0.4590353660506984, "learning_rate": 4.843608847430669e-06, "loss": 0.5015, "step": 6144 }, { "epoch": 2.411064460360583, "grad_norm": 0.4509801127921544, "learning_rate": 4.843554299355267e-06, "loss": 0.5036, "step": 6145 }, { "epoch": 2.4114596196591753, "grad_norm": 0.46483479028215885, "learning_rate": 4.8434997420758065e-06, "loss": 0.5002, "step": 6146 }, { "epoch": 2.4118547789577676, "grad_norm": 0.4594113983493731, "learning_rate": 4.843445175592502e-06, "loss": 0.5072, "step": 6147 }, { "epoch": 2.41224993825636, "grad_norm": 0.45010607482721837, "learning_rate": 4.843390599905568e-06, "loss": 0.509, "step": 6148 }, { "epoch": 2.412645097554952, "grad_norm": 0.47368631625572943, "learning_rate": 4.843336015015218e-06, "loss": 0.5363, "step": 6149 }, { "epoch": 2.4130402568535443, "grad_norm": 0.4583173953744174, "learning_rate": 4.843281420921668e-06, "loss": 0.4834, "step": 6150 }, { "epoch": 2.4134354161521365, "grad_norm": 0.46218829266694594, "learning_rate": 4.843226817625132e-06, "loss": 0.5132, "step": 6151 }, { "epoch": 2.4138305754507288, "grad_norm": 0.46198156224588854, "learning_rate": 4.843172205125824e-06, "loss": 0.5132, "step": 6152 }, { "epoch": 2.414225734749321, "grad_norm": 0.4703069352755098, "learning_rate": 4.843117583423957e-06, "loss": 0.5222, "step": 6153 }, { "epoch": 2.4146208940479132, "grad_norm": 0.4614134551221174, "learning_rate": 4.843062952519748e-06, "loss": 0.4887, "step": 6154 }, { "epoch": 2.4150160533465055, "grad_norm": 0.4697970415651049, "learning_rate": 4.843008312413409e-06, "loss": 0.5093, "step": 6155 }, { "epoch": 2.4154112126450977, "grad_norm": 0.45775748905364666, "learning_rate": 4.842953663105158e-06, "loss": 0.4919, "step": 6156 }, { "epoch": 2.41580637194369, "grad_norm": 0.45387013696855416, "learning_rate": 4.8428990045952075e-06, "loss": 0.4933, "step": 6157 }, { "epoch": 2.416201531242282, "grad_norm": 0.49320450897769486, "learning_rate": 4.842844336883772e-06, "loss": 0.5058, "step": 6158 }, { "epoch": 2.4165966905408744, "grad_norm": 0.4594972163377635, "learning_rate": 4.842789659971065e-06, "loss": 0.4956, "step": 6159 }, { "epoch": 2.4169918498394667, "grad_norm": 0.44799652445453403, "learning_rate": 4.842734973857305e-06, "loss": 0.5003, "step": 6160 }, { "epoch": 2.417387009138059, "grad_norm": 0.4555779832663966, "learning_rate": 4.842680278542704e-06, "loss": 0.4988, "step": 6161 }, { "epoch": 2.417782168436651, "grad_norm": 0.46273844976885875, "learning_rate": 4.8426255740274776e-06, "loss": 0.4881, "step": 6162 }, { "epoch": 2.4181773277352434, "grad_norm": 0.44452340365885856, "learning_rate": 4.84257086031184e-06, "loss": 0.5114, "step": 6163 }, { "epoch": 2.4185724870338356, "grad_norm": 0.45595741947905993, "learning_rate": 4.842516137396007e-06, "loss": 0.4976, "step": 6164 }, { "epoch": 2.418967646332428, "grad_norm": 0.45112785524674676, "learning_rate": 4.842461405280192e-06, "loss": 0.5021, "step": 6165 }, { "epoch": 2.41936280563102, "grad_norm": 0.4540156886101404, "learning_rate": 4.842406663964612e-06, "loss": 0.4895, "step": 6166 }, { "epoch": 2.4197579649296124, "grad_norm": 0.4514068135997412, "learning_rate": 4.842351913449481e-06, "loss": 0.5139, "step": 6167 }, { "epoch": 2.4201531242282046, "grad_norm": 0.4612849080685796, "learning_rate": 4.842297153735014e-06, "loss": 0.4983, "step": 6168 }, { "epoch": 2.420548283526797, "grad_norm": 0.47339152666834156, "learning_rate": 4.842242384821426e-06, "loss": 0.4881, "step": 6169 }, { "epoch": 2.420943442825389, "grad_norm": 0.46244595327643095, "learning_rate": 4.842187606708932e-06, "loss": 0.5034, "step": 6170 }, { "epoch": 2.4213386021239813, "grad_norm": 0.4386337093959035, "learning_rate": 4.8421328193977475e-06, "loss": 0.4869, "step": 6171 }, { "epoch": 2.4217337614225736, "grad_norm": 0.4718123255716426, "learning_rate": 4.842078022888088e-06, "loss": 0.4913, "step": 6172 }, { "epoch": 2.422128920721166, "grad_norm": 0.4612919926951648, "learning_rate": 4.8420232171801675e-06, "loss": 0.5129, "step": 6173 }, { "epoch": 2.422524080019758, "grad_norm": 0.4486144324992224, "learning_rate": 4.841968402274202e-06, "loss": 0.5147, "step": 6174 }, { "epoch": 2.4229192393183503, "grad_norm": 0.46891228322282674, "learning_rate": 4.841913578170407e-06, "loss": 0.5172, "step": 6175 }, { "epoch": 2.4233143986169425, "grad_norm": 0.44889237277545324, "learning_rate": 4.841858744868998e-06, "loss": 0.511, "step": 6176 }, { "epoch": 2.4237095579155348, "grad_norm": 0.45184987151406364, "learning_rate": 4.841803902370189e-06, "loss": 0.4915, "step": 6177 }, { "epoch": 2.424104717214127, "grad_norm": 0.45539717660361717, "learning_rate": 4.841749050674196e-06, "loss": 0.508, "step": 6178 }, { "epoch": 2.4244998765127193, "grad_norm": 0.46183897777708616, "learning_rate": 4.841694189781235e-06, "loss": 0.5147, "step": 6179 }, { "epoch": 2.4248950358113115, "grad_norm": 0.4596909143183604, "learning_rate": 4.841639319691522e-06, "loss": 0.506, "step": 6180 }, { "epoch": 2.4252901951099037, "grad_norm": 0.4446113633044188, "learning_rate": 4.841584440405271e-06, "loss": 0.5064, "step": 6181 }, { "epoch": 2.425685354408496, "grad_norm": 0.45743332618448895, "learning_rate": 4.841529551922699e-06, "loss": 0.504, "step": 6182 }, { "epoch": 2.426080513707088, "grad_norm": 0.4816180094305801, "learning_rate": 4.84147465424402e-06, "loss": 0.4925, "step": 6183 }, { "epoch": 2.4264756730056805, "grad_norm": 0.4543170154356122, "learning_rate": 4.84141974736945e-06, "loss": 0.5244, "step": 6184 }, { "epoch": 2.4268708323042727, "grad_norm": 0.448689306635801, "learning_rate": 4.841364831299206e-06, "loss": 0.511, "step": 6185 }, { "epoch": 2.427265991602865, "grad_norm": 0.45167259627359596, "learning_rate": 4.8413099060335026e-06, "loss": 0.5084, "step": 6186 }, { "epoch": 2.427661150901457, "grad_norm": 0.4610385677246653, "learning_rate": 4.841254971572555e-06, "loss": 0.5097, "step": 6187 }, { "epoch": 2.4280563102000494, "grad_norm": 0.4709029211327468, "learning_rate": 4.84120002791658e-06, "loss": 0.5125, "step": 6188 }, { "epoch": 2.4284514694986417, "grad_norm": 0.4585385642897026, "learning_rate": 4.841145075065793e-06, "loss": 0.5255, "step": 6189 }, { "epoch": 2.428846628797234, "grad_norm": 0.45124097243480904, "learning_rate": 4.841090113020409e-06, "loss": 0.4851, "step": 6190 }, { "epoch": 2.429241788095826, "grad_norm": 0.4589307579275915, "learning_rate": 4.841035141780645e-06, "loss": 0.5237, "step": 6191 }, { "epoch": 2.4296369473944184, "grad_norm": 0.4590381604206053, "learning_rate": 4.840980161346717e-06, "loss": 0.502, "step": 6192 }, { "epoch": 2.4300321066930106, "grad_norm": 0.4575028892209658, "learning_rate": 4.84092517171884e-06, "loss": 0.5317, "step": 6193 }, { "epoch": 2.430427265991603, "grad_norm": 0.4410480910569929, "learning_rate": 4.840870172897231e-06, "loss": 0.5079, "step": 6194 }, { "epoch": 2.430822425290195, "grad_norm": 0.45485313052571497, "learning_rate": 4.840815164882104e-06, "loss": 0.5069, "step": 6195 }, { "epoch": 2.4312175845887873, "grad_norm": 0.45726694565940096, "learning_rate": 4.840760147673678e-06, "loss": 0.4904, "step": 6196 }, { "epoch": 2.4316127438873796, "grad_norm": 0.4591479178991767, "learning_rate": 4.8407051212721664e-06, "loss": 0.5311, "step": 6197 }, { "epoch": 2.432007903185972, "grad_norm": 0.45199105003630935, "learning_rate": 4.8406500856777875e-06, "loss": 0.5167, "step": 6198 }, { "epoch": 2.432403062484564, "grad_norm": 0.4629676193358175, "learning_rate": 4.840595040890756e-06, "loss": 0.5048, "step": 6199 }, { "epoch": 2.4327982217831563, "grad_norm": 0.4476163591715802, "learning_rate": 4.840539986911288e-06, "loss": 0.4954, "step": 6200 }, { "epoch": 2.4331933810817485, "grad_norm": 0.44798378662618715, "learning_rate": 4.8404849237396005e-06, "loss": 0.4933, "step": 6201 }, { "epoch": 2.433588540380341, "grad_norm": 0.4571828487256416, "learning_rate": 4.840429851375909e-06, "loss": 0.5018, "step": 6202 }, { "epoch": 2.433983699678933, "grad_norm": 0.4487082083135661, "learning_rate": 4.840374769820432e-06, "loss": 0.4855, "step": 6203 }, { "epoch": 2.4343788589775253, "grad_norm": 0.5361808616989676, "learning_rate": 4.840319679073382e-06, "loss": 0.5127, "step": 6204 }, { "epoch": 2.4347740182761175, "grad_norm": 0.4556095333028292, "learning_rate": 4.840264579134978e-06, "loss": 0.5055, "step": 6205 }, { "epoch": 2.4351691775747097, "grad_norm": 0.4582035559570844, "learning_rate": 4.840209470005436e-06, "loss": 0.5032, "step": 6206 }, { "epoch": 2.435564336873302, "grad_norm": 0.4848275765020931, "learning_rate": 4.840154351684973e-06, "loss": 0.515, "step": 6207 }, { "epoch": 2.4359594961718942, "grad_norm": 0.46189716770494005, "learning_rate": 4.840099224173803e-06, "loss": 0.5158, "step": 6208 }, { "epoch": 2.4363546554704865, "grad_norm": 0.4586463067687978, "learning_rate": 4.840044087472145e-06, "loss": 0.5169, "step": 6209 }, { "epoch": 2.4367498147690787, "grad_norm": 0.45983904272371406, "learning_rate": 4.839988941580216e-06, "loss": 0.4927, "step": 6210 }, { "epoch": 2.437144974067671, "grad_norm": 0.479677905098288, "learning_rate": 4.83993378649823e-06, "loss": 0.5466, "step": 6211 }, { "epoch": 2.437540133366263, "grad_norm": 0.4605259736107779, "learning_rate": 4.839878622226405e-06, "loss": 0.5026, "step": 6212 }, { "epoch": 2.4379352926648554, "grad_norm": 0.45622589883484194, "learning_rate": 4.839823448764957e-06, "loss": 0.5026, "step": 6213 }, { "epoch": 2.4383304519634477, "grad_norm": 0.4780280716829919, "learning_rate": 4.839768266114105e-06, "loss": 0.515, "step": 6214 }, { "epoch": 2.43872561126204, "grad_norm": 0.46441875870580496, "learning_rate": 4.839713074274064e-06, "loss": 0.508, "step": 6215 }, { "epoch": 2.439120770560632, "grad_norm": 0.45047431718755854, "learning_rate": 4.83965787324505e-06, "loss": 0.5011, "step": 6216 }, { "epoch": 2.4395159298592244, "grad_norm": 0.45440270281711137, "learning_rate": 4.83960266302728e-06, "loss": 0.5024, "step": 6217 }, { "epoch": 2.4399110891578166, "grad_norm": 0.4638601787236139, "learning_rate": 4.839547443620972e-06, "loss": 0.5113, "step": 6218 }, { "epoch": 2.440306248456409, "grad_norm": 0.4488674824729464, "learning_rate": 4.839492215026342e-06, "loss": 0.4858, "step": 6219 }, { "epoch": 2.440701407755001, "grad_norm": 0.4536981413225659, "learning_rate": 4.839436977243608e-06, "loss": 0.4959, "step": 6220 }, { "epoch": 2.4410965670535933, "grad_norm": 0.4614565162626784, "learning_rate": 4.839381730272985e-06, "loss": 0.5012, "step": 6221 }, { "epoch": 2.4414917263521856, "grad_norm": 0.48492633372884375, "learning_rate": 4.839326474114692e-06, "loss": 0.5146, "step": 6222 }, { "epoch": 2.441886885650778, "grad_norm": 0.4704218385973438, "learning_rate": 4.839271208768945e-06, "loss": 0.5129, "step": 6223 }, { "epoch": 2.44228204494937, "grad_norm": 0.45965041302697135, "learning_rate": 4.839215934235961e-06, "loss": 0.5075, "step": 6224 }, { "epoch": 2.4426772042479623, "grad_norm": 0.4561316353748442, "learning_rate": 4.839160650515957e-06, "loss": 0.4964, "step": 6225 }, { "epoch": 2.4430723635465545, "grad_norm": 0.4590933327579659, "learning_rate": 4.839105357609151e-06, "loss": 0.5038, "step": 6226 }, { "epoch": 2.443467522845147, "grad_norm": 0.47776100605193667, "learning_rate": 4.839050055515759e-06, "loss": 0.49, "step": 6227 }, { "epoch": 2.443862682143739, "grad_norm": 0.43282187857296134, "learning_rate": 4.838994744236e-06, "loss": 0.4863, "step": 6228 }, { "epoch": 2.4442578414423313, "grad_norm": 0.45445540357185477, "learning_rate": 4.838939423770088e-06, "loss": 0.5021, "step": 6229 }, { "epoch": 2.4446530007409235, "grad_norm": 0.4562136501460368, "learning_rate": 4.838884094118244e-06, "loss": 0.5307, "step": 6230 }, { "epoch": 2.4450481600395157, "grad_norm": 0.5480503669106829, "learning_rate": 4.8388287552806825e-06, "loss": 0.5152, "step": 6231 }, { "epoch": 2.445443319338108, "grad_norm": 0.4651543841687929, "learning_rate": 4.838773407257622e-06, "loss": 0.5007, "step": 6232 }, { "epoch": 2.4458384786367002, "grad_norm": 0.4685356376510576, "learning_rate": 4.8387180500492795e-06, "loss": 0.5048, "step": 6233 }, { "epoch": 2.4462336379352925, "grad_norm": 0.4586342865694105, "learning_rate": 4.838662683655872e-06, "loss": 0.5004, "step": 6234 }, { "epoch": 2.4466287972338847, "grad_norm": 0.44817924080411603, "learning_rate": 4.83860730807762e-06, "loss": 0.5024, "step": 6235 }, { "epoch": 2.447023956532477, "grad_norm": 0.45665521330466075, "learning_rate": 4.838551923314736e-06, "loss": 0.5106, "step": 6236 }, { "epoch": 2.447419115831069, "grad_norm": 0.46636023225981704, "learning_rate": 4.838496529367441e-06, "loss": 0.5154, "step": 6237 }, { "epoch": 2.4478142751296614, "grad_norm": 0.47231613865652516, "learning_rate": 4.8384411262359525e-06, "loss": 0.5131, "step": 6238 }, { "epoch": 2.4482094344282537, "grad_norm": 0.4613639527463871, "learning_rate": 4.838385713920486e-06, "loss": 0.5084, "step": 6239 }, { "epoch": 2.448604593726846, "grad_norm": 0.4604238114892905, "learning_rate": 4.838330292421262e-06, "loss": 0.5065, "step": 6240 }, { "epoch": 2.448999753025438, "grad_norm": 0.4633869738387279, "learning_rate": 4.838274861738494e-06, "loss": 0.5095, "step": 6241 }, { "epoch": 2.4493949123240304, "grad_norm": 0.47977255402722213, "learning_rate": 4.838219421872405e-06, "loss": 0.5242, "step": 6242 }, { "epoch": 2.4497900716226226, "grad_norm": 0.4649910129362038, "learning_rate": 4.8381639728232075e-06, "loss": 0.5007, "step": 6243 }, { "epoch": 2.450185230921215, "grad_norm": 0.44567230911849703, "learning_rate": 4.838108514591124e-06, "loss": 0.4886, "step": 6244 }, { "epoch": 2.450580390219807, "grad_norm": 0.4802947747613605, "learning_rate": 4.838053047176368e-06, "loss": 0.5182, "step": 6245 }, { "epoch": 2.4509755495184, "grad_norm": 0.4529418893441322, "learning_rate": 4.83799757057916e-06, "loss": 0.4978, "step": 6246 }, { "epoch": 2.451370708816992, "grad_norm": 0.4707140210320241, "learning_rate": 4.837942084799717e-06, "loss": 0.5148, "step": 6247 }, { "epoch": 2.4517658681155843, "grad_norm": 0.4514623045887543, "learning_rate": 4.837886589838259e-06, "loss": 0.5095, "step": 6248 }, { "epoch": 2.4521610274141765, "grad_norm": 0.4760633546771482, "learning_rate": 4.837831085695e-06, "loss": 0.5098, "step": 6249 }, { "epoch": 2.4525561867127688, "grad_norm": 0.4602919310882103, "learning_rate": 4.8377755723701614e-06, "loss": 0.5025, "step": 6250 }, { "epoch": 2.452951346011361, "grad_norm": 0.5205070905111004, "learning_rate": 4.837720049863959e-06, "loss": 0.5558, "step": 6251 }, { "epoch": 2.4533465053099532, "grad_norm": 0.44480305897623773, "learning_rate": 4.837664518176613e-06, "loss": 0.4903, "step": 6252 }, { "epoch": 2.4537416646085455, "grad_norm": 0.45504363171585244, "learning_rate": 4.837608977308339e-06, "loss": 0.5183, "step": 6253 }, { "epoch": 2.4541368239071377, "grad_norm": 0.44828627366707774, "learning_rate": 4.837553427259356e-06, "loss": 0.5075, "step": 6254 }, { "epoch": 2.45453198320573, "grad_norm": 0.4429586869046653, "learning_rate": 4.837497868029884e-06, "loss": 0.495, "step": 6255 }, { "epoch": 2.454927142504322, "grad_norm": 0.4580942265610526, "learning_rate": 4.837442299620139e-06, "loss": 0.5026, "step": 6256 }, { "epoch": 2.4553223018029144, "grad_norm": 0.46738087782305204, "learning_rate": 4.83738672203034e-06, "loss": 0.5316, "step": 6257 }, { "epoch": 2.4557174611015067, "grad_norm": 0.47534561406612363, "learning_rate": 4.837331135260705e-06, "loss": 0.529, "step": 6258 }, { "epoch": 2.456112620400099, "grad_norm": 0.45632596134859843, "learning_rate": 4.837275539311454e-06, "loss": 0.5025, "step": 6259 }, { "epoch": 2.456507779698691, "grad_norm": 0.43916671865176493, "learning_rate": 4.837219934182803e-06, "loss": 0.4894, "step": 6260 }, { "epoch": 2.4569029389972834, "grad_norm": 0.4685573051031802, "learning_rate": 4.837164319874972e-06, "loss": 0.5134, "step": 6261 }, { "epoch": 2.4572980982958756, "grad_norm": 0.45202371145633197, "learning_rate": 4.8371086963881774e-06, "loss": 0.4923, "step": 6262 }, { "epoch": 2.457693257594468, "grad_norm": 0.46730026620993986, "learning_rate": 4.83705306372264e-06, "loss": 0.5016, "step": 6263 }, { "epoch": 2.45808841689306, "grad_norm": 0.4626170175822174, "learning_rate": 4.836997421878577e-06, "loss": 0.5234, "step": 6264 }, { "epoch": 2.4584835761916524, "grad_norm": 0.44361760739699646, "learning_rate": 4.836941770856207e-06, "loss": 0.499, "step": 6265 }, { "epoch": 2.4588787354902446, "grad_norm": 0.45702240750866957, "learning_rate": 4.83688611065575e-06, "loss": 0.5058, "step": 6266 }, { "epoch": 2.459273894788837, "grad_norm": 0.47299501346012857, "learning_rate": 4.836830441277422e-06, "loss": 0.5066, "step": 6267 }, { "epoch": 2.459669054087429, "grad_norm": 0.4727679722857182, "learning_rate": 4.836774762721443e-06, "loss": 0.5219, "step": 6268 }, { "epoch": 2.4600642133860213, "grad_norm": 0.4561162349721409, "learning_rate": 4.836719074988033e-06, "loss": 0.5123, "step": 6269 }, { "epoch": 2.4604593726846136, "grad_norm": 0.46185475580783847, "learning_rate": 4.836663378077408e-06, "loss": 0.5057, "step": 6270 }, { "epoch": 2.460854531983206, "grad_norm": 0.4589633018472339, "learning_rate": 4.836607671989789e-06, "loss": 0.491, "step": 6271 }, { "epoch": 2.461249691281798, "grad_norm": 0.4674016116064247, "learning_rate": 4.836551956725394e-06, "loss": 0.505, "step": 6272 }, { "epoch": 2.4616448505803903, "grad_norm": 0.44019704083575656, "learning_rate": 4.836496232284441e-06, "loss": 0.4899, "step": 6273 }, { "epoch": 2.4620400098789825, "grad_norm": 0.4560950335962957, "learning_rate": 4.8364404986671495e-06, "loss": 0.5086, "step": 6274 }, { "epoch": 2.4624351691775748, "grad_norm": 0.45987663956371605, "learning_rate": 4.83638475587374e-06, "loss": 0.5023, "step": 6275 }, { "epoch": 2.462830328476167, "grad_norm": 0.45218362910357657, "learning_rate": 4.836329003904429e-06, "loss": 0.5049, "step": 6276 }, { "epoch": 2.4632254877747592, "grad_norm": 0.46082337545098706, "learning_rate": 4.836273242759436e-06, "loss": 0.4906, "step": 6277 }, { "epoch": 2.4636206470733515, "grad_norm": 0.4484085796656676, "learning_rate": 4.83621747243898e-06, "loss": 0.5003, "step": 6278 }, { "epoch": 2.4640158063719437, "grad_norm": 0.4561063177663292, "learning_rate": 4.836161692943282e-06, "loss": 0.4872, "step": 6279 }, { "epoch": 2.464410965670536, "grad_norm": 0.4635792252049459, "learning_rate": 4.836105904272558e-06, "loss": 0.4891, "step": 6280 }, { "epoch": 2.464806124969128, "grad_norm": 0.44874718083152354, "learning_rate": 4.836050106427029e-06, "loss": 0.504, "step": 6281 }, { "epoch": 2.4652012842677204, "grad_norm": 0.4542288616082571, "learning_rate": 4.835994299406914e-06, "loss": 0.5117, "step": 6282 }, { "epoch": 2.4655964435663127, "grad_norm": 0.46392560474828354, "learning_rate": 4.835938483212431e-06, "loss": 0.5032, "step": 6283 }, { "epoch": 2.465991602864905, "grad_norm": 0.461032498595409, "learning_rate": 4.835882657843801e-06, "loss": 0.5041, "step": 6284 }, { "epoch": 2.466386762163497, "grad_norm": 0.43775177363788437, "learning_rate": 4.835826823301242e-06, "loss": 0.5057, "step": 6285 }, { "epoch": 2.4667819214620894, "grad_norm": 0.45447705767758345, "learning_rate": 4.835770979584974e-06, "loss": 0.5059, "step": 6286 }, { "epoch": 2.4671770807606817, "grad_norm": 0.4820043582122839, "learning_rate": 4.835715126695216e-06, "loss": 0.5025, "step": 6287 }, { "epoch": 2.467572240059274, "grad_norm": 0.45364882204768436, "learning_rate": 4.835659264632186e-06, "loss": 0.5121, "step": 6288 }, { "epoch": 2.467967399357866, "grad_norm": 0.44392647087381, "learning_rate": 4.835603393396106e-06, "loss": 0.4973, "step": 6289 }, { "epoch": 2.4683625586564584, "grad_norm": 0.4702335158316592, "learning_rate": 4.835547512987194e-06, "loss": 0.5056, "step": 6290 }, { "epoch": 2.4687577179550506, "grad_norm": 0.46379114054816933, "learning_rate": 4.835491623405669e-06, "loss": 0.5062, "step": 6291 }, { "epoch": 2.469152877253643, "grad_norm": 0.46254378434485627, "learning_rate": 4.835435724651753e-06, "loss": 0.5034, "step": 6292 }, { "epoch": 2.469548036552235, "grad_norm": 0.4435217656925511, "learning_rate": 4.8353798167256615e-06, "loss": 0.5178, "step": 6293 }, { "epoch": 2.4699431958508273, "grad_norm": 0.44400526861468553, "learning_rate": 4.835323899627616e-06, "loss": 0.4828, "step": 6294 }, { "epoch": 2.4703383551494196, "grad_norm": 0.4661317214500771, "learning_rate": 4.835267973357837e-06, "loss": 0.5096, "step": 6295 }, { "epoch": 2.470733514448012, "grad_norm": 0.4599470856465283, "learning_rate": 4.835212037916545e-06, "loss": 0.5178, "step": 6296 }, { "epoch": 2.471128673746604, "grad_norm": 0.44508708208240416, "learning_rate": 4.835156093303956e-06, "loss": 0.5011, "step": 6297 }, { "epoch": 2.4715238330451963, "grad_norm": 0.45991594887386883, "learning_rate": 4.835100139520292e-06, "loss": 0.5066, "step": 6298 }, { "epoch": 2.4719189923437885, "grad_norm": 0.4473801495463778, "learning_rate": 4.8350441765657736e-06, "loss": 0.5124, "step": 6299 }, { "epoch": 2.4723141516423808, "grad_norm": 0.45643508613471995, "learning_rate": 4.834988204440619e-06, "loss": 0.4926, "step": 6300 }, { "epoch": 2.472709310940973, "grad_norm": 0.4504940699709523, "learning_rate": 4.834932223145049e-06, "loss": 0.5032, "step": 6301 }, { "epoch": 2.4731044702395653, "grad_norm": 0.4426038077719436, "learning_rate": 4.834876232679283e-06, "loss": 0.5104, "step": 6302 }, { "epoch": 2.4734996295381575, "grad_norm": 0.46223284198355835, "learning_rate": 4.83482023304354e-06, "loss": 0.5081, "step": 6303 }, { "epoch": 2.4738947888367497, "grad_norm": 0.4770885110104904, "learning_rate": 4.834764224238042e-06, "loss": 0.5098, "step": 6304 }, { "epoch": 2.474289948135342, "grad_norm": 0.4732407782266299, "learning_rate": 4.834708206263008e-06, "loss": 0.4808, "step": 6305 }, { "epoch": 2.474685107433934, "grad_norm": 0.4613411578962985, "learning_rate": 4.834652179118657e-06, "loss": 0.5215, "step": 6306 }, { "epoch": 2.4750802667325265, "grad_norm": 0.4626350011702521, "learning_rate": 4.83459614280521e-06, "loss": 0.5143, "step": 6307 }, { "epoch": 2.4754754260311187, "grad_norm": 0.4492080049375109, "learning_rate": 4.834540097322888e-06, "loss": 0.5151, "step": 6308 }, { "epoch": 2.475870585329711, "grad_norm": 0.46276098431405666, "learning_rate": 4.834484042671909e-06, "loss": 0.5107, "step": 6309 }, { "epoch": 2.476265744628303, "grad_norm": 0.4600080634693739, "learning_rate": 4.834427978852495e-06, "loss": 0.5183, "step": 6310 }, { "epoch": 2.4766609039268954, "grad_norm": 0.4560521377408553, "learning_rate": 4.834371905864865e-06, "loss": 0.515, "step": 6311 }, { "epoch": 2.4770560632254877, "grad_norm": 0.4680722643265072, "learning_rate": 4.83431582370924e-06, "loss": 0.5268, "step": 6312 }, { "epoch": 2.47745122252408, "grad_norm": 0.45788403308191006, "learning_rate": 4.83425973238584e-06, "loss": 0.5185, "step": 6313 }, { "epoch": 2.477846381822672, "grad_norm": 0.4489353107357901, "learning_rate": 4.834203631894885e-06, "loss": 0.5245, "step": 6314 }, { "epoch": 2.4782415411212644, "grad_norm": 0.4769728044328352, "learning_rate": 4.834147522236595e-06, "loss": 0.5034, "step": 6315 }, { "epoch": 2.4786367004198566, "grad_norm": 0.4531931819815472, "learning_rate": 4.8340914034111916e-06, "loss": 0.4785, "step": 6316 }, { "epoch": 2.479031859718449, "grad_norm": 0.436765563760745, "learning_rate": 4.834035275418895e-06, "loss": 0.5044, "step": 6317 }, { "epoch": 2.479427019017041, "grad_norm": 0.45553402918025715, "learning_rate": 4.833979138259923e-06, "loss": 0.5099, "step": 6318 }, { "epoch": 2.4798221783156333, "grad_norm": 0.45738260128460106, "learning_rate": 4.8339229919345e-06, "loss": 0.5028, "step": 6319 }, { "epoch": 2.4802173376142256, "grad_norm": 0.45623700990486077, "learning_rate": 4.833866836442844e-06, "loss": 0.5015, "step": 6320 }, { "epoch": 2.480612496912818, "grad_norm": 0.44512331672110883, "learning_rate": 4.833810671785177e-06, "loss": 0.5099, "step": 6321 }, { "epoch": 2.48100765621141, "grad_norm": 0.4552888276049071, "learning_rate": 4.833754497961719e-06, "loss": 0.5012, "step": 6322 }, { "epoch": 2.4814028155100023, "grad_norm": 0.46342152696551503, "learning_rate": 4.83369831497269e-06, "loss": 0.5272, "step": 6323 }, { "epoch": 2.4817979748085945, "grad_norm": 0.44267854664405953, "learning_rate": 4.833642122818311e-06, "loss": 0.4853, "step": 6324 }, { "epoch": 2.482193134107187, "grad_norm": 0.43631582113931716, "learning_rate": 4.833585921498802e-06, "loss": 0.4987, "step": 6325 }, { "epoch": 2.482588293405779, "grad_norm": 0.44520693389382443, "learning_rate": 4.8335297110143854e-06, "loss": 0.5021, "step": 6326 }, { "epoch": 2.4829834527043713, "grad_norm": 0.4611485355941225, "learning_rate": 4.833473491365281e-06, "loss": 0.5156, "step": 6327 }, { "epoch": 2.4833786120029635, "grad_norm": 0.45289421087393966, "learning_rate": 4.833417262551711e-06, "loss": 0.5095, "step": 6328 }, { "epoch": 2.483773771301556, "grad_norm": 0.4566101862712613, "learning_rate": 4.833361024573893e-06, "loss": 0.5237, "step": 6329 }, { "epoch": 2.4841689306001484, "grad_norm": 0.4564858320090929, "learning_rate": 4.833304777432051e-06, "loss": 0.492, "step": 6330 }, { "epoch": 2.4845640898987407, "grad_norm": 0.4555944378026437, "learning_rate": 4.8332485211264035e-06, "loss": 0.5221, "step": 6331 }, { "epoch": 2.484959249197333, "grad_norm": 0.4640570525721282, "learning_rate": 4.833192255657173e-06, "loss": 0.5265, "step": 6332 }, { "epoch": 2.485354408495925, "grad_norm": 0.4450100165471149, "learning_rate": 4.833135981024581e-06, "loss": 0.4968, "step": 6333 }, { "epoch": 2.4857495677945174, "grad_norm": 0.44268155087230027, "learning_rate": 4.833079697228847e-06, "loss": 0.4893, "step": 6334 }, { "epoch": 2.4861447270931096, "grad_norm": 0.446170036400285, "learning_rate": 4.833023404270193e-06, "loss": 0.4945, "step": 6335 }, { "epoch": 2.486539886391702, "grad_norm": 0.46363170302080026, "learning_rate": 4.8329671021488385e-06, "loss": 0.5106, "step": 6336 }, { "epoch": 2.486935045690294, "grad_norm": 0.46125075416833533, "learning_rate": 4.832910790865007e-06, "loss": 0.5099, "step": 6337 }, { "epoch": 2.4873302049888864, "grad_norm": 0.4666268360674439, "learning_rate": 4.832854470418918e-06, "loss": 0.4968, "step": 6338 }, { "epoch": 2.4877253642874786, "grad_norm": 0.45259995470398834, "learning_rate": 4.8327981408107945e-06, "loss": 0.5231, "step": 6339 }, { "epoch": 2.488120523586071, "grad_norm": 0.45257495176751394, "learning_rate": 4.832741802040856e-06, "loss": 0.51, "step": 6340 }, { "epoch": 2.488515682884663, "grad_norm": 0.4702986428108045, "learning_rate": 4.8326854541093235e-06, "loss": 0.511, "step": 6341 }, { "epoch": 2.4889108421832553, "grad_norm": 0.45287635582005137, "learning_rate": 4.832629097016419e-06, "loss": 0.5194, "step": 6342 }, { "epoch": 2.4893060014818476, "grad_norm": 0.44097483735488313, "learning_rate": 4.832572730762364e-06, "loss": 0.4886, "step": 6343 }, { "epoch": 2.48970116078044, "grad_norm": 0.4963761696743815, "learning_rate": 4.83251635534738e-06, "loss": 0.5144, "step": 6344 }, { "epoch": 2.490096320079032, "grad_norm": 0.4721389393948357, "learning_rate": 4.832459970771688e-06, "loss": 0.5196, "step": 6345 }, { "epoch": 2.4904914793776243, "grad_norm": 0.4607493050777102, "learning_rate": 4.83240357703551e-06, "loss": 0.5065, "step": 6346 }, { "epoch": 2.4908866386762165, "grad_norm": 0.4603650627094426, "learning_rate": 4.8323471741390656e-06, "loss": 0.4982, "step": 6347 }, { "epoch": 2.4912817979748088, "grad_norm": 0.4620527083371952, "learning_rate": 4.832290762082579e-06, "loss": 0.5114, "step": 6348 }, { "epoch": 2.491676957273401, "grad_norm": 0.46945952654863243, "learning_rate": 4.8322343408662705e-06, "loss": 0.5093, "step": 6349 }, { "epoch": 2.4920721165719932, "grad_norm": 0.45597389461472576, "learning_rate": 4.8321779104903616e-06, "loss": 0.5103, "step": 6350 }, { "epoch": 2.4924672758705855, "grad_norm": 0.4517530087892166, "learning_rate": 4.832121470955074e-06, "loss": 0.504, "step": 6351 }, { "epoch": 2.4928624351691777, "grad_norm": 0.44430291926385046, "learning_rate": 4.832065022260629e-06, "loss": 0.4967, "step": 6352 }, { "epoch": 2.49325759446777, "grad_norm": 0.46115093494647114, "learning_rate": 4.83200856440725e-06, "loss": 0.5082, "step": 6353 }, { "epoch": 2.493652753766362, "grad_norm": 0.45214398265733147, "learning_rate": 4.831952097395156e-06, "loss": 0.503, "step": 6354 }, { "epoch": 2.4940479130649544, "grad_norm": 0.4607263777785255, "learning_rate": 4.831895621224571e-06, "loss": 0.5115, "step": 6355 }, { "epoch": 2.4944430723635467, "grad_norm": 0.45430159930327796, "learning_rate": 4.8318391358957156e-06, "loss": 0.4955, "step": 6356 }, { "epoch": 2.494838231662139, "grad_norm": 0.45283670210193705, "learning_rate": 4.831782641408812e-06, "loss": 0.5071, "step": 6357 }, { "epoch": 2.495233390960731, "grad_norm": 0.4598695429896044, "learning_rate": 4.831726137764082e-06, "loss": 0.518, "step": 6358 }, { "epoch": 2.4956285502593234, "grad_norm": 0.4399372220031, "learning_rate": 4.831669624961748e-06, "loss": 0.5051, "step": 6359 }, { "epoch": 2.4960237095579156, "grad_norm": 0.467578857452426, "learning_rate": 4.831613103002032e-06, "loss": 0.4944, "step": 6360 }, { "epoch": 2.496418868856508, "grad_norm": 0.46610183550354434, "learning_rate": 4.831556571885155e-06, "loss": 0.5046, "step": 6361 }, { "epoch": 2.4968140281551, "grad_norm": 0.4589963237813121, "learning_rate": 4.831500031611339e-06, "loss": 0.5007, "step": 6362 }, { "epoch": 2.4972091874536924, "grad_norm": 0.45341734760729985, "learning_rate": 4.831443482180808e-06, "loss": 0.5097, "step": 6363 }, { "epoch": 2.4976043467522846, "grad_norm": 0.4737045186230201, "learning_rate": 4.831386923593781e-06, "loss": 0.5105, "step": 6364 }, { "epoch": 2.497999506050877, "grad_norm": 0.4641942011706654, "learning_rate": 4.831330355850484e-06, "loss": 0.5005, "step": 6365 }, { "epoch": 2.498394665349469, "grad_norm": 0.4856421198337944, "learning_rate": 4.831273778951135e-06, "loss": 0.5245, "step": 6366 }, { "epoch": 2.4987898246480613, "grad_norm": 0.44305020823239166, "learning_rate": 4.831217192895959e-06, "loss": 0.4882, "step": 6367 }, { "epoch": 2.4991849839466536, "grad_norm": 0.4582781517686158, "learning_rate": 4.831160597685178e-06, "loss": 0.4903, "step": 6368 }, { "epoch": 2.499580143245246, "grad_norm": 0.4525149145967303, "learning_rate": 4.8311039933190136e-06, "loss": 0.4834, "step": 6369 }, { "epoch": 2.499975302543838, "grad_norm": 0.4575043422893562, "learning_rate": 4.831047379797687e-06, "loss": 0.4897, "step": 6370 }, { "epoch": 2.5003704618424303, "grad_norm": 0.46250674104664735, "learning_rate": 4.830990757121424e-06, "loss": 0.506, "step": 6371 }, { "epoch": 2.5007656211410225, "grad_norm": 0.4619201958339098, "learning_rate": 4.830934125290443e-06, "loss": 0.4947, "step": 6372 }, { "epoch": 2.5011607804396148, "grad_norm": 0.4603940568427686, "learning_rate": 4.830877484304969e-06, "loss": 0.5052, "step": 6373 }, { "epoch": 2.501555939738207, "grad_norm": 0.4671784400513492, "learning_rate": 4.830820834165223e-06, "loss": 0.4942, "step": 6374 }, { "epoch": 2.5019510990367992, "grad_norm": 0.46267953291708924, "learning_rate": 4.830764174871429e-06, "loss": 0.5049, "step": 6375 }, { "epoch": 2.5023462583353915, "grad_norm": 0.4670573153772785, "learning_rate": 4.830707506423807e-06, "loss": 0.5071, "step": 6376 }, { "epoch": 2.5027414176339837, "grad_norm": 0.448679724311928, "learning_rate": 4.830650828822583e-06, "loss": 0.5036, "step": 6377 }, { "epoch": 2.503136576932576, "grad_norm": 0.4638582201810059, "learning_rate": 4.830594142067977e-06, "loss": 0.4958, "step": 6378 }, { "epoch": 2.503531736231168, "grad_norm": 0.46498455456100324, "learning_rate": 4.8305374461602115e-06, "loss": 0.4994, "step": 6379 }, { "epoch": 2.5039268955297604, "grad_norm": 0.47076449596566666, "learning_rate": 4.830480741099511e-06, "loss": 0.5077, "step": 6380 }, { "epoch": 2.5043220548283527, "grad_norm": 0.4620871607194632, "learning_rate": 4.830424026886098e-06, "loss": 0.5003, "step": 6381 }, { "epoch": 2.504717214126945, "grad_norm": 0.46348419234538263, "learning_rate": 4.8303673035201935e-06, "loss": 0.5286, "step": 6382 }, { "epoch": 2.505112373425537, "grad_norm": 0.45149330232898727, "learning_rate": 4.830310571002022e-06, "loss": 0.5099, "step": 6383 }, { "epoch": 2.5055075327241294, "grad_norm": 0.4375735232605822, "learning_rate": 4.830253829331805e-06, "loss": 0.4983, "step": 6384 }, { "epoch": 2.5059026920227216, "grad_norm": 0.4469286754515496, "learning_rate": 4.830197078509766e-06, "loss": 0.4992, "step": 6385 }, { "epoch": 2.506297851321314, "grad_norm": 0.46197178445812176, "learning_rate": 4.830140318536128e-06, "loss": 0.497, "step": 6386 }, { "epoch": 2.506693010619906, "grad_norm": 0.4619654942592018, "learning_rate": 4.830083549411114e-06, "loss": 0.4983, "step": 6387 }, { "epoch": 2.5070881699184984, "grad_norm": 0.4522982728309289, "learning_rate": 4.830026771134947e-06, "loss": 0.4994, "step": 6388 }, { "epoch": 2.5074833292170906, "grad_norm": 0.4628104450392865, "learning_rate": 4.82996998370785e-06, "loss": 0.5056, "step": 6389 }, { "epoch": 2.507878488515683, "grad_norm": 0.45715011969492436, "learning_rate": 4.829913187130044e-06, "loss": 0.5084, "step": 6390 }, { "epoch": 2.508273647814275, "grad_norm": 0.4620237998725422, "learning_rate": 4.8298563814017555e-06, "loss": 0.5114, "step": 6391 }, { "epoch": 2.5086688071128673, "grad_norm": 0.4632610101741824, "learning_rate": 4.829799566523205e-06, "loss": 0.5141, "step": 6392 }, { "epoch": 2.5090639664114596, "grad_norm": 0.4641510104801741, "learning_rate": 4.829742742494616e-06, "loss": 0.5042, "step": 6393 }, { "epoch": 2.509459125710052, "grad_norm": 0.4628296152429653, "learning_rate": 4.829685909316214e-06, "loss": 0.4989, "step": 6394 }, { "epoch": 2.509854285008644, "grad_norm": 0.47161142330182587, "learning_rate": 4.829629066988219e-06, "loss": 0.4889, "step": 6395 }, { "epoch": 2.5102494443072363, "grad_norm": 0.4703058506566186, "learning_rate": 4.829572215510856e-06, "loss": 0.5248, "step": 6396 }, { "epoch": 2.5106446036058285, "grad_norm": 0.4487390067504666, "learning_rate": 4.829515354884348e-06, "loss": 0.4989, "step": 6397 }, { "epoch": 2.5110397629044208, "grad_norm": 0.46578439069427224, "learning_rate": 4.829458485108918e-06, "loss": 0.5157, "step": 6398 }, { "epoch": 2.511434922203013, "grad_norm": 0.4589142880562439, "learning_rate": 4.8294016061847895e-06, "loss": 0.5113, "step": 6399 }, { "epoch": 2.5118300815016052, "grad_norm": 0.43436790417000837, "learning_rate": 4.829344718112186e-06, "loss": 0.4928, "step": 6400 }, { "epoch": 2.5122252408001975, "grad_norm": 0.454548256257017, "learning_rate": 4.829287820891332e-06, "loss": 0.4983, "step": 6401 }, { "epoch": 2.5126204000987897, "grad_norm": 0.44998917234532554, "learning_rate": 4.829230914522449e-06, "loss": 0.502, "step": 6402 }, { "epoch": 2.513015559397382, "grad_norm": 0.4419759844210292, "learning_rate": 4.82917399900576e-06, "loss": 0.4961, "step": 6403 }, { "epoch": 2.513410718695974, "grad_norm": 0.4424432885292469, "learning_rate": 4.829117074341492e-06, "loss": 0.4962, "step": 6404 }, { "epoch": 2.5138058779945665, "grad_norm": 0.47103994040807695, "learning_rate": 4.829060140529866e-06, "loss": 0.5016, "step": 6405 }, { "epoch": 2.5142010372931587, "grad_norm": 0.4583560717436245, "learning_rate": 4.829003197571106e-06, "loss": 0.5003, "step": 6406 }, { "epoch": 2.514596196591751, "grad_norm": 0.4511981257492769, "learning_rate": 4.828946245465435e-06, "loss": 0.5092, "step": 6407 }, { "epoch": 2.514991355890343, "grad_norm": 0.44378556134382374, "learning_rate": 4.828889284213078e-06, "loss": 0.4868, "step": 6408 }, { "epoch": 2.5153865151889354, "grad_norm": 0.4638400372635114, "learning_rate": 4.828832313814258e-06, "loss": 0.5157, "step": 6409 }, { "epoch": 2.5157816744875277, "grad_norm": 0.4555708689326073, "learning_rate": 4.828775334269198e-06, "loss": 0.503, "step": 6410 }, { "epoch": 2.51617683378612, "grad_norm": 0.4740137430827515, "learning_rate": 4.828718345578124e-06, "loss": 0.5012, "step": 6411 }, { "epoch": 2.516571993084712, "grad_norm": 0.440175406189311, "learning_rate": 4.828661347741258e-06, "loss": 0.4972, "step": 6412 }, { "epoch": 2.5169671523833044, "grad_norm": 0.44634195808516225, "learning_rate": 4.828604340758824e-06, "loss": 0.521, "step": 6413 }, { "epoch": 2.5173623116818966, "grad_norm": 0.44706452475534614, "learning_rate": 4.828547324631045e-06, "loss": 0.5099, "step": 6414 }, { "epoch": 2.517757470980489, "grad_norm": 0.45566928817560864, "learning_rate": 4.828490299358148e-06, "loss": 0.4969, "step": 6415 }, { "epoch": 2.518152630279081, "grad_norm": 0.45962733757335644, "learning_rate": 4.828433264940354e-06, "loss": 0.4894, "step": 6416 }, { "epoch": 2.5185477895776733, "grad_norm": 0.4699440047924458, "learning_rate": 4.828376221377889e-06, "loss": 0.5016, "step": 6417 }, { "epoch": 2.5189429488762656, "grad_norm": 0.4398573036728772, "learning_rate": 4.828319168670974e-06, "loss": 0.4954, "step": 6418 }, { "epoch": 2.519338108174858, "grad_norm": 0.4553277215876072, "learning_rate": 4.828262106819837e-06, "loss": 0.5103, "step": 6419 }, { "epoch": 2.51973326747345, "grad_norm": 0.455563395903747, "learning_rate": 4.8282050358247e-06, "loss": 0.5047, "step": 6420 }, { "epoch": 2.5201284267720423, "grad_norm": 0.4614817667592641, "learning_rate": 4.828147955685787e-06, "loss": 0.5015, "step": 6421 }, { "epoch": 2.5205235860706345, "grad_norm": 0.4540776683434743, "learning_rate": 4.8280908664033225e-06, "loss": 0.5004, "step": 6422 }, { "epoch": 2.5209187453692268, "grad_norm": 0.4427896768855242, "learning_rate": 4.828033767977531e-06, "loss": 0.4898, "step": 6423 }, { "epoch": 2.521313904667819, "grad_norm": 0.4546512843332004, "learning_rate": 4.8279766604086365e-06, "loss": 0.5219, "step": 6424 }, { "epoch": 2.5217090639664113, "grad_norm": 0.46014478411090454, "learning_rate": 4.827919543696863e-06, "loss": 0.5003, "step": 6425 }, { "epoch": 2.5221042232650035, "grad_norm": 0.5757044021010417, "learning_rate": 4.827862417842435e-06, "loss": 0.489, "step": 6426 }, { "epoch": 2.5224993825635957, "grad_norm": 0.44379865154981046, "learning_rate": 4.827805282845577e-06, "loss": 0.5096, "step": 6427 }, { "epoch": 2.522894541862188, "grad_norm": 0.4678140092742878, "learning_rate": 4.827748138706514e-06, "loss": 0.5189, "step": 6428 }, { "epoch": 2.52328970116078, "grad_norm": 0.45998044186365117, "learning_rate": 4.827690985425469e-06, "loss": 0.5091, "step": 6429 }, { "epoch": 2.5236848604593725, "grad_norm": 0.49126830773446944, "learning_rate": 4.827633823002669e-06, "loss": 0.5154, "step": 6430 }, { "epoch": 2.5240800197579647, "grad_norm": 0.5033898050900029, "learning_rate": 4.827576651438335e-06, "loss": 0.504, "step": 6431 }, { "epoch": 2.524475179056557, "grad_norm": 0.4401770069375346, "learning_rate": 4.827519470732693e-06, "loss": 0.5029, "step": 6432 }, { "epoch": 2.524870338355149, "grad_norm": 0.45085555469804983, "learning_rate": 4.82746228088597e-06, "loss": 0.4777, "step": 6433 }, { "epoch": 2.5252654976537414, "grad_norm": 0.4711645476173809, "learning_rate": 4.827405081898387e-06, "loss": 0.5107, "step": 6434 }, { "epoch": 2.5256606569523337, "grad_norm": 0.46331381932207494, "learning_rate": 4.82734787377017e-06, "loss": 0.5287, "step": 6435 }, { "epoch": 2.526055816250926, "grad_norm": 0.4595073465716432, "learning_rate": 4.827290656501544e-06, "loss": 0.5141, "step": 6436 }, { "epoch": 2.526450975549518, "grad_norm": 0.45432945562079763, "learning_rate": 4.827233430092733e-06, "loss": 0.4814, "step": 6437 }, { "epoch": 2.5268461348481104, "grad_norm": 0.4531070026892637, "learning_rate": 4.827176194543963e-06, "loss": 0.5066, "step": 6438 }, { "epoch": 2.5272412941467026, "grad_norm": 0.5299603756232713, "learning_rate": 4.8271189498554575e-06, "loss": 0.5128, "step": 6439 }, { "epoch": 2.527636453445295, "grad_norm": 0.444567842697348, "learning_rate": 4.827061696027442e-06, "loss": 0.4827, "step": 6440 }, { "epoch": 2.528031612743887, "grad_norm": 0.4590491574983818, "learning_rate": 4.827004433060142e-06, "loss": 0.4784, "step": 6441 }, { "epoch": 2.52842677204248, "grad_norm": 0.46222176709133134, "learning_rate": 4.826947160953781e-06, "loss": 0.5197, "step": 6442 }, { "epoch": 2.528821931341072, "grad_norm": 0.4553604192939767, "learning_rate": 4.826889879708585e-06, "loss": 0.4899, "step": 6443 }, { "epoch": 2.5292170906396643, "grad_norm": 0.4485178175280675, "learning_rate": 4.826832589324778e-06, "loss": 0.5062, "step": 6444 }, { "epoch": 2.5296122499382565, "grad_norm": 0.45899668741167826, "learning_rate": 4.8267752898025855e-06, "loss": 0.5224, "step": 6445 }, { "epoch": 2.5300074092368487, "grad_norm": 0.45782021791332567, "learning_rate": 4.826717981142233e-06, "loss": 0.5074, "step": 6446 }, { "epoch": 2.530402568535441, "grad_norm": 0.471834837281007, "learning_rate": 4.8266606633439445e-06, "loss": 0.5152, "step": 6447 }, { "epoch": 2.5307977278340332, "grad_norm": 0.48656419425692243, "learning_rate": 4.826603336407945e-06, "loss": 0.5209, "step": 6448 }, { "epoch": 2.5311928871326255, "grad_norm": 0.45199569225901554, "learning_rate": 4.826546000334462e-06, "loss": 0.5035, "step": 6449 }, { "epoch": 2.5315880464312177, "grad_norm": 0.4505649136262649, "learning_rate": 4.826488655123719e-06, "loss": 0.5034, "step": 6450 }, { "epoch": 2.53198320572981, "grad_norm": 0.4750069460993798, "learning_rate": 4.826431300775941e-06, "loss": 0.5377, "step": 6451 }, { "epoch": 2.532378365028402, "grad_norm": 0.464527033598869, "learning_rate": 4.826373937291353e-06, "loss": 0.4931, "step": 6452 }, { "epoch": 2.5327735243269944, "grad_norm": 0.4530418607627022, "learning_rate": 4.826316564670181e-06, "loss": 0.5072, "step": 6453 }, { "epoch": 2.5331686836255867, "grad_norm": 0.45364038939024304, "learning_rate": 4.82625918291265e-06, "loss": 0.5041, "step": 6454 }, { "epoch": 2.533563842924179, "grad_norm": 0.47698190204477, "learning_rate": 4.8262017920189864e-06, "loss": 0.5116, "step": 6455 }, { "epoch": 2.533959002222771, "grad_norm": 0.4731322955615775, "learning_rate": 4.826144391989414e-06, "loss": 0.5182, "step": 6456 }, { "epoch": 2.5343541615213634, "grad_norm": 0.46876448274990934, "learning_rate": 4.8260869828241595e-06, "loss": 0.5053, "step": 6457 }, { "epoch": 2.5347493208199556, "grad_norm": 0.4565534077873607, "learning_rate": 4.826029564523447e-06, "loss": 0.4996, "step": 6458 }, { "epoch": 2.535144480118548, "grad_norm": 0.47123001937860665, "learning_rate": 4.825972137087504e-06, "loss": 0.5209, "step": 6459 }, { "epoch": 2.53553963941714, "grad_norm": 0.4620746913890331, "learning_rate": 4.825914700516553e-06, "loss": 0.5306, "step": 6460 }, { "epoch": 2.5359347987157324, "grad_norm": 0.4431055424606069, "learning_rate": 4.825857254810823e-06, "loss": 0.4777, "step": 6461 }, { "epoch": 2.5363299580143246, "grad_norm": 0.4670518314206856, "learning_rate": 4.8257997999705365e-06, "loss": 0.4896, "step": 6462 }, { "epoch": 2.536725117312917, "grad_norm": 0.46517370738592695, "learning_rate": 4.825742335995922e-06, "loss": 0.514, "step": 6463 }, { "epoch": 2.537120276611509, "grad_norm": 0.46818923159639003, "learning_rate": 4.825684862887204e-06, "loss": 0.5092, "step": 6464 }, { "epoch": 2.5375154359101013, "grad_norm": 0.47402007401065194, "learning_rate": 4.825627380644607e-06, "loss": 0.4979, "step": 6465 }, { "epoch": 2.5379105952086936, "grad_norm": 0.45423949379520695, "learning_rate": 4.825569889268359e-06, "loss": 0.4905, "step": 6466 }, { "epoch": 2.538305754507286, "grad_norm": 0.44881199031238245, "learning_rate": 4.825512388758684e-06, "loss": 0.4949, "step": 6467 }, { "epoch": 2.538700913805878, "grad_norm": 0.43610597989809763, "learning_rate": 4.825454879115808e-06, "loss": 0.4913, "step": 6468 }, { "epoch": 2.5390960731044703, "grad_norm": 0.46713538360387696, "learning_rate": 4.8253973603399585e-06, "loss": 0.5091, "step": 6469 }, { "epoch": 2.5394912324030625, "grad_norm": 0.457557105090376, "learning_rate": 4.825339832431359e-06, "loss": 0.5067, "step": 6470 }, { "epoch": 2.5398863917016548, "grad_norm": 0.44744590808245827, "learning_rate": 4.8252822953902374e-06, "loss": 0.5041, "step": 6471 }, { "epoch": 2.540281551000247, "grad_norm": 0.44535213213717323, "learning_rate": 4.825224749216819e-06, "loss": 0.5114, "step": 6472 }, { "epoch": 2.5406767102988392, "grad_norm": 0.4457813945339316, "learning_rate": 4.825167193911329e-06, "loss": 0.5075, "step": 6473 }, { "epoch": 2.5410718695974315, "grad_norm": 0.45529691510876213, "learning_rate": 4.825109629473995e-06, "loss": 0.491, "step": 6474 }, { "epoch": 2.5414670288960237, "grad_norm": 0.4716080108006373, "learning_rate": 4.825052055905043e-06, "loss": 0.5155, "step": 6475 }, { "epoch": 2.541862188194616, "grad_norm": 0.4566158583555428, "learning_rate": 4.8249944732046975e-06, "loss": 0.522, "step": 6476 }, { "epoch": 2.542257347493208, "grad_norm": 0.46881103956534304, "learning_rate": 4.8249368813731845e-06, "loss": 0.5274, "step": 6477 }, { "epoch": 2.5426525067918004, "grad_norm": 0.45250162234599905, "learning_rate": 4.824879280410733e-06, "loss": 0.5005, "step": 6478 }, { "epoch": 2.5430476660903927, "grad_norm": 0.47056029175688424, "learning_rate": 4.824821670317566e-06, "loss": 0.5215, "step": 6479 }, { "epoch": 2.543442825388985, "grad_norm": 0.4597434281516728, "learning_rate": 4.824764051093912e-06, "loss": 0.5006, "step": 6480 }, { "epoch": 2.543837984687577, "grad_norm": 0.45131921755337623, "learning_rate": 4.824706422739996e-06, "loss": 0.5014, "step": 6481 }, { "epoch": 2.5442331439861694, "grad_norm": 0.4435352090089486, "learning_rate": 4.824648785256045e-06, "loss": 0.4683, "step": 6482 }, { "epoch": 2.5446283032847616, "grad_norm": 0.4361623565598776, "learning_rate": 4.824591138642285e-06, "loss": 0.4739, "step": 6483 }, { "epoch": 2.545023462583354, "grad_norm": 0.4528329358816713, "learning_rate": 4.824533482898943e-06, "loss": 0.4936, "step": 6484 }, { "epoch": 2.545418621881946, "grad_norm": 0.457804851022941, "learning_rate": 4.824475818026244e-06, "loss": 0.5038, "step": 6485 }, { "epoch": 2.5458137811805384, "grad_norm": 0.47354522749284367, "learning_rate": 4.824418144024416e-06, "loss": 0.5182, "step": 6486 }, { "epoch": 2.5462089404791306, "grad_norm": 0.4695229394887001, "learning_rate": 4.824360460893686e-06, "loss": 0.4991, "step": 6487 }, { "epoch": 2.546604099777723, "grad_norm": 0.46327892852606095, "learning_rate": 4.824302768634279e-06, "loss": 0.502, "step": 6488 }, { "epoch": 2.546999259076315, "grad_norm": 0.45297480248431404, "learning_rate": 4.824245067246422e-06, "loss": 0.5066, "step": 6489 }, { "epoch": 2.5473944183749073, "grad_norm": 0.46517129424956516, "learning_rate": 4.824187356730341e-06, "loss": 0.4846, "step": 6490 }, { "epoch": 2.5477895776734996, "grad_norm": 0.4668569089467578, "learning_rate": 4.824129637086264e-06, "loss": 0.5238, "step": 6491 }, { "epoch": 2.548184736972092, "grad_norm": 0.47720546968786415, "learning_rate": 4.824071908314417e-06, "loss": 0.5293, "step": 6492 }, { "epoch": 2.548579896270684, "grad_norm": 0.4729585091074834, "learning_rate": 4.824014170415027e-06, "loss": 0.5163, "step": 6493 }, { "epoch": 2.5489750555692763, "grad_norm": 0.4559767498901322, "learning_rate": 4.8239564233883205e-06, "loss": 0.5028, "step": 6494 }, { "epoch": 2.5493702148678685, "grad_norm": 0.46193288182539766, "learning_rate": 4.823898667234525e-06, "loss": 0.5036, "step": 6495 }, { "epoch": 2.5497653741664608, "grad_norm": 0.50276777112018, "learning_rate": 4.823840901953865e-06, "loss": 0.5199, "step": 6496 }, { "epoch": 2.550160533465053, "grad_norm": 0.44571450965752996, "learning_rate": 4.823783127546571e-06, "loss": 0.4981, "step": 6497 }, { "epoch": 2.5505556927636452, "grad_norm": 0.4545230887683023, "learning_rate": 4.823725344012866e-06, "loss": 0.5014, "step": 6498 }, { "epoch": 2.5509508520622375, "grad_norm": 0.45011277157561497, "learning_rate": 4.8236675513529804e-06, "loss": 0.5016, "step": 6499 }, { "epoch": 2.5513460113608297, "grad_norm": 0.45347326578518127, "learning_rate": 4.823609749567139e-06, "loss": 0.486, "step": 6500 }, { "epoch": 2.551741170659422, "grad_norm": 0.49671184627419557, "learning_rate": 4.823551938655569e-06, "loss": 0.5229, "step": 6501 }, { "epoch": 2.552136329958014, "grad_norm": 0.4630157588304572, "learning_rate": 4.823494118618499e-06, "loss": 0.4994, "step": 6502 }, { "epoch": 2.5525314892566064, "grad_norm": 0.44595435111081594, "learning_rate": 4.8234362894561544e-06, "loss": 0.4913, "step": 6503 }, { "epoch": 2.5529266485551987, "grad_norm": 0.46456032038638434, "learning_rate": 4.823378451168763e-06, "loss": 0.4989, "step": 6504 }, { "epoch": 2.553321807853791, "grad_norm": 0.4741181020350032, "learning_rate": 4.8233206037565515e-06, "loss": 0.5169, "step": 6505 }, { "epoch": 2.553716967152383, "grad_norm": 0.4586086884230933, "learning_rate": 4.823262747219749e-06, "loss": 0.5044, "step": 6506 }, { "epoch": 2.5541121264509754, "grad_norm": 0.4594391518699296, "learning_rate": 4.823204881558579e-06, "loss": 0.5095, "step": 6507 }, { "epoch": 2.5545072857495676, "grad_norm": 0.4520677455107984, "learning_rate": 4.8231470067732726e-06, "loss": 0.5007, "step": 6508 }, { "epoch": 2.55490244504816, "grad_norm": 0.4685641116507666, "learning_rate": 4.823089122864055e-06, "loss": 0.513, "step": 6509 }, { "epoch": 2.555297604346752, "grad_norm": 0.45202109312605754, "learning_rate": 4.8230312298311535e-06, "loss": 0.5163, "step": 6510 }, { "epoch": 2.5556927636453444, "grad_norm": 0.4521725537088804, "learning_rate": 4.822973327674796e-06, "loss": 0.4907, "step": 6511 }, { "epoch": 2.556087922943937, "grad_norm": 0.44282430032215886, "learning_rate": 4.82291541639521e-06, "loss": 0.5044, "step": 6512 }, { "epoch": 2.5564830822425293, "grad_norm": 0.4656119245187877, "learning_rate": 4.822857495992623e-06, "loss": 0.5138, "step": 6513 }, { "epoch": 2.5568782415411215, "grad_norm": 0.46796730981011786, "learning_rate": 4.8227995664672625e-06, "loss": 0.5162, "step": 6514 }, { "epoch": 2.5572734008397138, "grad_norm": 0.4491042247333808, "learning_rate": 4.822741627819355e-06, "loss": 0.5012, "step": 6515 }, { "epoch": 2.557668560138306, "grad_norm": 0.46834449695954017, "learning_rate": 4.82268368004913e-06, "loss": 0.5263, "step": 6516 }, { "epoch": 2.5580637194368983, "grad_norm": 0.44103675945686854, "learning_rate": 4.822625723156813e-06, "loss": 0.4862, "step": 6517 }, { "epoch": 2.5584588787354905, "grad_norm": 0.45017086475460627, "learning_rate": 4.822567757142634e-06, "loss": 0.4989, "step": 6518 }, { "epoch": 2.5588540380340827, "grad_norm": 0.44261210213602237, "learning_rate": 4.822509782006817e-06, "loss": 0.5209, "step": 6519 }, { "epoch": 2.559249197332675, "grad_norm": 0.45761518270513285, "learning_rate": 4.822451797749592e-06, "loss": 0.5203, "step": 6520 }, { "epoch": 2.559644356631267, "grad_norm": 0.46985808636516363, "learning_rate": 4.822393804371188e-06, "loss": 0.5149, "step": 6521 }, { "epoch": 2.5600395159298595, "grad_norm": 0.4526976621493061, "learning_rate": 4.822335801871832e-06, "loss": 0.5091, "step": 6522 }, { "epoch": 2.5604346752284517, "grad_norm": 0.46492146610005, "learning_rate": 4.822277790251749e-06, "loss": 0.5276, "step": 6523 }, { "epoch": 2.560829834527044, "grad_norm": 0.46271285637553683, "learning_rate": 4.82221976951117e-06, "loss": 0.4965, "step": 6524 }, { "epoch": 2.561224993825636, "grad_norm": 0.4503037126759286, "learning_rate": 4.822161739650322e-06, "loss": 0.516, "step": 6525 }, { "epoch": 2.5616201531242284, "grad_norm": 0.443470486159481, "learning_rate": 4.822103700669432e-06, "loss": 0.4974, "step": 6526 }, { "epoch": 2.5620153124228207, "grad_norm": 0.43703146172923873, "learning_rate": 4.82204565256873e-06, "loss": 0.4952, "step": 6527 }, { "epoch": 2.562410471721413, "grad_norm": 0.45292345235815396, "learning_rate": 4.821987595348442e-06, "loss": 0.5271, "step": 6528 }, { "epoch": 2.562805631020005, "grad_norm": 0.45759067526669045, "learning_rate": 4.821929529008797e-06, "loss": 0.5063, "step": 6529 }, { "epoch": 2.5632007903185974, "grad_norm": 0.47064593711048897, "learning_rate": 4.821871453550023e-06, "loss": 0.5057, "step": 6530 }, { "epoch": 2.5635959496171896, "grad_norm": 0.47813721885154226, "learning_rate": 4.821813368972347e-06, "loss": 0.539, "step": 6531 }, { "epoch": 2.563991108915782, "grad_norm": 0.4573359139734747, "learning_rate": 4.821755275275998e-06, "loss": 0.5119, "step": 6532 }, { "epoch": 2.564386268214374, "grad_norm": 0.5039910077391357, "learning_rate": 4.821697172461205e-06, "loss": 0.5096, "step": 6533 }, { "epoch": 2.5647814275129663, "grad_norm": 0.45480012799103875, "learning_rate": 4.821639060528194e-06, "loss": 0.5073, "step": 6534 }, { "epoch": 2.5651765868115586, "grad_norm": 0.43914393122344525, "learning_rate": 4.821580939477195e-06, "loss": 0.5173, "step": 6535 }, { "epoch": 2.565571746110151, "grad_norm": 0.4512267600492565, "learning_rate": 4.821522809308436e-06, "loss": 0.5046, "step": 6536 }, { "epoch": 2.565966905408743, "grad_norm": 0.45888241462106466, "learning_rate": 4.821464670022146e-06, "loss": 0.5159, "step": 6537 }, { "epoch": 2.5663620647073353, "grad_norm": 0.4441592838891458, "learning_rate": 4.821406521618551e-06, "loss": 0.5057, "step": 6538 }, { "epoch": 2.5667572240059275, "grad_norm": 0.45376803364604695, "learning_rate": 4.821348364097882e-06, "loss": 0.4884, "step": 6539 }, { "epoch": 2.56715238330452, "grad_norm": 0.46633304742246534, "learning_rate": 4.821290197460366e-06, "loss": 0.5307, "step": 6540 }, { "epoch": 2.567547542603112, "grad_norm": 0.43666480002999797, "learning_rate": 4.821232021706231e-06, "loss": 0.4926, "step": 6541 }, { "epoch": 2.5679427019017043, "grad_norm": 0.4605237411052962, "learning_rate": 4.8211738368357065e-06, "loss": 0.5049, "step": 6542 }, { "epoch": 2.5683378612002965, "grad_norm": 0.462050066279481, "learning_rate": 4.821115642849021e-06, "loss": 0.5165, "step": 6543 }, { "epoch": 2.5687330204988887, "grad_norm": 0.45750052243887834, "learning_rate": 4.821057439746402e-06, "loss": 0.4976, "step": 6544 }, { "epoch": 2.569128179797481, "grad_norm": 0.44555516105490556, "learning_rate": 4.820999227528079e-06, "loss": 0.5186, "step": 6545 }, { "epoch": 2.5695233390960732, "grad_norm": 0.46285962401306396, "learning_rate": 4.820941006194281e-06, "loss": 0.4978, "step": 6546 }, { "epoch": 2.5699184983946655, "grad_norm": 0.4469392519255687, "learning_rate": 4.820882775745236e-06, "loss": 0.4863, "step": 6547 }, { "epoch": 2.5703136576932577, "grad_norm": 0.4519758312975245, "learning_rate": 4.8208245361811724e-06, "loss": 0.5094, "step": 6548 }, { "epoch": 2.57070881699185, "grad_norm": 0.46503941371816515, "learning_rate": 4.820766287502319e-06, "loss": 0.5133, "step": 6549 }, { "epoch": 2.571103976290442, "grad_norm": 0.4600897389443086, "learning_rate": 4.820708029708905e-06, "loss": 0.5137, "step": 6550 }, { "epoch": 2.5714991355890344, "grad_norm": 0.4588463556332273, "learning_rate": 4.820649762801159e-06, "loss": 0.5092, "step": 6551 }, { "epoch": 2.5718942948876267, "grad_norm": 0.46703432271255124, "learning_rate": 4.820591486779312e-06, "loss": 0.4949, "step": 6552 }, { "epoch": 2.572289454186219, "grad_norm": 0.4489108181770717, "learning_rate": 4.820533201643588e-06, "loss": 0.4944, "step": 6553 }, { "epoch": 2.572684613484811, "grad_norm": 0.4608905598839847, "learning_rate": 4.82047490739422e-06, "loss": 0.5049, "step": 6554 }, { "epoch": 2.5730797727834034, "grad_norm": 0.46983121539687306, "learning_rate": 4.820416604031435e-06, "loss": 0.4909, "step": 6555 }, { "epoch": 2.5734749320819956, "grad_norm": 0.4533141543179746, "learning_rate": 4.820358291555462e-06, "loss": 0.525, "step": 6556 }, { "epoch": 2.573870091380588, "grad_norm": 0.4619074947389451, "learning_rate": 4.820299969966532e-06, "loss": 0.5169, "step": 6557 }, { "epoch": 2.57426525067918, "grad_norm": 0.44500184074671084, "learning_rate": 4.820241639264872e-06, "loss": 0.5106, "step": 6558 }, { "epoch": 2.5746604099777723, "grad_norm": 0.45059185521325373, "learning_rate": 4.820183299450713e-06, "loss": 0.5029, "step": 6559 }, { "epoch": 2.5750555692763646, "grad_norm": 0.45616010644680055, "learning_rate": 4.820124950524282e-06, "loss": 0.5018, "step": 6560 }, { "epoch": 2.575450728574957, "grad_norm": 0.4572348903384075, "learning_rate": 4.820066592485809e-06, "loss": 0.5139, "step": 6561 }, { "epoch": 2.575845887873549, "grad_norm": 0.4421219919687656, "learning_rate": 4.8200082253355226e-06, "loss": 0.4971, "step": 6562 }, { "epoch": 2.5762410471721413, "grad_norm": 0.4666909383851814, "learning_rate": 4.819949849073654e-06, "loss": 0.5117, "step": 6563 }, { "epoch": 2.5766362064707335, "grad_norm": 0.46477953473232203, "learning_rate": 4.8198914637004305e-06, "loss": 0.4949, "step": 6564 }, { "epoch": 2.577031365769326, "grad_norm": 0.46799345127111774, "learning_rate": 4.819833069216081e-06, "loss": 0.4962, "step": 6565 }, { "epoch": 2.577426525067918, "grad_norm": 0.4431351713952274, "learning_rate": 4.819774665620837e-06, "loss": 0.4857, "step": 6566 }, { "epoch": 2.5778216843665103, "grad_norm": 0.45369599827793194, "learning_rate": 4.819716252914927e-06, "loss": 0.5163, "step": 6567 }, { "epoch": 2.5782168436651025, "grad_norm": 0.4736993413815267, "learning_rate": 4.81965783109858e-06, "loss": 0.5077, "step": 6568 }, { "epoch": 2.5786120029636947, "grad_norm": 0.4767138840762411, "learning_rate": 4.819599400172025e-06, "loss": 0.5151, "step": 6569 }, { "epoch": 2.579007162262287, "grad_norm": 0.46197056590790053, "learning_rate": 4.819540960135493e-06, "loss": 0.494, "step": 6570 }, { "epoch": 2.5794023215608792, "grad_norm": 0.4719226833563452, "learning_rate": 4.819482510989211e-06, "loss": 0.5235, "step": 6571 }, { "epoch": 2.5797974808594715, "grad_norm": 0.47446676249677355, "learning_rate": 4.8194240527334115e-06, "loss": 0.5077, "step": 6572 }, { "epoch": 2.5801926401580637, "grad_norm": 0.4660645331065908, "learning_rate": 4.819365585368322e-06, "loss": 0.5013, "step": 6573 }, { "epoch": 2.580587799456656, "grad_norm": 0.45366183792375775, "learning_rate": 4.819307108894173e-06, "loss": 0.4923, "step": 6574 }, { "epoch": 2.580982958755248, "grad_norm": 0.43736598970253193, "learning_rate": 4.819248623311195e-06, "loss": 0.5013, "step": 6575 }, { "epoch": 2.5813781180538404, "grad_norm": 0.46508435134068205, "learning_rate": 4.819190128619617e-06, "loss": 0.4959, "step": 6576 }, { "epoch": 2.5817732773524327, "grad_norm": 0.46567765929034605, "learning_rate": 4.819131624819667e-06, "loss": 0.5076, "step": 6577 }, { "epoch": 2.582168436651025, "grad_norm": 0.4707030207964489, "learning_rate": 4.8190731119115766e-06, "loss": 0.5174, "step": 6578 }, { "epoch": 2.582563595949617, "grad_norm": 0.46834324977420716, "learning_rate": 4.819014589895575e-06, "loss": 0.5154, "step": 6579 }, { "epoch": 2.5829587552482094, "grad_norm": 0.4620901719740855, "learning_rate": 4.818956058771893e-06, "loss": 0.4916, "step": 6580 }, { "epoch": 2.5833539145468016, "grad_norm": 0.4619346062310383, "learning_rate": 4.81889751854076e-06, "loss": 0.5024, "step": 6581 }, { "epoch": 2.583749073845394, "grad_norm": 0.4704870295417848, "learning_rate": 4.818838969202405e-06, "loss": 0.507, "step": 6582 }, { "epoch": 2.584144233143986, "grad_norm": 0.48508779424763204, "learning_rate": 4.818780410757059e-06, "loss": 0.512, "step": 6583 }, { "epoch": 2.5845393924425784, "grad_norm": 0.44361406120966196, "learning_rate": 4.818721843204951e-06, "loss": 0.4971, "step": 6584 }, { "epoch": 2.5849345517411706, "grad_norm": 0.46045806937302997, "learning_rate": 4.818663266546312e-06, "loss": 0.5146, "step": 6585 }, { "epoch": 2.585329711039763, "grad_norm": 0.503532295954558, "learning_rate": 4.818604680781372e-06, "loss": 0.5202, "step": 6586 }, { "epoch": 2.585724870338355, "grad_norm": 0.4809956728838628, "learning_rate": 4.8185460859103596e-06, "loss": 0.5341, "step": 6587 }, { "epoch": 2.5861200296369473, "grad_norm": 0.4452613791874474, "learning_rate": 4.818487481933507e-06, "loss": 0.4868, "step": 6588 }, { "epoch": 2.5865151889355396, "grad_norm": 0.4645895638161033, "learning_rate": 4.818428868851042e-06, "loss": 0.5162, "step": 6589 }, { "epoch": 2.586910348234132, "grad_norm": 0.45690373781668653, "learning_rate": 4.818370246663199e-06, "loss": 0.5186, "step": 6590 }, { "epoch": 2.587305507532724, "grad_norm": 0.4624829183292078, "learning_rate": 4.818311615370204e-06, "loss": 0.5169, "step": 6591 }, { "epoch": 2.5877006668313163, "grad_norm": 0.48491493887224846, "learning_rate": 4.818252974972288e-06, "loss": 0.5092, "step": 6592 }, { "epoch": 2.5880958261299085, "grad_norm": 0.4642333592429095, "learning_rate": 4.818194325469683e-06, "loss": 0.532, "step": 6593 }, { "epoch": 2.5884909854285008, "grad_norm": 0.48132123097086577, "learning_rate": 4.818135666862618e-06, "loss": 0.5152, "step": 6594 }, { "epoch": 2.588886144727093, "grad_norm": 0.4736558582523291, "learning_rate": 4.818076999151323e-06, "loss": 0.5074, "step": 6595 }, { "epoch": 2.5892813040256852, "grad_norm": 0.45982193687887485, "learning_rate": 4.81801832233603e-06, "loss": 0.5136, "step": 6596 }, { "epoch": 2.5896764633242775, "grad_norm": 0.459117193820494, "learning_rate": 4.817959636416969e-06, "loss": 0.5178, "step": 6597 }, { "epoch": 2.5900716226228697, "grad_norm": 0.45795808917106084, "learning_rate": 4.817900941394369e-06, "loss": 0.497, "step": 6598 }, { "epoch": 2.590466781921462, "grad_norm": 0.4961771265409982, "learning_rate": 4.817842237268463e-06, "loss": 0.5025, "step": 6599 }, { "epoch": 2.590861941220054, "grad_norm": 0.4517244851535084, "learning_rate": 4.817783524039479e-06, "loss": 0.496, "step": 6600 }, { "epoch": 2.5912571005186464, "grad_norm": 0.4606749996141201, "learning_rate": 4.8177248017076496e-06, "loss": 0.4885, "step": 6601 }, { "epoch": 2.5916522598172387, "grad_norm": 0.44595584652993003, "learning_rate": 4.817666070273203e-06, "loss": 0.5178, "step": 6602 }, { "epoch": 2.592047419115831, "grad_norm": 0.4361569136817302, "learning_rate": 4.817607329736373e-06, "loss": 0.4927, "step": 6603 }, { "epoch": 2.592442578414423, "grad_norm": 0.48331893774269574, "learning_rate": 4.817548580097389e-06, "loss": 0.4918, "step": 6604 }, { "epoch": 2.5928377377130154, "grad_norm": 0.4480574187648233, "learning_rate": 4.81748982135648e-06, "loss": 0.4898, "step": 6605 }, { "epoch": 2.5932328970116076, "grad_norm": 0.4574403926622687, "learning_rate": 4.817431053513879e-06, "loss": 0.5153, "step": 6606 }, { "epoch": 2.5936280563102, "grad_norm": 0.4464682971660518, "learning_rate": 4.8173722765698165e-06, "loss": 0.4879, "step": 6607 }, { "epoch": 2.594023215608792, "grad_norm": 0.4433326822459801, "learning_rate": 4.817313490524523e-06, "loss": 0.4725, "step": 6608 }, { "epoch": 2.5944183749073844, "grad_norm": 0.4707423582400784, "learning_rate": 4.817254695378228e-06, "loss": 0.5221, "step": 6609 }, { "epoch": 2.5948135342059766, "grad_norm": 0.47295746581167725, "learning_rate": 4.8171958911311646e-06, "loss": 0.5108, "step": 6610 }, { "epoch": 2.595208693504569, "grad_norm": 0.45263375041631376, "learning_rate": 4.817137077783563e-06, "loss": 0.492, "step": 6611 }, { "epoch": 2.595603852803161, "grad_norm": 0.4552715976251914, "learning_rate": 4.817078255335653e-06, "loss": 0.496, "step": 6612 }, { "epoch": 2.5959990121017533, "grad_norm": 0.4595060840449498, "learning_rate": 4.817019423787667e-06, "loss": 0.5155, "step": 6613 }, { "epoch": 2.5963941714003456, "grad_norm": 0.45945144727738035, "learning_rate": 4.8169605831398355e-06, "loss": 0.5292, "step": 6614 }, { "epoch": 2.596789330698938, "grad_norm": 0.45474069846107534, "learning_rate": 4.81690173339239e-06, "loss": 0.5158, "step": 6615 }, { "epoch": 2.59718448999753, "grad_norm": 0.44549691265764996, "learning_rate": 4.816842874545562e-06, "loss": 0.4949, "step": 6616 }, { "epoch": 2.5975796492961223, "grad_norm": 0.4465165484665156, "learning_rate": 4.816784006599582e-06, "loss": 0.5135, "step": 6617 }, { "epoch": 2.5979748085947145, "grad_norm": 0.46690949256258174, "learning_rate": 4.81672512955468e-06, "loss": 0.4823, "step": 6618 }, { "epoch": 2.5983699678933068, "grad_norm": 0.44898509566584294, "learning_rate": 4.81666624341109e-06, "loss": 0.5126, "step": 6619 }, { "epoch": 2.598765127191899, "grad_norm": 0.4486657739928971, "learning_rate": 4.816607348169041e-06, "loss": 0.4901, "step": 6620 }, { "epoch": 2.5991602864904912, "grad_norm": 0.4527214681354412, "learning_rate": 4.816548443828765e-06, "loss": 0.5123, "step": 6621 }, { "epoch": 2.5995554457890835, "grad_norm": 0.4529843101656628, "learning_rate": 4.8164895303904935e-06, "loss": 0.4951, "step": 6622 }, { "epoch": 2.5999506050876757, "grad_norm": 0.4564936706989616, "learning_rate": 4.816430607854458e-06, "loss": 0.5113, "step": 6623 }, { "epoch": 2.600345764386268, "grad_norm": 0.4940966671369367, "learning_rate": 4.816371676220889e-06, "loss": 0.4969, "step": 6624 }, { "epoch": 2.60074092368486, "grad_norm": 0.45722310993861126, "learning_rate": 4.81631273549002e-06, "loss": 0.5132, "step": 6625 }, { "epoch": 2.6011360829834524, "grad_norm": 0.45286575656466427, "learning_rate": 4.816253785662079e-06, "loss": 0.5084, "step": 6626 }, { "epoch": 2.6015312422820447, "grad_norm": 0.445135014900183, "learning_rate": 4.816194826737302e-06, "loss": 0.5027, "step": 6627 }, { "epoch": 2.601926401580637, "grad_norm": 0.46012984610648944, "learning_rate": 4.816135858715917e-06, "loss": 0.4891, "step": 6628 }, { "epoch": 2.602321560879229, "grad_norm": 0.46640326101065027, "learning_rate": 4.816076881598156e-06, "loss": 0.5077, "step": 6629 }, { "epoch": 2.6027167201778214, "grad_norm": 0.4395627749768447, "learning_rate": 4.816017895384253e-06, "loss": 0.5029, "step": 6630 }, { "epoch": 2.603111879476414, "grad_norm": 0.4471917295160825, "learning_rate": 4.815958900074437e-06, "loss": 0.4987, "step": 6631 }, { "epoch": 2.6035070387750063, "grad_norm": 0.4577848836839979, "learning_rate": 4.815899895668941e-06, "loss": 0.5088, "step": 6632 }, { "epoch": 2.6039021980735986, "grad_norm": 0.44634805154755014, "learning_rate": 4.815840882167997e-06, "loss": 0.5121, "step": 6633 }, { "epoch": 2.604297357372191, "grad_norm": 0.4506256280957546, "learning_rate": 4.815781859571835e-06, "loss": 0.5081, "step": 6634 }, { "epoch": 2.604692516670783, "grad_norm": 0.44851796039012143, "learning_rate": 4.815722827880689e-06, "loss": 0.5061, "step": 6635 }, { "epoch": 2.6050876759693753, "grad_norm": 0.4632284380664699, "learning_rate": 4.81566378709479e-06, "loss": 0.5179, "step": 6636 }, { "epoch": 2.6054828352679675, "grad_norm": 0.4495346025856113, "learning_rate": 4.8156047372143695e-06, "loss": 0.5185, "step": 6637 }, { "epoch": 2.6058779945665598, "grad_norm": 0.45146252001014836, "learning_rate": 4.815545678239659e-06, "loss": 0.4782, "step": 6638 }, { "epoch": 2.606273153865152, "grad_norm": 0.473770212276825, "learning_rate": 4.8154866101708925e-06, "loss": 0.5126, "step": 6639 }, { "epoch": 2.6066683131637443, "grad_norm": 0.4485774694573647, "learning_rate": 4.8154275330083e-06, "loss": 0.5119, "step": 6640 }, { "epoch": 2.6070634724623365, "grad_norm": 0.49678656209215, "learning_rate": 4.815368446752114e-06, "loss": 0.5094, "step": 6641 }, { "epoch": 2.6074586317609287, "grad_norm": 0.470453453281169, "learning_rate": 4.815309351402568e-06, "loss": 0.5265, "step": 6642 }, { "epoch": 2.607853791059521, "grad_norm": 0.46330047850218925, "learning_rate": 4.815250246959891e-06, "loss": 0.5093, "step": 6643 }, { "epoch": 2.608248950358113, "grad_norm": 0.4646962109211404, "learning_rate": 4.815191133424318e-06, "loss": 0.5302, "step": 6644 }, { "epoch": 2.6086441096567055, "grad_norm": 0.45067265265161177, "learning_rate": 4.815132010796079e-06, "loss": 0.5108, "step": 6645 }, { "epoch": 2.6090392689552977, "grad_norm": 0.4679320453519832, "learning_rate": 4.815072879075409e-06, "loss": 0.5069, "step": 6646 }, { "epoch": 2.60943442825389, "grad_norm": 0.4633004240529549, "learning_rate": 4.815013738262537e-06, "loss": 0.5084, "step": 6647 }, { "epoch": 2.609829587552482, "grad_norm": 0.4579293641594263, "learning_rate": 4.8149545883576974e-06, "loss": 0.508, "step": 6648 }, { "epoch": 2.6102247468510744, "grad_norm": 0.44095006698161243, "learning_rate": 4.814895429361122e-06, "loss": 0.5141, "step": 6649 }, { "epoch": 2.6106199061496667, "grad_norm": 0.45014307724452574, "learning_rate": 4.814836261273043e-06, "loss": 0.5071, "step": 6650 }, { "epoch": 2.611015065448259, "grad_norm": 0.5653488468963971, "learning_rate": 4.814777084093692e-06, "loss": 0.4967, "step": 6651 }, { "epoch": 2.611410224746851, "grad_norm": 0.4715720317621717, "learning_rate": 4.814717897823303e-06, "loss": 0.5069, "step": 6652 }, { "epoch": 2.6118053840454434, "grad_norm": 0.4575070392319098, "learning_rate": 4.8146587024621075e-06, "loss": 0.5, "step": 6653 }, { "epoch": 2.6122005433440356, "grad_norm": 0.449501214753915, "learning_rate": 4.814599498010338e-06, "loss": 0.5035, "step": 6654 }, { "epoch": 2.612595702642628, "grad_norm": 0.4541220531434449, "learning_rate": 4.814540284468227e-06, "loss": 0.4795, "step": 6655 }, { "epoch": 2.61299086194122, "grad_norm": 0.4527150728896562, "learning_rate": 4.814481061836008e-06, "loss": 0.4958, "step": 6656 }, { "epoch": 2.6133860212398123, "grad_norm": 0.44537076654968044, "learning_rate": 4.814421830113913e-06, "loss": 0.494, "step": 6657 }, { "epoch": 2.6137811805384046, "grad_norm": 0.4538622914542414, "learning_rate": 4.814362589302174e-06, "loss": 0.5144, "step": 6658 }, { "epoch": 2.614176339836997, "grad_norm": 0.4585896393617423, "learning_rate": 4.8143033394010245e-06, "loss": 0.5091, "step": 6659 }, { "epoch": 2.614571499135589, "grad_norm": 0.47488969909117135, "learning_rate": 4.814244080410695e-06, "loss": 0.5268, "step": 6660 }, { "epoch": 2.6149666584341813, "grad_norm": 0.44323004486266726, "learning_rate": 4.814184812331422e-06, "loss": 0.4918, "step": 6661 }, { "epoch": 2.6153618177327735, "grad_norm": 0.4707926165710535, "learning_rate": 4.814125535163435e-06, "loss": 0.5096, "step": 6662 }, { "epoch": 2.615756977031366, "grad_norm": 0.446777983459746, "learning_rate": 4.814066248906969e-06, "loss": 0.5048, "step": 6663 }, { "epoch": 2.616152136329958, "grad_norm": 0.4423589321854824, "learning_rate": 4.8140069535622555e-06, "loss": 0.4932, "step": 6664 }, { "epoch": 2.6165472956285503, "grad_norm": 0.4578414653275977, "learning_rate": 4.813947649129528e-06, "loss": 0.5018, "step": 6665 }, { "epoch": 2.6169424549271425, "grad_norm": 0.45309280010222674, "learning_rate": 4.8138883356090196e-06, "loss": 0.4921, "step": 6666 }, { "epoch": 2.6173376142257347, "grad_norm": 0.45045781550198477, "learning_rate": 4.813829013000963e-06, "loss": 0.4947, "step": 6667 }, { "epoch": 2.617732773524327, "grad_norm": 0.4581432466232565, "learning_rate": 4.81376968130559e-06, "loss": 0.5086, "step": 6668 }, { "epoch": 2.6181279328229192, "grad_norm": 0.4746838428529649, "learning_rate": 4.813710340523135e-06, "loss": 0.5008, "step": 6669 }, { "epoch": 2.6185230921215115, "grad_norm": 0.44025887937767083, "learning_rate": 4.813650990653831e-06, "loss": 0.4871, "step": 6670 }, { "epoch": 2.6189182514201037, "grad_norm": 0.467331378827747, "learning_rate": 4.813591631697912e-06, "loss": 0.5059, "step": 6671 }, { "epoch": 2.619313410718696, "grad_norm": 0.44926649544211894, "learning_rate": 4.813532263655608e-06, "loss": 0.5107, "step": 6672 }, { "epoch": 2.619708570017288, "grad_norm": 0.4601513297437074, "learning_rate": 4.813472886527155e-06, "loss": 0.5235, "step": 6673 }, { "epoch": 2.6201037293158804, "grad_norm": 0.5023320218764915, "learning_rate": 4.813413500312785e-06, "loss": 0.5387, "step": 6674 }, { "epoch": 2.6204988886144727, "grad_norm": 0.45407121529937644, "learning_rate": 4.813354105012732e-06, "loss": 0.4986, "step": 6675 }, { "epoch": 2.620894047913065, "grad_norm": 0.4758694797793042, "learning_rate": 4.813294700627229e-06, "loss": 0.5081, "step": 6676 }, { "epoch": 2.621289207211657, "grad_norm": 0.477442584592846, "learning_rate": 4.8132352871565085e-06, "loss": 0.5098, "step": 6677 }, { "epoch": 2.6216843665102494, "grad_norm": 0.4635000681832021, "learning_rate": 4.813175864600805e-06, "loss": 0.4921, "step": 6678 }, { "epoch": 2.6220795258088416, "grad_norm": 0.5493110100084927, "learning_rate": 4.813116432960351e-06, "loss": 0.5086, "step": 6679 }, { "epoch": 2.622474685107434, "grad_norm": 0.4593734891855389, "learning_rate": 4.813056992235381e-06, "loss": 0.5297, "step": 6680 }, { "epoch": 2.622869844406026, "grad_norm": 0.4578122007316964, "learning_rate": 4.812997542426126e-06, "loss": 0.4811, "step": 6681 }, { "epoch": 2.6232650037046183, "grad_norm": 0.46842927501510234, "learning_rate": 4.812938083532822e-06, "loss": 0.5119, "step": 6682 }, { "epoch": 2.6236601630032106, "grad_norm": 0.46888027270874133, "learning_rate": 4.812878615555702e-06, "loss": 0.4904, "step": 6683 }, { "epoch": 2.624055322301803, "grad_norm": 0.45800572704355974, "learning_rate": 4.812819138495e-06, "loss": 0.4936, "step": 6684 }, { "epoch": 2.624450481600395, "grad_norm": 0.4643329241994898, "learning_rate": 4.812759652350947e-06, "loss": 0.5101, "step": 6685 }, { "epoch": 2.6248456408989873, "grad_norm": 0.47372550653573653, "learning_rate": 4.81270015712378e-06, "loss": 0.5129, "step": 6686 }, { "epoch": 2.6252408001975795, "grad_norm": 0.4539448907813019, "learning_rate": 4.81264065281373e-06, "loss": 0.5008, "step": 6687 }, { "epoch": 2.625635959496172, "grad_norm": 0.4678129441000318, "learning_rate": 4.812581139421033e-06, "loss": 0.5341, "step": 6688 }, { "epoch": 2.626031118794764, "grad_norm": 0.4425885751871649, "learning_rate": 4.812521616945921e-06, "loss": 0.5244, "step": 6689 }, { "epoch": 2.6264262780933563, "grad_norm": 0.4674942573227224, "learning_rate": 4.8124620853886285e-06, "loss": 0.5266, "step": 6690 }, { "epoch": 2.6268214373919485, "grad_norm": 0.4509317914278103, "learning_rate": 4.8124025447493885e-06, "loss": 0.5086, "step": 6691 }, { "epoch": 2.6272165966905408, "grad_norm": 0.465509813792353, "learning_rate": 4.8123429950284365e-06, "loss": 0.522, "step": 6692 }, { "epoch": 2.627611755989133, "grad_norm": 0.46175300779476025, "learning_rate": 4.812283436226004e-06, "loss": 0.506, "step": 6693 }, { "epoch": 2.6280069152877252, "grad_norm": 0.455507151711426, "learning_rate": 4.8122238683423276e-06, "loss": 0.5059, "step": 6694 }, { "epoch": 2.6284020745863175, "grad_norm": 0.45167680557954504, "learning_rate": 4.812164291377639e-06, "loss": 0.4937, "step": 6695 }, { "epoch": 2.6287972338849097, "grad_norm": 0.462114757199228, "learning_rate": 4.812104705332174e-06, "loss": 0.5205, "step": 6696 }, { "epoch": 2.629192393183502, "grad_norm": 0.4513431799853371, "learning_rate": 4.812045110206165e-06, "loss": 0.492, "step": 6697 }, { "epoch": 2.629587552482094, "grad_norm": 0.44284132274625015, "learning_rate": 4.811985505999846e-06, "loss": 0.4751, "step": 6698 }, { "epoch": 2.6299827117806864, "grad_norm": 0.4750386854078683, "learning_rate": 4.811925892713452e-06, "loss": 0.5302, "step": 6699 }, { "epoch": 2.6303778710792787, "grad_norm": 0.4651936596747168, "learning_rate": 4.811866270347219e-06, "loss": 0.5187, "step": 6700 }, { "epoch": 2.630773030377871, "grad_norm": 0.45466631519979056, "learning_rate": 4.811806638901378e-06, "loss": 0.4861, "step": 6701 }, { "epoch": 2.6311681896764636, "grad_norm": 0.4480365156981654, "learning_rate": 4.8117469983761636e-06, "loss": 0.4928, "step": 6702 }, { "epoch": 2.631563348975056, "grad_norm": 0.45596295905822454, "learning_rate": 4.811687348771811e-06, "loss": 0.5143, "step": 6703 }, { "epoch": 2.631958508273648, "grad_norm": 0.4612335510523021, "learning_rate": 4.811627690088555e-06, "loss": 0.5039, "step": 6704 }, { "epoch": 2.6323536675722403, "grad_norm": 0.4495637737856094, "learning_rate": 4.811568022326628e-06, "loss": 0.5014, "step": 6705 }, { "epoch": 2.6327488268708326, "grad_norm": 0.4571383147738367, "learning_rate": 4.811508345486267e-06, "loss": 0.4987, "step": 6706 }, { "epoch": 2.633143986169425, "grad_norm": 0.46620616655944114, "learning_rate": 4.811448659567703e-06, "loss": 0.5157, "step": 6707 }, { "epoch": 2.633539145468017, "grad_norm": 0.692201266331602, "learning_rate": 4.811388964571173e-06, "loss": 0.502, "step": 6708 }, { "epoch": 2.6339343047666093, "grad_norm": 0.452425821090267, "learning_rate": 4.811329260496911e-06, "loss": 0.5082, "step": 6709 }, { "epoch": 2.6343294640652015, "grad_norm": 0.4484831548580827, "learning_rate": 4.811269547345151e-06, "loss": 0.4953, "step": 6710 }, { "epoch": 2.6347246233637938, "grad_norm": 0.5286805004399712, "learning_rate": 4.8112098251161275e-06, "loss": 0.5059, "step": 6711 }, { "epoch": 2.635119782662386, "grad_norm": 0.47242345842337413, "learning_rate": 4.811150093810076e-06, "loss": 0.515, "step": 6712 }, { "epoch": 2.6355149419609782, "grad_norm": 0.45817282138512155, "learning_rate": 4.81109035342723e-06, "loss": 0.4959, "step": 6713 }, { "epoch": 2.6359101012595705, "grad_norm": 0.47033111569825325, "learning_rate": 4.811030603967824e-06, "loss": 0.5047, "step": 6714 }, { "epoch": 2.6363052605581627, "grad_norm": 0.4574013018432309, "learning_rate": 4.810970845432094e-06, "loss": 0.5121, "step": 6715 }, { "epoch": 2.636700419856755, "grad_norm": 0.47733427395045364, "learning_rate": 4.810911077820273e-06, "loss": 0.4993, "step": 6716 }, { "epoch": 2.637095579155347, "grad_norm": 0.45438462788409517, "learning_rate": 4.8108513011325965e-06, "loss": 0.496, "step": 6717 }, { "epoch": 2.6374907384539394, "grad_norm": 0.46176365204286124, "learning_rate": 4.8107915153693e-06, "loss": 0.5186, "step": 6718 }, { "epoch": 2.6378858977525317, "grad_norm": 0.46309358892460495, "learning_rate": 4.810731720530617e-06, "loss": 0.5083, "step": 6719 }, { "epoch": 2.638281057051124, "grad_norm": 0.4712899021409001, "learning_rate": 4.810671916616783e-06, "loss": 0.5113, "step": 6720 }, { "epoch": 2.638676216349716, "grad_norm": 0.4621511341873441, "learning_rate": 4.810612103628033e-06, "loss": 0.505, "step": 6721 }, { "epoch": 2.6390713756483084, "grad_norm": 0.4484479850838346, "learning_rate": 4.810552281564602e-06, "loss": 0.498, "step": 6722 }, { "epoch": 2.6394665349469006, "grad_norm": 0.4447686728319573, "learning_rate": 4.8104924504267245e-06, "loss": 0.4786, "step": 6723 }, { "epoch": 2.639861694245493, "grad_norm": 0.4587588719398148, "learning_rate": 4.810432610214636e-06, "loss": 0.5008, "step": 6724 }, { "epoch": 2.640256853544085, "grad_norm": 0.46171851458075386, "learning_rate": 4.81037276092857e-06, "loss": 0.5104, "step": 6725 }, { "epoch": 2.6406520128426774, "grad_norm": 0.47362051128187516, "learning_rate": 4.810312902568763e-06, "loss": 0.5179, "step": 6726 }, { "epoch": 2.6410471721412696, "grad_norm": 0.4707818961914056, "learning_rate": 4.81025303513545e-06, "loss": 0.5152, "step": 6727 }, { "epoch": 2.641442331439862, "grad_norm": 0.4487464299804814, "learning_rate": 4.810193158628867e-06, "loss": 0.4998, "step": 6728 }, { "epoch": 2.641837490738454, "grad_norm": 0.4550736327280535, "learning_rate": 4.810133273049247e-06, "loss": 0.5179, "step": 6729 }, { "epoch": 2.6422326500370463, "grad_norm": 0.47050227006026263, "learning_rate": 4.810073378396827e-06, "loss": 0.5214, "step": 6730 }, { "epoch": 2.6426278093356386, "grad_norm": 0.4519033384468275, "learning_rate": 4.81001347467184e-06, "loss": 0.4998, "step": 6731 }, { "epoch": 2.643022968634231, "grad_norm": 0.451105123921464, "learning_rate": 4.809953561874525e-06, "loss": 0.5057, "step": 6732 }, { "epoch": 2.643418127932823, "grad_norm": 0.4556875869733165, "learning_rate": 4.8098936400051145e-06, "loss": 0.5156, "step": 6733 }, { "epoch": 2.6438132872314153, "grad_norm": 0.45194215128861687, "learning_rate": 4.809833709063844e-06, "loss": 0.5221, "step": 6734 }, { "epoch": 2.6442084465300075, "grad_norm": 0.4695069511637479, "learning_rate": 4.809773769050948e-06, "loss": 0.5203, "step": 6735 }, { "epoch": 2.6446036058285998, "grad_norm": 0.4792987985028994, "learning_rate": 4.809713819966665e-06, "loss": 0.5284, "step": 6736 }, { "epoch": 2.644998765127192, "grad_norm": 0.45769602212488414, "learning_rate": 4.809653861811228e-06, "loss": 0.4851, "step": 6737 }, { "epoch": 2.6453939244257842, "grad_norm": 0.4424740957506516, "learning_rate": 4.809593894584873e-06, "loss": 0.4982, "step": 6738 }, { "epoch": 2.6457890837243765, "grad_norm": 0.4696493235047474, "learning_rate": 4.809533918287836e-06, "loss": 0.4989, "step": 6739 }, { "epoch": 2.6461842430229687, "grad_norm": 0.46456830773516294, "learning_rate": 4.809473932920352e-06, "loss": 0.4941, "step": 6740 }, { "epoch": 2.646579402321561, "grad_norm": 0.47014248694056754, "learning_rate": 4.809413938482657e-06, "loss": 0.5156, "step": 6741 }, { "epoch": 2.646974561620153, "grad_norm": 0.4356406681706022, "learning_rate": 4.809353934974987e-06, "loss": 0.4908, "step": 6742 }, { "epoch": 2.6473697209187455, "grad_norm": 0.4862338209729092, "learning_rate": 4.809293922397576e-06, "loss": 0.5048, "step": 6743 }, { "epoch": 2.6477648802173377, "grad_norm": 0.4575448187275317, "learning_rate": 4.80923390075066e-06, "loss": 0.5134, "step": 6744 }, { "epoch": 2.64816003951593, "grad_norm": 0.4564366850228013, "learning_rate": 4.809173870034477e-06, "loss": 0.5085, "step": 6745 }, { "epoch": 2.648555198814522, "grad_norm": 0.48219599498932664, "learning_rate": 4.809113830249261e-06, "loss": 0.5221, "step": 6746 }, { "epoch": 2.6489503581131144, "grad_norm": 0.4453567920915501, "learning_rate": 4.809053781395248e-06, "loss": 0.509, "step": 6747 }, { "epoch": 2.6493455174117067, "grad_norm": 0.4724611482066963, "learning_rate": 4.8089937234726734e-06, "loss": 0.4851, "step": 6748 }, { "epoch": 2.649740676710299, "grad_norm": 0.4444318838338744, "learning_rate": 4.808933656481774e-06, "loss": 0.5097, "step": 6749 }, { "epoch": 2.650135836008891, "grad_norm": 0.46372690064711497, "learning_rate": 4.808873580422785e-06, "loss": 0.5011, "step": 6750 }, { "epoch": 2.6505309953074834, "grad_norm": 0.45321412299435093, "learning_rate": 4.808813495295942e-06, "loss": 0.4916, "step": 6751 }, { "epoch": 2.6509261546060756, "grad_norm": 0.47068167177315234, "learning_rate": 4.808753401101483e-06, "loss": 0.5067, "step": 6752 }, { "epoch": 2.651321313904668, "grad_norm": 0.4635431012232405, "learning_rate": 4.808693297839642e-06, "loss": 0.5182, "step": 6753 }, { "epoch": 2.65171647320326, "grad_norm": 0.4412239947020369, "learning_rate": 4.8086331855106546e-06, "loss": 0.5035, "step": 6754 }, { "epoch": 2.6521116325018523, "grad_norm": 0.43995031164220066, "learning_rate": 4.80857306411476e-06, "loss": 0.5028, "step": 6755 }, { "epoch": 2.6525067918004446, "grad_norm": 0.4591057467664625, "learning_rate": 4.808512933652191e-06, "loss": 0.5049, "step": 6756 }, { "epoch": 2.652901951099037, "grad_norm": 0.4571034390087959, "learning_rate": 4.808452794123184e-06, "loss": 0.5272, "step": 6757 }, { "epoch": 2.653297110397629, "grad_norm": 0.4518130469202257, "learning_rate": 4.8083926455279775e-06, "loss": 0.5165, "step": 6758 }, { "epoch": 2.6536922696962213, "grad_norm": 0.439662265546044, "learning_rate": 4.808332487866806e-06, "loss": 0.4949, "step": 6759 }, { "epoch": 2.6540874289948135, "grad_norm": 0.46867256715880273, "learning_rate": 4.808272321139907e-06, "loss": 0.5098, "step": 6760 }, { "epoch": 2.6544825882934058, "grad_norm": 0.4407389861497981, "learning_rate": 4.808212145347515e-06, "loss": 0.4803, "step": 6761 }, { "epoch": 2.654877747591998, "grad_norm": 0.45779304183120295, "learning_rate": 4.808151960489867e-06, "loss": 0.4899, "step": 6762 }, { "epoch": 2.6552729068905903, "grad_norm": 0.4565973133474136, "learning_rate": 4.808091766567201e-06, "loss": 0.5085, "step": 6763 }, { "epoch": 2.6556680661891825, "grad_norm": 0.468953636638347, "learning_rate": 4.8080315635797515e-06, "loss": 0.5232, "step": 6764 }, { "epoch": 2.6560632254877747, "grad_norm": 0.44984729188558875, "learning_rate": 4.807971351527755e-06, "loss": 0.52, "step": 6765 }, { "epoch": 2.656458384786367, "grad_norm": 0.46345262945947907, "learning_rate": 4.807911130411449e-06, "loss": 0.4932, "step": 6766 }, { "epoch": 2.656853544084959, "grad_norm": 0.46323751843274685, "learning_rate": 4.80785090023107e-06, "loss": 0.5168, "step": 6767 }, { "epoch": 2.6572487033835515, "grad_norm": 0.4626841241252618, "learning_rate": 4.807790660986854e-06, "loss": 0.5224, "step": 6768 }, { "epoch": 2.6576438626821437, "grad_norm": 0.4413542814112758, "learning_rate": 4.807730412679037e-06, "loss": 0.5105, "step": 6769 }, { "epoch": 2.658039021980736, "grad_norm": 0.45515913790444384, "learning_rate": 4.807670155307857e-06, "loss": 0.4853, "step": 6770 }, { "epoch": 2.658434181279328, "grad_norm": 0.45080574051033717, "learning_rate": 4.807609888873548e-06, "loss": 0.5232, "step": 6771 }, { "epoch": 2.6588293405779204, "grad_norm": 0.45056542118018017, "learning_rate": 4.807549613376351e-06, "loss": 0.503, "step": 6772 }, { "epoch": 2.6592244998765127, "grad_norm": 0.45084980258696417, "learning_rate": 4.8074893288164995e-06, "loss": 0.5082, "step": 6773 }, { "epoch": 2.659619659175105, "grad_norm": 0.46000748014035947, "learning_rate": 4.80742903519423e-06, "loss": 0.5126, "step": 6774 }, { "epoch": 2.660014818473697, "grad_norm": 0.6125057638224274, "learning_rate": 4.807368732509782e-06, "loss": 0.516, "step": 6775 }, { "epoch": 2.6604099777722894, "grad_norm": 0.4465782067051833, "learning_rate": 4.8073084207633895e-06, "loss": 0.5017, "step": 6776 }, { "epoch": 2.6608051370708816, "grad_norm": 0.44598562336646147, "learning_rate": 4.807248099955291e-06, "loss": 0.4966, "step": 6777 }, { "epoch": 2.661200296369474, "grad_norm": 0.4617546372416006, "learning_rate": 4.807187770085724e-06, "loss": 0.5003, "step": 6778 }, { "epoch": 2.661595455668066, "grad_norm": 0.4676309244036775, "learning_rate": 4.807127431154923e-06, "loss": 0.5039, "step": 6779 }, { "epoch": 2.6619906149666583, "grad_norm": 0.5277411293225999, "learning_rate": 4.807067083163127e-06, "loss": 0.518, "step": 6780 }, { "epoch": 2.6623857742652506, "grad_norm": 0.45429895891466116, "learning_rate": 4.8070067261105725e-06, "loss": 0.4982, "step": 6781 }, { "epoch": 2.662780933563843, "grad_norm": 0.4464728729832292, "learning_rate": 4.806946359997496e-06, "loss": 0.5189, "step": 6782 }, { "epoch": 2.663176092862435, "grad_norm": 0.4715484427485262, "learning_rate": 4.806885984824136e-06, "loss": 0.5117, "step": 6783 }, { "epoch": 2.6635712521610273, "grad_norm": 0.45900610189295865, "learning_rate": 4.8068256005907275e-06, "loss": 0.5023, "step": 6784 }, { "epoch": 2.6639664114596195, "grad_norm": 0.45019303096695007, "learning_rate": 4.80676520729751e-06, "loss": 0.5113, "step": 6785 }, { "epoch": 2.664361570758212, "grad_norm": 0.46748176211191267, "learning_rate": 4.806704804944719e-06, "loss": 0.5164, "step": 6786 }, { "epoch": 2.664756730056804, "grad_norm": 0.46855906191928937, "learning_rate": 4.8066443935325926e-06, "loss": 0.5002, "step": 6787 }, { "epoch": 2.6651518893553963, "grad_norm": 0.452910775505456, "learning_rate": 4.806583973061367e-06, "loss": 0.5156, "step": 6788 }, { "epoch": 2.6655470486539885, "grad_norm": 0.47440038216416675, "learning_rate": 4.80652354353128e-06, "loss": 0.5303, "step": 6789 }, { "epoch": 2.6659422079525807, "grad_norm": 0.5924097415433517, "learning_rate": 4.806463104942569e-06, "loss": 0.5167, "step": 6790 }, { "epoch": 2.666337367251173, "grad_norm": 0.438886847398818, "learning_rate": 4.806402657295472e-06, "loss": 0.5003, "step": 6791 }, { "epoch": 2.6667325265497652, "grad_norm": 0.46609047736390946, "learning_rate": 4.806342200590227e-06, "loss": 0.5128, "step": 6792 }, { "epoch": 2.6671276858483575, "grad_norm": 0.4632346721128658, "learning_rate": 4.8062817348270684e-06, "loss": 0.4954, "step": 6793 }, { "epoch": 2.6675228451469497, "grad_norm": 0.4608854558412661, "learning_rate": 4.806221260006237e-06, "loss": 0.505, "step": 6794 }, { "epoch": 2.667918004445542, "grad_norm": 0.46630121452068407, "learning_rate": 4.806160776127968e-06, "loss": 0.5108, "step": 6795 }, { "epoch": 2.668313163744134, "grad_norm": 0.45531938998001714, "learning_rate": 4.806100283192501e-06, "loss": 0.4968, "step": 6796 }, { "epoch": 2.6687083230427264, "grad_norm": 0.4555861650279177, "learning_rate": 4.806039781200071e-06, "loss": 0.5088, "step": 6797 }, { "epoch": 2.6691034823413187, "grad_norm": 0.4803868219645436, "learning_rate": 4.805979270150918e-06, "loss": 0.5122, "step": 6798 }, { "epoch": 2.669498641639911, "grad_norm": 0.47581987750590815, "learning_rate": 4.805918750045278e-06, "loss": 0.4929, "step": 6799 }, { "epoch": 2.669893800938503, "grad_norm": 0.449650149880212, "learning_rate": 4.80585822088339e-06, "loss": 0.5197, "step": 6800 }, { "epoch": 2.6702889602370954, "grad_norm": 0.43855063136998546, "learning_rate": 4.8057976826654906e-06, "loss": 0.5101, "step": 6801 }, { "epoch": 2.6706841195356876, "grad_norm": 0.4720486302585973, "learning_rate": 4.805737135391818e-06, "loss": 0.5262, "step": 6802 }, { "epoch": 2.67107927883428, "grad_norm": 0.44945472494966127, "learning_rate": 4.80567657906261e-06, "loss": 0.5048, "step": 6803 }, { "epoch": 2.671474438132872, "grad_norm": 0.4669554725656825, "learning_rate": 4.8056160136781055e-06, "loss": 0.5115, "step": 6804 }, { "epoch": 2.6718695974314643, "grad_norm": 0.4403150506327582, "learning_rate": 4.805555439238541e-06, "loss": 0.5031, "step": 6805 }, { "epoch": 2.6722647567300566, "grad_norm": 0.5000895923389528, "learning_rate": 4.805494855744154e-06, "loss": 0.5302, "step": 6806 }, { "epoch": 2.672659916028649, "grad_norm": 0.47576343934993737, "learning_rate": 4.8054342631951836e-06, "loss": 0.4895, "step": 6807 }, { "epoch": 2.673055075327241, "grad_norm": 0.477265617906093, "learning_rate": 4.8053736615918675e-06, "loss": 0.5007, "step": 6808 }, { "epoch": 2.6734502346258333, "grad_norm": 0.4485511317732285, "learning_rate": 4.8053130509344434e-06, "loss": 0.5081, "step": 6809 }, { "epoch": 2.6738453939244256, "grad_norm": 0.45923352589367195, "learning_rate": 4.8052524312231494e-06, "loss": 0.5056, "step": 6810 }, { "epoch": 2.674240553223018, "grad_norm": 0.4661199256713386, "learning_rate": 4.8051918024582235e-06, "loss": 0.506, "step": 6811 }, { "epoch": 2.67463571252161, "grad_norm": 0.4610083722334732, "learning_rate": 4.8051311646399045e-06, "loss": 0.5033, "step": 6812 }, { "epoch": 2.6750308718202023, "grad_norm": 0.43652772614403434, "learning_rate": 4.80507051776843e-06, "loss": 0.5071, "step": 6813 }, { "epoch": 2.6754260311187945, "grad_norm": 0.4691550437232749, "learning_rate": 4.805009861844038e-06, "loss": 0.527, "step": 6814 }, { "epoch": 2.6758211904173868, "grad_norm": 0.4560452636416422, "learning_rate": 4.804949196866967e-06, "loss": 0.4908, "step": 6815 }, { "epoch": 2.676216349715979, "grad_norm": 0.48133219998650245, "learning_rate": 4.8048885228374556e-06, "loss": 0.5118, "step": 6816 }, { "epoch": 2.6766115090145712, "grad_norm": 0.452233986235823, "learning_rate": 4.804827839755741e-06, "loss": 0.4915, "step": 6817 }, { "epoch": 2.6770066683131635, "grad_norm": 0.4631514836434119, "learning_rate": 4.804767147622062e-06, "loss": 0.529, "step": 6818 }, { "epoch": 2.6774018276117557, "grad_norm": 0.4502944964366507, "learning_rate": 4.804706446436658e-06, "loss": 0.5077, "step": 6819 }, { "epoch": 2.6777969869103484, "grad_norm": 0.46421201437573734, "learning_rate": 4.8046457361997655e-06, "loss": 0.5185, "step": 6820 }, { "epoch": 2.6781921462089406, "grad_norm": 0.4613890649783742, "learning_rate": 4.804585016911625e-06, "loss": 0.5138, "step": 6821 }, { "epoch": 2.678587305507533, "grad_norm": 0.4456774139985832, "learning_rate": 4.8045242885724735e-06, "loss": 0.5217, "step": 6822 }, { "epoch": 2.678982464806125, "grad_norm": 0.47237905847343165, "learning_rate": 4.80446355118255e-06, "loss": 0.5191, "step": 6823 }, { "epoch": 2.6793776241047174, "grad_norm": 0.4974035566555643, "learning_rate": 4.804402804742093e-06, "loss": 0.5512, "step": 6824 }, { "epoch": 2.6797727834033096, "grad_norm": 0.47581595796135917, "learning_rate": 4.804342049251341e-06, "loss": 0.5303, "step": 6825 }, { "epoch": 2.680167942701902, "grad_norm": 0.47309784832193486, "learning_rate": 4.804281284710534e-06, "loss": 0.5315, "step": 6826 }, { "epoch": 2.680563102000494, "grad_norm": 0.44291199384154933, "learning_rate": 4.804220511119908e-06, "loss": 0.5095, "step": 6827 }, { "epoch": 2.6809582612990863, "grad_norm": 0.4418663874638194, "learning_rate": 4.804159728479703e-06, "loss": 0.5, "step": 6828 }, { "epoch": 2.6813534205976786, "grad_norm": 0.46327736667282093, "learning_rate": 4.804098936790158e-06, "loss": 0.5141, "step": 6829 }, { "epoch": 2.681748579896271, "grad_norm": 0.4479767367906708, "learning_rate": 4.804038136051512e-06, "loss": 0.5011, "step": 6830 }, { "epoch": 2.682143739194863, "grad_norm": 0.46247506860437304, "learning_rate": 4.803977326264003e-06, "loss": 0.5114, "step": 6831 }, { "epoch": 2.6825388984934553, "grad_norm": 0.46141174971832466, "learning_rate": 4.803916507427869e-06, "loss": 0.5141, "step": 6832 }, { "epoch": 2.6829340577920475, "grad_norm": 0.4873722164912079, "learning_rate": 4.803855679543352e-06, "loss": 0.4998, "step": 6833 }, { "epoch": 2.6833292170906398, "grad_norm": 0.46403561177788066, "learning_rate": 4.803794842610687e-06, "loss": 0.5164, "step": 6834 }, { "epoch": 2.683724376389232, "grad_norm": 0.4456637637514536, "learning_rate": 4.803733996630116e-06, "loss": 0.5018, "step": 6835 }, { "epoch": 2.6841195356878242, "grad_norm": 0.4563297591462206, "learning_rate": 4.803673141601876e-06, "loss": 0.4958, "step": 6836 }, { "epoch": 2.6845146949864165, "grad_norm": 0.46433291682819583, "learning_rate": 4.803612277526207e-06, "loss": 0.5041, "step": 6837 }, { "epoch": 2.6849098542850087, "grad_norm": 0.45384038987818476, "learning_rate": 4.803551404403348e-06, "loss": 0.5029, "step": 6838 }, { "epoch": 2.685305013583601, "grad_norm": 0.4396868856560902, "learning_rate": 4.803490522233538e-06, "loss": 0.4979, "step": 6839 }, { "epoch": 2.685700172882193, "grad_norm": 0.45017056466155314, "learning_rate": 4.803429631017016e-06, "loss": 0.5048, "step": 6840 }, { "epoch": 2.6860953321807854, "grad_norm": 0.4574075515113022, "learning_rate": 4.8033687307540214e-06, "loss": 0.5155, "step": 6841 }, { "epoch": 2.6864904914793777, "grad_norm": 0.4618670159293962, "learning_rate": 4.803307821444793e-06, "loss": 0.516, "step": 6842 }, { "epoch": 2.68688565077797, "grad_norm": 0.4464411508006451, "learning_rate": 4.803246903089569e-06, "loss": 0.5171, "step": 6843 }, { "epoch": 2.687280810076562, "grad_norm": 0.45337545621080044, "learning_rate": 4.80318597568859e-06, "loss": 0.5238, "step": 6844 }, { "epoch": 2.6876759693751544, "grad_norm": 0.4706459371294516, "learning_rate": 4.803125039242096e-06, "loss": 0.5214, "step": 6845 }, { "epoch": 2.6880711286737466, "grad_norm": 0.444744828944862, "learning_rate": 4.8030640937503245e-06, "loss": 0.5177, "step": 6846 }, { "epoch": 2.688466287972339, "grad_norm": 0.4650828362886886, "learning_rate": 4.803003139213517e-06, "loss": 0.5075, "step": 6847 }, { "epoch": 2.688861447270931, "grad_norm": 0.45639383031758396, "learning_rate": 4.802942175631911e-06, "loss": 0.5011, "step": 6848 }, { "epoch": 2.6892566065695234, "grad_norm": 0.466428988530897, "learning_rate": 4.802881203005746e-06, "loss": 0.5139, "step": 6849 }, { "epoch": 2.6896517658681156, "grad_norm": 0.4525428401741202, "learning_rate": 4.802820221335263e-06, "loss": 0.4978, "step": 6850 }, { "epoch": 2.690046925166708, "grad_norm": 0.45716169122612815, "learning_rate": 4.8027592306206995e-06, "loss": 0.501, "step": 6851 }, { "epoch": 2.6904420844653, "grad_norm": 0.48662071727200495, "learning_rate": 4.802698230862296e-06, "loss": 0.5265, "step": 6852 }, { "epoch": 2.6908372437638923, "grad_norm": 0.4623332955672627, "learning_rate": 4.802637222060293e-06, "loss": 0.5163, "step": 6853 }, { "epoch": 2.6912324030624846, "grad_norm": 0.45648458131398784, "learning_rate": 4.802576204214928e-06, "loss": 0.515, "step": 6854 }, { "epoch": 2.691627562361077, "grad_norm": 0.4551657895457975, "learning_rate": 4.802515177326444e-06, "loss": 0.5282, "step": 6855 }, { "epoch": 2.692022721659669, "grad_norm": 0.4475910585637045, "learning_rate": 4.802454141395076e-06, "loss": 0.5113, "step": 6856 }, { "epoch": 2.6924178809582613, "grad_norm": 0.4558226749142037, "learning_rate": 4.802393096421068e-06, "loss": 0.5123, "step": 6857 }, { "epoch": 2.6928130402568535, "grad_norm": 0.4486376587821075, "learning_rate": 4.802332042404657e-06, "loss": 0.5117, "step": 6858 }, { "epoch": 2.6932081995554458, "grad_norm": 0.44884818601499643, "learning_rate": 4.8022709793460846e-06, "loss": 0.5084, "step": 6859 }, { "epoch": 2.693603358854038, "grad_norm": 0.5475296537368842, "learning_rate": 4.8022099072455896e-06, "loss": 0.5086, "step": 6860 }, { "epoch": 2.6939985181526303, "grad_norm": 0.4747377379257253, "learning_rate": 4.802148826103412e-06, "loss": 0.5109, "step": 6861 }, { "epoch": 2.6943936774512225, "grad_norm": 0.47187115055855244, "learning_rate": 4.802087735919792e-06, "loss": 0.5107, "step": 6862 }, { "epoch": 2.6947888367498147, "grad_norm": 0.45581653066216854, "learning_rate": 4.802026636694969e-06, "loss": 0.5092, "step": 6863 }, { "epoch": 2.695183996048407, "grad_norm": 0.45531582354330513, "learning_rate": 4.8019655284291825e-06, "loss": 0.4925, "step": 6864 }, { "epoch": 2.695579155346999, "grad_norm": 0.5224115146598348, "learning_rate": 4.801904411122675e-06, "loss": 0.5203, "step": 6865 }, { "epoch": 2.6959743146455915, "grad_norm": 0.4448869457231406, "learning_rate": 4.8018432847756825e-06, "loss": 0.4948, "step": 6866 }, { "epoch": 2.6963694739441837, "grad_norm": 0.46002467750035936, "learning_rate": 4.801782149388448e-06, "loss": 0.5056, "step": 6867 }, { "epoch": 2.696764633242776, "grad_norm": 0.47172532896971114, "learning_rate": 4.801721004961213e-06, "loss": 0.5075, "step": 6868 }, { "epoch": 2.697159792541368, "grad_norm": 0.45878868214555063, "learning_rate": 4.8016598514942135e-06, "loss": 0.4925, "step": 6869 }, { "epoch": 2.6975549518399604, "grad_norm": 0.4544323936183125, "learning_rate": 4.801598688987692e-06, "loss": 0.5035, "step": 6870 }, { "epoch": 2.6979501111385527, "grad_norm": 0.4774600541049669, "learning_rate": 4.801537517441889e-06, "loss": 0.5106, "step": 6871 }, { "epoch": 2.698345270437145, "grad_norm": 0.4625044065364173, "learning_rate": 4.801476336857043e-06, "loss": 0.4934, "step": 6872 }, { "epoch": 2.698740429735737, "grad_norm": 0.45276021101004094, "learning_rate": 4.801415147233397e-06, "loss": 0.4948, "step": 6873 }, { "epoch": 2.6991355890343294, "grad_norm": 0.46016046750830586, "learning_rate": 4.801353948571189e-06, "loss": 0.5105, "step": 6874 }, { "epoch": 2.6995307483329216, "grad_norm": 0.46937028530160213, "learning_rate": 4.801292740870661e-06, "loss": 0.5325, "step": 6875 }, { "epoch": 2.699925907631514, "grad_norm": 0.43934115205425334, "learning_rate": 4.801231524132052e-06, "loss": 0.5062, "step": 6876 }, { "epoch": 2.700321066930106, "grad_norm": 0.4469967985779925, "learning_rate": 4.8011702983556026e-06, "loss": 0.4971, "step": 6877 }, { "epoch": 2.7007162262286983, "grad_norm": 0.46826293402204716, "learning_rate": 4.801109063541554e-06, "loss": 0.4992, "step": 6878 }, { "epoch": 2.7011113855272906, "grad_norm": 0.5113971704976565, "learning_rate": 4.801047819690146e-06, "loss": 0.5298, "step": 6879 }, { "epoch": 2.701506544825883, "grad_norm": 0.4645313397142929, "learning_rate": 4.80098656680162e-06, "loss": 0.5112, "step": 6880 }, { "epoch": 2.701901704124475, "grad_norm": 0.47144303739327625, "learning_rate": 4.800925304876215e-06, "loss": 0.5002, "step": 6881 }, { "epoch": 2.7022968634230673, "grad_norm": 0.4780153361617559, "learning_rate": 4.800864033914173e-06, "loss": 0.5317, "step": 6882 }, { "epoch": 2.7026920227216595, "grad_norm": 0.46311336498971456, "learning_rate": 4.800802753915735e-06, "loss": 0.5098, "step": 6883 }, { "epoch": 2.7030871820202518, "grad_norm": 0.4704994698152214, "learning_rate": 4.8007414648811405e-06, "loss": 0.4842, "step": 6884 }, { "epoch": 2.703482341318844, "grad_norm": 0.44615978605663914, "learning_rate": 4.80068016681063e-06, "loss": 0.5057, "step": 6885 }, { "epoch": 2.7038775006174363, "grad_norm": 0.4582730966673267, "learning_rate": 4.800618859704445e-06, "loss": 0.4998, "step": 6886 }, { "epoch": 2.7042726599160285, "grad_norm": 0.47695840090832414, "learning_rate": 4.800557543562827e-06, "loss": 0.5027, "step": 6887 }, { "epoch": 2.7046678192146207, "grad_norm": 0.464279484547299, "learning_rate": 4.800496218386015e-06, "loss": 0.4821, "step": 6888 }, { "epoch": 2.705062978513213, "grad_norm": 0.4627589954096053, "learning_rate": 4.800434884174251e-06, "loss": 0.5229, "step": 6889 }, { "epoch": 2.705458137811805, "grad_norm": 0.46481102969321464, "learning_rate": 4.800373540927776e-06, "loss": 0.4879, "step": 6890 }, { "epoch": 2.705853297110398, "grad_norm": 0.46484595991261723, "learning_rate": 4.800312188646831e-06, "loss": 0.5025, "step": 6891 }, { "epoch": 2.70624845640899, "grad_norm": 0.4679261982088802, "learning_rate": 4.800250827331656e-06, "loss": 0.5031, "step": 6892 }, { "epoch": 2.7066436157075824, "grad_norm": 0.45277952364743757, "learning_rate": 4.800189456982492e-06, "loss": 0.5067, "step": 6893 }, { "epoch": 2.7070387750061746, "grad_norm": 0.4581955934170635, "learning_rate": 4.800128077599581e-06, "loss": 0.498, "step": 6894 }, { "epoch": 2.707433934304767, "grad_norm": 0.4728192631389551, "learning_rate": 4.800066689183164e-06, "loss": 0.5116, "step": 6895 }, { "epoch": 2.707829093603359, "grad_norm": 0.4742126911889233, "learning_rate": 4.800005291733482e-06, "loss": 0.5127, "step": 6896 }, { "epoch": 2.7082242529019513, "grad_norm": 0.4515853200904919, "learning_rate": 4.7999438852507745e-06, "loss": 0.4993, "step": 6897 }, { "epoch": 2.7086194122005436, "grad_norm": 0.4613616972466868, "learning_rate": 4.799882469735285e-06, "loss": 0.5032, "step": 6898 }, { "epoch": 2.709014571499136, "grad_norm": 0.45816883607410985, "learning_rate": 4.799821045187254e-06, "loss": 0.5137, "step": 6899 }, { "epoch": 2.709409730797728, "grad_norm": 0.44940851329013853, "learning_rate": 4.7997596116069215e-06, "loss": 0.5187, "step": 6900 }, { "epoch": 2.7098048900963203, "grad_norm": 0.4502469570380973, "learning_rate": 4.79969816899453e-06, "loss": 0.5025, "step": 6901 }, { "epoch": 2.7102000493949125, "grad_norm": 0.4481223650440485, "learning_rate": 4.799636717350321e-06, "loss": 0.4923, "step": 6902 }, { "epoch": 2.710595208693505, "grad_norm": 0.45792623854887804, "learning_rate": 4.7995752566745345e-06, "loss": 0.5043, "step": 6903 }, { "epoch": 2.710990367992097, "grad_norm": 0.4601271483289185, "learning_rate": 4.7995137869674135e-06, "loss": 0.509, "step": 6904 }, { "epoch": 2.7113855272906893, "grad_norm": 0.4476972091826272, "learning_rate": 4.799452308229199e-06, "loss": 0.5014, "step": 6905 }, { "epoch": 2.7117806865892815, "grad_norm": 0.4600909924968701, "learning_rate": 4.7993908204601315e-06, "loss": 0.5073, "step": 6906 }, { "epoch": 2.7121758458878737, "grad_norm": 0.45934839488330736, "learning_rate": 4.799329323660453e-06, "loss": 0.5173, "step": 6907 }, { "epoch": 2.712571005186466, "grad_norm": 0.45337135497372977, "learning_rate": 4.799267817830406e-06, "loss": 0.501, "step": 6908 }, { "epoch": 2.7129661644850582, "grad_norm": 0.4500761726544255, "learning_rate": 4.7992063029702304e-06, "loss": 0.5018, "step": 6909 }, { "epoch": 2.7133613237836505, "grad_norm": 0.45950279024379764, "learning_rate": 4.799144779080169e-06, "loss": 0.517, "step": 6910 }, { "epoch": 2.7137564830822427, "grad_norm": 0.4730730331136291, "learning_rate": 4.799083246160463e-06, "loss": 0.4923, "step": 6911 }, { "epoch": 2.714151642380835, "grad_norm": 0.43830507511052397, "learning_rate": 4.799021704211354e-06, "loss": 0.4934, "step": 6912 }, { "epoch": 2.714546801679427, "grad_norm": 0.48563994353988965, "learning_rate": 4.798960153233084e-06, "loss": 0.5061, "step": 6913 }, { "epoch": 2.7149419609780194, "grad_norm": 0.4792693935069234, "learning_rate": 4.798898593225894e-06, "loss": 0.5025, "step": 6914 }, { "epoch": 2.7153371202766117, "grad_norm": 0.4535898076683153, "learning_rate": 4.798837024190027e-06, "loss": 0.5132, "step": 6915 }, { "epoch": 2.715732279575204, "grad_norm": 0.47116866983420425, "learning_rate": 4.798775446125723e-06, "loss": 0.4975, "step": 6916 }, { "epoch": 2.716127438873796, "grad_norm": 0.4460352697228603, "learning_rate": 4.7987138590332264e-06, "loss": 0.5062, "step": 6917 }, { "epoch": 2.7165225981723884, "grad_norm": 0.4520031645447902, "learning_rate": 4.798652262912776e-06, "loss": 0.512, "step": 6918 }, { "epoch": 2.7169177574709806, "grad_norm": 0.44599164428046767, "learning_rate": 4.798590657764617e-06, "loss": 0.4947, "step": 6919 }, { "epoch": 2.717312916769573, "grad_norm": 0.4462324282376401, "learning_rate": 4.798529043588989e-06, "loss": 0.4997, "step": 6920 }, { "epoch": 2.717708076068165, "grad_norm": 0.4517482214846663, "learning_rate": 4.798467420386133e-06, "loss": 0.5082, "step": 6921 }, { "epoch": 2.7181032353667574, "grad_norm": 0.45386053523383296, "learning_rate": 4.798405788156295e-06, "loss": 0.4976, "step": 6922 }, { "epoch": 2.7184983946653496, "grad_norm": 0.46008987922377587, "learning_rate": 4.7983441468997134e-06, "loss": 0.493, "step": 6923 }, { "epoch": 2.718893553963942, "grad_norm": 0.4481062465022577, "learning_rate": 4.798282496616633e-06, "loss": 0.5143, "step": 6924 }, { "epoch": 2.719288713262534, "grad_norm": 0.4505251384368751, "learning_rate": 4.7982208373072936e-06, "loss": 0.5175, "step": 6925 }, { "epoch": 2.7196838725611263, "grad_norm": 0.47657228107417726, "learning_rate": 4.798159168971938e-06, "loss": 0.5243, "step": 6926 }, { "epoch": 2.7200790318597186, "grad_norm": 0.46381523563325006, "learning_rate": 4.798097491610809e-06, "loss": 0.4926, "step": 6927 }, { "epoch": 2.720474191158311, "grad_norm": 0.4925744506297022, "learning_rate": 4.798035805224149e-06, "loss": 0.4848, "step": 6928 }, { "epoch": 2.720869350456903, "grad_norm": 0.44864029022686136, "learning_rate": 4.797974109812199e-06, "loss": 0.4967, "step": 6929 }, { "epoch": 2.7212645097554953, "grad_norm": 0.49217622158783636, "learning_rate": 4.797912405375203e-06, "loss": 0.5014, "step": 6930 }, { "epoch": 2.7216596690540875, "grad_norm": 0.4551306861648084, "learning_rate": 4.797850691913402e-06, "loss": 0.5291, "step": 6931 }, { "epoch": 2.7220548283526798, "grad_norm": 0.4637807915278924, "learning_rate": 4.797788969427039e-06, "loss": 0.5012, "step": 6932 }, { "epoch": 2.722449987651272, "grad_norm": 0.4512850558545194, "learning_rate": 4.797727237916355e-06, "loss": 0.5125, "step": 6933 }, { "epoch": 2.7228451469498642, "grad_norm": 0.4594283870187807, "learning_rate": 4.7976654973815955e-06, "loss": 0.5108, "step": 6934 }, { "epoch": 2.7232403062484565, "grad_norm": 0.5129146142831824, "learning_rate": 4.797603747823e-06, "loss": 0.5151, "step": 6935 }, { "epoch": 2.7236354655470487, "grad_norm": 0.4624544070980088, "learning_rate": 4.797541989240812e-06, "loss": 0.5022, "step": 6936 }, { "epoch": 2.724030624845641, "grad_norm": 0.44163885935392466, "learning_rate": 4.797480221635276e-06, "loss": 0.4944, "step": 6937 }, { "epoch": 2.724425784144233, "grad_norm": 0.4428403204450448, "learning_rate": 4.7974184450066305e-06, "loss": 0.5108, "step": 6938 }, { "epoch": 2.7248209434428254, "grad_norm": 0.46568478000032276, "learning_rate": 4.7973566593551216e-06, "loss": 0.4976, "step": 6939 }, { "epoch": 2.7252161027414177, "grad_norm": 0.45155171800762267, "learning_rate": 4.7972948646809906e-06, "loss": 0.5149, "step": 6940 }, { "epoch": 2.72561126204001, "grad_norm": 0.46921689781602866, "learning_rate": 4.797233060984481e-06, "loss": 0.5185, "step": 6941 }, { "epoch": 2.726006421338602, "grad_norm": 0.4584753926896495, "learning_rate": 4.797171248265833e-06, "loss": 0.5043, "step": 6942 }, { "epoch": 2.7264015806371944, "grad_norm": 0.4457236150051623, "learning_rate": 4.797109426525293e-06, "loss": 0.5003, "step": 6943 }, { "epoch": 2.7267967399357866, "grad_norm": 0.4577197175651194, "learning_rate": 4.797047595763101e-06, "loss": 0.5268, "step": 6944 }, { "epoch": 2.727191899234379, "grad_norm": 0.4574229252485062, "learning_rate": 4.796985755979502e-06, "loss": 0.498, "step": 6945 }, { "epoch": 2.727587058532971, "grad_norm": 0.4596664767781495, "learning_rate": 4.796923907174737e-06, "loss": 0.5038, "step": 6946 }, { "epoch": 2.7279822178315634, "grad_norm": 0.4458255538392768, "learning_rate": 4.79686204934905e-06, "loss": 0.5137, "step": 6947 }, { "epoch": 2.7283773771301556, "grad_norm": 0.45280689074026337, "learning_rate": 4.796800182502683e-06, "loss": 0.5208, "step": 6948 }, { "epoch": 2.728772536428748, "grad_norm": 0.47554043866753837, "learning_rate": 4.7967383066358795e-06, "loss": 0.5044, "step": 6949 }, { "epoch": 2.72916769572734, "grad_norm": 0.4607340942101068, "learning_rate": 4.796676421748884e-06, "loss": 0.5169, "step": 6950 }, { "epoch": 2.7295628550259323, "grad_norm": 0.44291773525236144, "learning_rate": 4.796614527841937e-06, "loss": 0.4949, "step": 6951 }, { "epoch": 2.7299580143245246, "grad_norm": 0.46432616537422905, "learning_rate": 4.796552624915283e-06, "loss": 0.5058, "step": 6952 }, { "epoch": 2.730353173623117, "grad_norm": 0.4633027740708962, "learning_rate": 4.796490712969165e-06, "loss": 0.5167, "step": 6953 }, { "epoch": 2.730748332921709, "grad_norm": 0.5118956983442591, "learning_rate": 4.796428792003826e-06, "loss": 0.5229, "step": 6954 }, { "epoch": 2.7311434922203013, "grad_norm": 0.4543120210417002, "learning_rate": 4.796366862019508e-06, "loss": 0.4928, "step": 6955 }, { "epoch": 2.7315386515188935, "grad_norm": 0.47789407600911105, "learning_rate": 4.796304923016456e-06, "loss": 0.5188, "step": 6956 }, { "epoch": 2.7319338108174858, "grad_norm": 0.46242168805945055, "learning_rate": 4.796242974994913e-06, "loss": 0.4952, "step": 6957 }, { "epoch": 2.732328970116078, "grad_norm": 0.4566066993777903, "learning_rate": 4.796181017955122e-06, "loss": 0.5047, "step": 6958 }, { "epoch": 2.7327241294146702, "grad_norm": 0.47657476604138915, "learning_rate": 4.796119051897327e-06, "loss": 0.5012, "step": 6959 }, { "epoch": 2.7331192887132625, "grad_norm": 0.46254465859951066, "learning_rate": 4.79605707682177e-06, "loss": 0.4958, "step": 6960 }, { "epoch": 2.7335144480118547, "grad_norm": 0.45509248055911644, "learning_rate": 4.795995092728694e-06, "loss": 0.5063, "step": 6961 }, { "epoch": 2.733909607310447, "grad_norm": 0.4460935553959142, "learning_rate": 4.795933099618344e-06, "loss": 0.5105, "step": 6962 }, { "epoch": 2.734304766609039, "grad_norm": 0.4477458914794227, "learning_rate": 4.795871097490964e-06, "loss": 0.5147, "step": 6963 }, { "epoch": 2.7346999259076314, "grad_norm": 0.4423880972213776, "learning_rate": 4.795809086346796e-06, "loss": 0.5089, "step": 6964 }, { "epoch": 2.7350950852062237, "grad_norm": 0.4391301789018041, "learning_rate": 4.795747066186083e-06, "loss": 0.5023, "step": 6965 }, { "epoch": 2.735490244504816, "grad_norm": 0.45270875027903384, "learning_rate": 4.79568503700907e-06, "loss": 0.5027, "step": 6966 }, { "epoch": 2.735885403803408, "grad_norm": 0.4548394809042717, "learning_rate": 4.795622998816001e-06, "loss": 0.4998, "step": 6967 }, { "epoch": 2.7362805631020004, "grad_norm": 0.4500140263131528, "learning_rate": 4.795560951607118e-06, "loss": 0.4956, "step": 6968 }, { "epoch": 2.7366757224005926, "grad_norm": 0.45444549472429036, "learning_rate": 4.795498895382667e-06, "loss": 0.4999, "step": 6969 }, { "epoch": 2.737070881699185, "grad_norm": 0.45943067355398565, "learning_rate": 4.795436830142888e-06, "loss": 0.5079, "step": 6970 }, { "epoch": 2.737466040997777, "grad_norm": 0.4583842097104527, "learning_rate": 4.795374755888028e-06, "loss": 0.5035, "step": 6971 }, { "epoch": 2.7378612002963694, "grad_norm": 0.44524024874479756, "learning_rate": 4.7953126726183305e-06, "loss": 0.4885, "step": 6972 }, { "epoch": 2.7382563595949616, "grad_norm": 0.5259627464540994, "learning_rate": 4.795250580334038e-06, "loss": 0.5078, "step": 6973 }, { "epoch": 2.738651518893554, "grad_norm": 0.44629367850165574, "learning_rate": 4.795188479035395e-06, "loss": 0.5132, "step": 6974 }, { "epoch": 2.739046678192146, "grad_norm": 0.4738145104107334, "learning_rate": 4.7951263687226444e-06, "loss": 0.5144, "step": 6975 }, { "epoch": 2.7394418374907383, "grad_norm": 0.4536390577628937, "learning_rate": 4.795064249396032e-06, "loss": 0.4981, "step": 6976 }, { "epoch": 2.7398369967893306, "grad_norm": 0.48887491920473725, "learning_rate": 4.795002121055802e-06, "loss": 0.529, "step": 6977 }, { "epoch": 2.740232156087923, "grad_norm": 0.45703040398721106, "learning_rate": 4.794939983702196e-06, "loss": 0.4995, "step": 6978 }, { "epoch": 2.740627315386515, "grad_norm": 0.4688231922181613, "learning_rate": 4.7948778373354585e-06, "loss": 0.5007, "step": 6979 }, { "epoch": 2.7410224746851073, "grad_norm": 0.4466851138075401, "learning_rate": 4.794815681955836e-06, "loss": 0.496, "step": 6980 }, { "epoch": 2.7414176339836995, "grad_norm": 0.4893848636599617, "learning_rate": 4.79475351756357e-06, "loss": 0.5012, "step": 6981 }, { "epoch": 2.7418127932822918, "grad_norm": 0.4883005520320106, "learning_rate": 4.794691344158906e-06, "loss": 0.5085, "step": 6982 }, { "epoch": 2.742207952580884, "grad_norm": 0.45293984493757544, "learning_rate": 4.794629161742088e-06, "loss": 0.5008, "step": 6983 }, { "epoch": 2.7426031118794763, "grad_norm": 0.5501230497941952, "learning_rate": 4.79456697031336e-06, "loss": 0.5128, "step": 6984 }, { "epoch": 2.7429982711780685, "grad_norm": 0.4639739628186388, "learning_rate": 4.794504769872966e-06, "loss": 0.5169, "step": 6985 }, { "epoch": 2.7433934304766607, "grad_norm": 0.4576900039164546, "learning_rate": 4.794442560421151e-06, "loss": 0.4819, "step": 6986 }, { "epoch": 2.743788589775253, "grad_norm": 0.4954297664525487, "learning_rate": 4.794380341958158e-06, "loss": 0.5166, "step": 6987 }, { "epoch": 2.744183749073845, "grad_norm": 0.46126145279470626, "learning_rate": 4.794318114484233e-06, "loss": 0.5118, "step": 6988 }, { "epoch": 2.7445789083724375, "grad_norm": 0.4584209830526114, "learning_rate": 4.79425587799962e-06, "loss": 0.5086, "step": 6989 }, { "epoch": 2.7449740676710297, "grad_norm": 0.4831191153527967, "learning_rate": 4.794193632504561e-06, "loss": 0.5441, "step": 6990 }, { "epoch": 2.745369226969622, "grad_norm": 0.4797130021650316, "learning_rate": 4.794131377999305e-06, "loss": 0.5279, "step": 6991 }, { "epoch": 2.745764386268214, "grad_norm": 0.4699057790970996, "learning_rate": 4.794069114484092e-06, "loss": 0.5147, "step": 6992 }, { "epoch": 2.7461595455668064, "grad_norm": 0.46046778025908625, "learning_rate": 4.79400684195917e-06, "loss": 0.5086, "step": 6993 }, { "epoch": 2.7465547048653987, "grad_norm": 0.4383439181821961, "learning_rate": 4.793944560424782e-06, "loss": 0.4921, "step": 6994 }, { "epoch": 2.746949864163991, "grad_norm": 0.4643419519593291, "learning_rate": 4.7938822698811725e-06, "loss": 0.5024, "step": 6995 }, { "epoch": 2.747345023462583, "grad_norm": 0.45885771648686097, "learning_rate": 4.793819970328586e-06, "loss": 0.4996, "step": 6996 }, { "epoch": 2.7477401827611754, "grad_norm": 0.47670938755540426, "learning_rate": 4.793757661767268e-06, "loss": 0.5024, "step": 6997 }, { "epoch": 2.7481353420597676, "grad_norm": 0.45854522700324046, "learning_rate": 4.7936953441974624e-06, "loss": 0.4988, "step": 6998 }, { "epoch": 2.74853050135836, "grad_norm": 0.4518882922405723, "learning_rate": 4.793633017619415e-06, "loss": 0.5026, "step": 6999 }, { "epoch": 2.748925660656952, "grad_norm": 0.4721585044201508, "learning_rate": 4.793570682033368e-06, "loss": 0.5066, "step": 7000 }, { "epoch": 2.7493208199555443, "grad_norm": 0.45566875735908363, "learning_rate": 4.7935083374395694e-06, "loss": 0.4957, "step": 7001 }, { "epoch": 2.7497159792541366, "grad_norm": 0.46827293594382546, "learning_rate": 4.793445983838263e-06, "loss": 0.5161, "step": 7002 }, { "epoch": 2.750111138552729, "grad_norm": 0.45535123480528633, "learning_rate": 4.793383621229694e-06, "loss": 0.4846, "step": 7003 }, { "epoch": 2.750506297851321, "grad_norm": 0.4640975956499678, "learning_rate": 4.7933212496141055e-06, "loss": 0.5196, "step": 7004 }, { "epoch": 2.7509014571499133, "grad_norm": 0.45391152016709185, "learning_rate": 4.793258868991743e-06, "loss": 0.5044, "step": 7005 }, { "epoch": 2.7512966164485055, "grad_norm": 0.456017711396762, "learning_rate": 4.793196479362854e-06, "loss": 0.5073, "step": 7006 }, { "epoch": 2.751691775747098, "grad_norm": 0.457250650843779, "learning_rate": 4.793134080727682e-06, "loss": 0.5072, "step": 7007 }, { "epoch": 2.75208693504569, "grad_norm": 0.5241068945571156, "learning_rate": 4.79307167308647e-06, "loss": 0.5143, "step": 7008 }, { "epoch": 2.7524820943442827, "grad_norm": 0.44978349825521247, "learning_rate": 4.793009256439466e-06, "loss": 0.491, "step": 7009 }, { "epoch": 2.752877253642875, "grad_norm": 0.445219206652602, "learning_rate": 4.792946830786914e-06, "loss": 0.4946, "step": 7010 }, { "epoch": 2.753272412941467, "grad_norm": 0.4728591513095926, "learning_rate": 4.792884396129059e-06, "loss": 0.5342, "step": 7011 }, { "epoch": 2.7536675722400594, "grad_norm": 0.4494210336181794, "learning_rate": 4.792821952466146e-06, "loss": 0.4868, "step": 7012 }, { "epoch": 2.7540627315386517, "grad_norm": 0.46547211537802097, "learning_rate": 4.7927594997984215e-06, "loss": 0.491, "step": 7013 }, { "epoch": 2.754457890837244, "grad_norm": 0.47364471713178513, "learning_rate": 4.7926970381261295e-06, "loss": 0.506, "step": 7014 }, { "epoch": 2.754853050135836, "grad_norm": 0.4508628675635779, "learning_rate": 4.7926345674495155e-06, "loss": 0.5066, "step": 7015 }, { "epoch": 2.7552482094344284, "grad_norm": 0.47245052795730946, "learning_rate": 4.792572087768825e-06, "loss": 0.509, "step": 7016 }, { "epoch": 2.7556433687330206, "grad_norm": 0.49318688651877063, "learning_rate": 4.792509599084304e-06, "loss": 0.4953, "step": 7017 }, { "epoch": 2.756038528031613, "grad_norm": 0.45637468572032197, "learning_rate": 4.792447101396197e-06, "loss": 0.5149, "step": 7018 }, { "epoch": 2.756433687330205, "grad_norm": 0.4682354739864325, "learning_rate": 4.79238459470475e-06, "loss": 0.5186, "step": 7019 }, { "epoch": 2.7568288466287973, "grad_norm": 0.4568829125381733, "learning_rate": 4.7923220790102084e-06, "loss": 0.5055, "step": 7020 }, { "epoch": 2.7572240059273896, "grad_norm": 0.45180194260707746, "learning_rate": 4.792259554312817e-06, "loss": 0.4921, "step": 7021 }, { "epoch": 2.757619165225982, "grad_norm": 0.4668813422184499, "learning_rate": 4.7921970206128235e-06, "loss": 0.5301, "step": 7022 }, { "epoch": 2.758014324524574, "grad_norm": 0.4487332150132883, "learning_rate": 4.7921344779104705e-06, "loss": 0.5031, "step": 7023 }, { "epoch": 2.7584094838231663, "grad_norm": 0.45510934399651354, "learning_rate": 4.7920719262060055e-06, "loss": 0.5386, "step": 7024 }, { "epoch": 2.7588046431217585, "grad_norm": 0.44723741022980124, "learning_rate": 4.792009365499674e-06, "loss": 0.5155, "step": 7025 }, { "epoch": 2.759199802420351, "grad_norm": 0.4504980939404593, "learning_rate": 4.791946795791721e-06, "loss": 0.5161, "step": 7026 }, { "epoch": 2.759594961718943, "grad_norm": 0.44795581514294985, "learning_rate": 4.791884217082394e-06, "loss": 0.5015, "step": 7027 }, { "epoch": 2.7599901210175353, "grad_norm": 0.45748558259217437, "learning_rate": 4.791821629371936e-06, "loss": 0.5038, "step": 7028 }, { "epoch": 2.7603852803161275, "grad_norm": 0.47531123398447556, "learning_rate": 4.791759032660596e-06, "loss": 0.4986, "step": 7029 }, { "epoch": 2.7607804396147198, "grad_norm": 0.46673985312016447, "learning_rate": 4.7916964269486165e-06, "loss": 0.4956, "step": 7030 }, { "epoch": 2.761175598913312, "grad_norm": 0.4629491572931826, "learning_rate": 4.791633812236245e-06, "loss": 0.4946, "step": 7031 }, { "epoch": 2.7615707582119042, "grad_norm": 0.4525334948265311, "learning_rate": 4.791571188523729e-06, "loss": 0.5164, "step": 7032 }, { "epoch": 2.7619659175104965, "grad_norm": 0.45400679108436304, "learning_rate": 4.7915085558113115e-06, "loss": 0.4999, "step": 7033 }, { "epoch": 2.7623610768090887, "grad_norm": 0.47943008845611956, "learning_rate": 4.791445914099241e-06, "loss": 0.5032, "step": 7034 }, { "epoch": 2.762756236107681, "grad_norm": 0.4452226257043686, "learning_rate": 4.791383263387761e-06, "loss": 0.501, "step": 7035 }, { "epoch": 2.763151395406273, "grad_norm": 0.48784364857172546, "learning_rate": 4.7913206036771195e-06, "loss": 0.5172, "step": 7036 }, { "epoch": 2.7635465547048654, "grad_norm": 0.47792955328117803, "learning_rate": 4.791257934967563e-06, "loss": 0.5341, "step": 7037 }, { "epoch": 2.7639417140034577, "grad_norm": 0.4432418586956085, "learning_rate": 4.791195257259335e-06, "loss": 0.4891, "step": 7038 }, { "epoch": 2.76433687330205, "grad_norm": 0.4712175445574134, "learning_rate": 4.791132570552685e-06, "loss": 0.4999, "step": 7039 }, { "epoch": 2.764732032600642, "grad_norm": 0.4439252343190045, "learning_rate": 4.791069874847857e-06, "loss": 0.5085, "step": 7040 }, { "epoch": 2.7651271918992344, "grad_norm": 0.5317426037392942, "learning_rate": 4.791007170145097e-06, "loss": 0.5129, "step": 7041 }, { "epoch": 2.7655223511978266, "grad_norm": 0.4587796397905756, "learning_rate": 4.790944456444653e-06, "loss": 0.5116, "step": 7042 }, { "epoch": 2.765917510496419, "grad_norm": 0.4797852387845019, "learning_rate": 4.7908817337467695e-06, "loss": 0.5254, "step": 7043 }, { "epoch": 2.766312669795011, "grad_norm": 0.4558663094889101, "learning_rate": 4.790819002051694e-06, "loss": 0.5199, "step": 7044 }, { "epoch": 2.7667078290936034, "grad_norm": 0.44832319567068757, "learning_rate": 4.790756261359673e-06, "loss": 0.515, "step": 7045 }, { "epoch": 2.7671029883921956, "grad_norm": 0.4482969941450535, "learning_rate": 4.7906935116709505e-06, "loss": 0.5046, "step": 7046 }, { "epoch": 2.767498147690788, "grad_norm": 0.4559545857856152, "learning_rate": 4.790630752985776e-06, "loss": 0.4937, "step": 7047 }, { "epoch": 2.76789330698938, "grad_norm": 0.44267716011895897, "learning_rate": 4.790567985304396e-06, "loss": 0.508, "step": 7048 }, { "epoch": 2.7682884662879723, "grad_norm": 0.464044432217731, "learning_rate": 4.790505208627055e-06, "loss": 0.5148, "step": 7049 }, { "epoch": 2.7686836255865646, "grad_norm": 0.4467116038672829, "learning_rate": 4.790442422954e-06, "loss": 0.498, "step": 7050 }, { "epoch": 2.769078784885157, "grad_norm": 0.4798202007038068, "learning_rate": 4.790379628285479e-06, "loss": 0.5087, "step": 7051 }, { "epoch": 2.769473944183749, "grad_norm": 0.44503110439284255, "learning_rate": 4.790316824621736e-06, "loss": 0.5137, "step": 7052 }, { "epoch": 2.7698691034823413, "grad_norm": 0.46457729311037305, "learning_rate": 4.79025401196302e-06, "loss": 0.5195, "step": 7053 }, { "epoch": 2.7702642627809335, "grad_norm": 0.44066547002420625, "learning_rate": 4.790191190309578e-06, "loss": 0.4925, "step": 7054 }, { "epoch": 2.7706594220795258, "grad_norm": 0.45430470319634064, "learning_rate": 4.790128359661654e-06, "loss": 0.495, "step": 7055 }, { "epoch": 2.771054581378118, "grad_norm": 0.4414603456774517, "learning_rate": 4.790065520019498e-06, "loss": 0.5038, "step": 7056 }, { "epoch": 2.7714497406767102, "grad_norm": 0.4922018456920078, "learning_rate": 4.790002671383354e-06, "loss": 0.5155, "step": 7057 }, { "epoch": 2.7718448999753025, "grad_norm": 0.453999025325055, "learning_rate": 4.789939813753471e-06, "loss": 0.5008, "step": 7058 }, { "epoch": 2.7722400592738947, "grad_norm": 0.4617428583463696, "learning_rate": 4.789876947130095e-06, "loss": 0.4974, "step": 7059 }, { "epoch": 2.772635218572487, "grad_norm": 0.464515686848104, "learning_rate": 4.789814071513472e-06, "loss": 0.514, "step": 7060 }, { "epoch": 2.773030377871079, "grad_norm": 0.44904340222359973, "learning_rate": 4.78975118690385e-06, "loss": 0.5063, "step": 7061 }, { "epoch": 2.7734255371696714, "grad_norm": 0.44818575851748277, "learning_rate": 4.789688293301477e-06, "loss": 0.5114, "step": 7062 }, { "epoch": 2.7738206964682637, "grad_norm": 0.4394864938402016, "learning_rate": 4.789625390706597e-06, "loss": 0.4861, "step": 7063 }, { "epoch": 2.774215855766856, "grad_norm": 0.4476957402660163, "learning_rate": 4.789562479119459e-06, "loss": 0.5039, "step": 7064 }, { "epoch": 2.774611015065448, "grad_norm": 0.44866741905501123, "learning_rate": 4.789499558540311e-06, "loss": 0.4856, "step": 7065 }, { "epoch": 2.7750061743640404, "grad_norm": 0.4657641738772069, "learning_rate": 4.7894366289693984e-06, "loss": 0.4909, "step": 7066 }, { "epoch": 2.7754013336626326, "grad_norm": 0.43895805450614483, "learning_rate": 4.789373690406969e-06, "loss": 0.4977, "step": 7067 }, { "epoch": 2.775796492961225, "grad_norm": 0.45870187996681017, "learning_rate": 4.789310742853269e-06, "loss": 0.4998, "step": 7068 }, { "epoch": 2.776191652259817, "grad_norm": 0.45232602161964686, "learning_rate": 4.789247786308548e-06, "loss": 0.5082, "step": 7069 }, { "epoch": 2.7765868115584094, "grad_norm": 0.4533454018434586, "learning_rate": 4.789184820773052e-06, "loss": 0.4736, "step": 7070 }, { "epoch": 2.7769819708570016, "grad_norm": 0.4473360662431684, "learning_rate": 4.7891218462470264e-06, "loss": 0.4908, "step": 7071 }, { "epoch": 2.777377130155594, "grad_norm": 0.4682252863668288, "learning_rate": 4.7890588627307214e-06, "loss": 0.5016, "step": 7072 }, { "epoch": 2.777772289454186, "grad_norm": 0.4457755613086908, "learning_rate": 4.788995870224382e-06, "loss": 0.5183, "step": 7073 }, { "epoch": 2.7781674487527783, "grad_norm": 0.46757695064672683, "learning_rate": 4.788932868728258e-06, "loss": 0.514, "step": 7074 }, { "epoch": 2.7785626080513706, "grad_norm": 0.4538274673145632, "learning_rate": 4.788869858242595e-06, "loss": 0.5092, "step": 7075 }, { "epoch": 2.778957767349963, "grad_norm": 0.4561259698590066, "learning_rate": 4.788806838767642e-06, "loss": 0.5331, "step": 7076 }, { "epoch": 2.779352926648555, "grad_norm": 0.4589851990064589, "learning_rate": 4.788743810303644e-06, "loss": 0.5087, "step": 7077 }, { "epoch": 2.7797480859471473, "grad_norm": 0.4587981064535018, "learning_rate": 4.788680772850852e-06, "loss": 0.5064, "step": 7078 }, { "epoch": 2.7801432452457395, "grad_norm": 0.4495946809709087, "learning_rate": 4.78861772640951e-06, "loss": 0.5094, "step": 7079 }, { "epoch": 2.780538404544332, "grad_norm": 0.4687277276868725, "learning_rate": 4.788554670979868e-06, "loss": 0.4998, "step": 7080 }, { "epoch": 2.7809335638429244, "grad_norm": 0.4624450226026877, "learning_rate": 4.7884916065621735e-06, "loss": 0.5357, "step": 7081 }, { "epoch": 2.7813287231415167, "grad_norm": 0.44737808383227995, "learning_rate": 4.788428533156673e-06, "loss": 0.5085, "step": 7082 }, { "epoch": 2.781723882440109, "grad_norm": 0.4435365459154493, "learning_rate": 4.788365450763614e-06, "loss": 0.4855, "step": 7083 }, { "epoch": 2.782119041738701, "grad_norm": 0.47758039605363733, "learning_rate": 4.788302359383247e-06, "loss": 0.5147, "step": 7084 }, { "epoch": 2.7825142010372934, "grad_norm": 0.47467035291327786, "learning_rate": 4.788239259015817e-06, "loss": 0.5085, "step": 7085 }, { "epoch": 2.7829093603358857, "grad_norm": 0.44586423467559405, "learning_rate": 4.788176149661572e-06, "loss": 0.4858, "step": 7086 }, { "epoch": 2.783304519634478, "grad_norm": 0.45852446260824725, "learning_rate": 4.7881130313207615e-06, "loss": 0.4946, "step": 7087 }, { "epoch": 2.78369967893307, "grad_norm": 0.4608076484794306, "learning_rate": 4.7880499039936315e-06, "loss": 0.5079, "step": 7088 }, { "epoch": 2.7840948382316624, "grad_norm": 0.4653013987128913, "learning_rate": 4.787986767680431e-06, "loss": 0.5409, "step": 7089 }, { "epoch": 2.7844899975302546, "grad_norm": 0.4563060161643037, "learning_rate": 4.787923622381409e-06, "loss": 0.4793, "step": 7090 }, { "epoch": 2.784885156828847, "grad_norm": 0.474596234725646, "learning_rate": 4.787860468096811e-06, "loss": 0.5247, "step": 7091 }, { "epoch": 2.785280316127439, "grad_norm": 0.4340035243691619, "learning_rate": 4.787797304826887e-06, "loss": 0.5038, "step": 7092 }, { "epoch": 2.7856754754260313, "grad_norm": 0.45248266773447615, "learning_rate": 4.787734132571884e-06, "loss": 0.5287, "step": 7093 }, { "epoch": 2.7860706347246236, "grad_norm": 0.45357497159528226, "learning_rate": 4.7876709513320506e-06, "loss": 0.4866, "step": 7094 }, { "epoch": 2.786465794023216, "grad_norm": 0.471725493815604, "learning_rate": 4.787607761107634e-06, "loss": 0.509, "step": 7095 }, { "epoch": 2.786860953321808, "grad_norm": 0.4550489319224383, "learning_rate": 4.7875445618988846e-06, "loss": 0.5069, "step": 7096 }, { "epoch": 2.7872561126204003, "grad_norm": 0.4480112491649345, "learning_rate": 4.787481353706049e-06, "loss": 0.5214, "step": 7097 }, { "epoch": 2.7876512719189925, "grad_norm": 0.46020685458924593, "learning_rate": 4.787418136529376e-06, "loss": 0.4913, "step": 7098 }, { "epoch": 2.7880464312175848, "grad_norm": 0.47097392767674306, "learning_rate": 4.787354910369113e-06, "loss": 0.5046, "step": 7099 }, { "epoch": 2.788441590516177, "grad_norm": 0.6635008391885756, "learning_rate": 4.787291675225508e-06, "loss": 0.5148, "step": 7100 }, { "epoch": 2.7888367498147693, "grad_norm": 0.4483283359174607, "learning_rate": 4.7872284310988115e-06, "loss": 0.4828, "step": 7101 }, { "epoch": 2.7892319091133615, "grad_norm": 0.45492414146412075, "learning_rate": 4.78716517798927e-06, "loss": 0.493, "step": 7102 }, { "epoch": 2.7896270684119537, "grad_norm": 0.46950627047824917, "learning_rate": 4.787101915897133e-06, "loss": 0.5081, "step": 7103 }, { "epoch": 2.790022227710546, "grad_norm": 0.46505419717794555, "learning_rate": 4.787038644822649e-06, "loss": 0.5116, "step": 7104 }, { "epoch": 2.790417387009138, "grad_norm": 0.4544759503964953, "learning_rate": 4.786975364766064e-06, "loss": 0.5137, "step": 7105 }, { "epoch": 2.7908125463077305, "grad_norm": 0.4615992806036096, "learning_rate": 4.786912075727631e-06, "loss": 0.5141, "step": 7106 }, { "epoch": 2.7912077056063227, "grad_norm": 0.44988730317410236, "learning_rate": 4.786848777707594e-06, "loss": 0.5142, "step": 7107 }, { "epoch": 2.791602864904915, "grad_norm": 0.46645509824578013, "learning_rate": 4.786785470706204e-06, "loss": 0.5091, "step": 7108 }, { "epoch": 2.791998024203507, "grad_norm": 0.4714405835689825, "learning_rate": 4.78672215472371e-06, "loss": 0.5221, "step": 7109 }, { "epoch": 2.7923931835020994, "grad_norm": 0.4614676395031843, "learning_rate": 4.78665882976036e-06, "loss": 0.5036, "step": 7110 }, { "epoch": 2.7927883428006917, "grad_norm": 0.45921742192416665, "learning_rate": 4.786595495816402e-06, "loss": 0.5073, "step": 7111 }, { "epoch": 2.793183502099284, "grad_norm": 0.46441158320725995, "learning_rate": 4.786532152892086e-06, "loss": 0.5343, "step": 7112 }, { "epoch": 2.793578661397876, "grad_norm": 0.4473283825529127, "learning_rate": 4.78646880098766e-06, "loss": 0.4916, "step": 7113 }, { "epoch": 2.7939738206964684, "grad_norm": 0.4565617472568001, "learning_rate": 4.786405440103372e-06, "loss": 0.4896, "step": 7114 }, { "epoch": 2.7943689799950606, "grad_norm": 0.4484189292638863, "learning_rate": 4.786342070239473e-06, "loss": 0.4876, "step": 7115 }, { "epoch": 2.794764139293653, "grad_norm": 0.4561057568670873, "learning_rate": 4.78627869139621e-06, "loss": 0.5226, "step": 7116 }, { "epoch": 2.795159298592245, "grad_norm": 0.45392026125118257, "learning_rate": 4.786215303573834e-06, "loss": 0.5162, "step": 7117 }, { "epoch": 2.7955544578908373, "grad_norm": 0.44001406352495115, "learning_rate": 4.7861519067725904e-06, "loss": 0.5084, "step": 7118 }, { "epoch": 2.7959496171894296, "grad_norm": 0.45328763812652456, "learning_rate": 4.786088500992732e-06, "loss": 0.5035, "step": 7119 }, { "epoch": 2.796344776488022, "grad_norm": 0.47975356308620704, "learning_rate": 4.786025086234505e-06, "loss": 0.5178, "step": 7120 }, { "epoch": 2.796739935786614, "grad_norm": 0.4358024295607447, "learning_rate": 4.78596166249816e-06, "loss": 0.508, "step": 7121 }, { "epoch": 2.7971350950852063, "grad_norm": 0.45560496266499945, "learning_rate": 4.785898229783946e-06, "loss": 0.522, "step": 7122 }, { "epoch": 2.7975302543837985, "grad_norm": 0.4493585711669279, "learning_rate": 4.785834788092112e-06, "loss": 0.5086, "step": 7123 }, { "epoch": 2.797925413682391, "grad_norm": 0.4518530962962152, "learning_rate": 4.785771337422906e-06, "loss": 0.5163, "step": 7124 }, { "epoch": 2.798320572980983, "grad_norm": 0.44724673545538657, "learning_rate": 4.7857078777765796e-06, "loss": 0.5082, "step": 7125 }, { "epoch": 2.7987157322795753, "grad_norm": 0.46761704301589785, "learning_rate": 4.785644409153379e-06, "loss": 0.514, "step": 7126 }, { "epoch": 2.7991108915781675, "grad_norm": 0.4533222753692102, "learning_rate": 4.785580931553556e-06, "loss": 0.5179, "step": 7127 }, { "epoch": 2.7995060508767597, "grad_norm": 0.4559014721619301, "learning_rate": 4.7855174449773595e-06, "loss": 0.508, "step": 7128 }, { "epoch": 2.799901210175352, "grad_norm": 0.4677071703771446, "learning_rate": 4.785453949425038e-06, "loss": 0.5001, "step": 7129 }, { "epoch": 2.8002963694739442, "grad_norm": 0.4786331564258494, "learning_rate": 4.785390444896841e-06, "loss": 0.5206, "step": 7130 }, { "epoch": 2.8006915287725365, "grad_norm": 0.4518046203867029, "learning_rate": 4.7853269313930175e-06, "loss": 0.5311, "step": 7131 }, { "epoch": 2.8010866880711287, "grad_norm": 0.4493408611983403, "learning_rate": 4.785263408913818e-06, "loss": 0.5078, "step": 7132 }, { "epoch": 2.801481847369721, "grad_norm": 0.4413293231411184, "learning_rate": 4.7851998774594915e-06, "loss": 0.5015, "step": 7133 }, { "epoch": 2.801877006668313, "grad_norm": 0.4405329418854599, "learning_rate": 4.7851363370302875e-06, "loss": 0.4971, "step": 7134 }, { "epoch": 2.8022721659669054, "grad_norm": 0.47861777339015044, "learning_rate": 4.785072787626456e-06, "loss": 0.505, "step": 7135 }, { "epoch": 2.8026673252654977, "grad_norm": 0.4540541349102917, "learning_rate": 4.785009229248246e-06, "loss": 0.5346, "step": 7136 }, { "epoch": 2.80306248456409, "grad_norm": 0.4660733299034907, "learning_rate": 4.784945661895907e-06, "loss": 0.5054, "step": 7137 }, { "epoch": 2.803457643862682, "grad_norm": 0.454650518694851, "learning_rate": 4.784882085569689e-06, "loss": 0.5111, "step": 7138 }, { "epoch": 2.8038528031612744, "grad_norm": 2.699233093548558, "learning_rate": 4.784818500269842e-06, "loss": 0.512, "step": 7139 }, { "epoch": 2.8042479624598666, "grad_norm": 0.44347795457544964, "learning_rate": 4.7847549059966144e-06, "loss": 0.509, "step": 7140 }, { "epoch": 2.804643121758459, "grad_norm": 0.44865460293087767, "learning_rate": 4.784691302750257e-06, "loss": 0.4965, "step": 7141 }, { "epoch": 2.805038281057051, "grad_norm": 0.44186758621797884, "learning_rate": 4.78462769053102e-06, "loss": 0.4935, "step": 7142 }, { "epoch": 2.8054334403556433, "grad_norm": 0.4690246679204338, "learning_rate": 4.784564069339154e-06, "loss": 0.4998, "step": 7143 }, { "epoch": 2.8058285996542356, "grad_norm": 0.4413211990503885, "learning_rate": 4.7845004391749065e-06, "loss": 0.5136, "step": 7144 }, { "epoch": 2.806223758952828, "grad_norm": 0.4477276180507849, "learning_rate": 4.784436800038528e-06, "loss": 0.4832, "step": 7145 }, { "epoch": 2.80661891825142, "grad_norm": 0.4624922297012007, "learning_rate": 4.784373151930269e-06, "loss": 0.5086, "step": 7146 }, { "epoch": 2.8070140775500123, "grad_norm": 0.452716126237242, "learning_rate": 4.78430949485038e-06, "loss": 0.5171, "step": 7147 }, { "epoch": 2.8074092368486046, "grad_norm": 0.4531177548368098, "learning_rate": 4.78424582879911e-06, "loss": 0.5133, "step": 7148 }, { "epoch": 2.807804396147197, "grad_norm": 0.45184974430917935, "learning_rate": 4.7841821537767095e-06, "loss": 0.5062, "step": 7149 }, { "epoch": 2.808199555445789, "grad_norm": 0.45422086326819416, "learning_rate": 4.784118469783429e-06, "loss": 0.5039, "step": 7150 }, { "epoch": 2.8085947147443813, "grad_norm": 0.4253947576601844, "learning_rate": 4.784054776819517e-06, "loss": 0.4924, "step": 7151 }, { "epoch": 2.8089898740429735, "grad_norm": 0.4578816649664364, "learning_rate": 4.7839910748852255e-06, "loss": 0.5276, "step": 7152 }, { "epoch": 2.8093850333415658, "grad_norm": 0.4483518419377659, "learning_rate": 4.7839273639808035e-06, "loss": 0.4853, "step": 7153 }, { "epoch": 2.809780192640158, "grad_norm": 0.46266492861443637, "learning_rate": 4.783863644106502e-06, "loss": 0.5165, "step": 7154 }, { "epoch": 2.8101753519387502, "grad_norm": 0.45201465369049515, "learning_rate": 4.783799915262571e-06, "loss": 0.5112, "step": 7155 }, { "epoch": 2.8105705112373425, "grad_norm": 0.47492599053950296, "learning_rate": 4.783736177449262e-06, "loss": 0.5106, "step": 7156 }, { "epoch": 2.8109656705359347, "grad_norm": 0.46952297692513184, "learning_rate": 4.783672430666822e-06, "loss": 0.5118, "step": 7157 }, { "epoch": 2.811360829834527, "grad_norm": 0.45235950979008643, "learning_rate": 4.783608674915505e-06, "loss": 0.5173, "step": 7158 }, { "epoch": 2.811755989133119, "grad_norm": 0.4900581695225324, "learning_rate": 4.783544910195559e-06, "loss": 0.506, "step": 7159 }, { "epoch": 2.8121511484317114, "grad_norm": 0.4777901985820845, "learning_rate": 4.783481136507236e-06, "loss": 0.5085, "step": 7160 }, { "epoch": 2.8125463077303037, "grad_norm": 0.5041776097941606, "learning_rate": 4.783417353850785e-06, "loss": 0.5097, "step": 7161 }, { "epoch": 2.812941467028896, "grad_norm": 0.45039977578331913, "learning_rate": 4.7833535622264565e-06, "loss": 0.4931, "step": 7162 }, { "epoch": 2.813336626327488, "grad_norm": 0.46033667197541245, "learning_rate": 4.783289761634502e-06, "loss": 0.5218, "step": 7163 }, { "epoch": 2.8137317856260804, "grad_norm": 0.5045096295509437, "learning_rate": 4.783225952075173e-06, "loss": 0.506, "step": 7164 }, { "epoch": 2.8141269449246726, "grad_norm": 0.46808375423952775, "learning_rate": 4.783162133548718e-06, "loss": 0.5239, "step": 7165 }, { "epoch": 2.814522104223265, "grad_norm": 0.47727715778274427, "learning_rate": 4.783098306055389e-06, "loss": 0.5242, "step": 7166 }, { "epoch": 2.814917263521857, "grad_norm": 0.49741116135472996, "learning_rate": 4.7830344695954356e-06, "loss": 0.5105, "step": 7167 }, { "epoch": 2.8153124228204494, "grad_norm": 0.4765653183745411, "learning_rate": 4.78297062416911e-06, "loss": 0.5158, "step": 7168 }, { "epoch": 2.8157075821190416, "grad_norm": 0.441604632522944, "learning_rate": 4.782906769776661e-06, "loss": 0.4992, "step": 7169 }, { "epoch": 2.816102741417634, "grad_norm": 0.7388460812324761, "learning_rate": 4.782842906418341e-06, "loss": 0.5187, "step": 7170 }, { "epoch": 2.816497900716226, "grad_norm": 0.4647214598823728, "learning_rate": 4.7827790340944e-06, "loss": 0.5028, "step": 7171 }, { "epoch": 2.8168930600148183, "grad_norm": 0.46169878022399585, "learning_rate": 4.7827151528050894e-06, "loss": 0.5102, "step": 7172 }, { "epoch": 2.8172882193134106, "grad_norm": 0.46029778083095246, "learning_rate": 4.782651262550661e-06, "loss": 0.51, "step": 7173 }, { "epoch": 2.817683378612003, "grad_norm": 0.45078052262587254, "learning_rate": 4.782587363331363e-06, "loss": 0.5207, "step": 7174 }, { "epoch": 2.818078537910595, "grad_norm": 0.46397038369117694, "learning_rate": 4.782523455147448e-06, "loss": 0.5054, "step": 7175 }, { "epoch": 2.8184736972091873, "grad_norm": 0.45332697701497565, "learning_rate": 4.782459537999168e-06, "loss": 0.4939, "step": 7176 }, { "epoch": 2.8188688565077795, "grad_norm": 0.45625065796075703, "learning_rate": 4.782395611886771e-06, "loss": 0.5069, "step": 7177 }, { "epoch": 2.8192640158063718, "grad_norm": 0.45551406905446623, "learning_rate": 4.7823316768105115e-06, "loss": 0.5109, "step": 7178 }, { "epoch": 2.819659175104964, "grad_norm": 0.46505352472091316, "learning_rate": 4.782267732770639e-06, "loss": 0.5063, "step": 7179 }, { "epoch": 2.8200543344035562, "grad_norm": 0.46864702173796885, "learning_rate": 4.782203779767404e-06, "loss": 0.4998, "step": 7180 }, { "epoch": 2.8204494937021485, "grad_norm": 0.44137492397186884, "learning_rate": 4.782139817801059e-06, "loss": 0.5154, "step": 7181 }, { "epoch": 2.8208446530007407, "grad_norm": 0.457276576966862, "learning_rate": 4.782075846871855e-06, "loss": 0.506, "step": 7182 }, { "epoch": 2.821239812299333, "grad_norm": 0.4617777248102637, "learning_rate": 4.782011866980042e-06, "loss": 0.5193, "step": 7183 }, { "epoch": 2.821634971597925, "grad_norm": 0.4495925503990227, "learning_rate": 4.781947878125872e-06, "loss": 0.5091, "step": 7184 }, { "epoch": 2.8220301308965174, "grad_norm": 0.4464844531883246, "learning_rate": 4.781883880309597e-06, "loss": 0.5096, "step": 7185 }, { "epoch": 2.8224252901951097, "grad_norm": 0.4553422801335472, "learning_rate": 4.781819873531467e-06, "loss": 0.4845, "step": 7186 }, { "epoch": 2.822820449493702, "grad_norm": 0.508345334941812, "learning_rate": 4.781755857791734e-06, "loss": 0.512, "step": 7187 }, { "epoch": 2.823215608792294, "grad_norm": 0.46314240936594236, "learning_rate": 4.78169183309065e-06, "loss": 0.5283, "step": 7188 }, { "epoch": 2.8236107680908864, "grad_norm": 0.45552834604576314, "learning_rate": 4.781627799428466e-06, "loss": 0.4988, "step": 7189 }, { "epoch": 2.8240059273894786, "grad_norm": 0.43823171263924665, "learning_rate": 4.781563756805434e-06, "loss": 0.4898, "step": 7190 }, { "epoch": 2.824401086688071, "grad_norm": 0.4425700045729354, "learning_rate": 4.781499705221805e-06, "loss": 0.4902, "step": 7191 }, { "epoch": 2.824796245986663, "grad_norm": 0.46454792833992753, "learning_rate": 4.7814356446778294e-06, "loss": 0.5339, "step": 7192 }, { "epoch": 2.8251914052852554, "grad_norm": 0.4766460659681996, "learning_rate": 4.781371575173762e-06, "loss": 0.5107, "step": 7193 }, { "epoch": 2.8255865645838476, "grad_norm": 0.46813319069331144, "learning_rate": 4.78130749670985e-06, "loss": 0.5201, "step": 7194 }, { "epoch": 2.82598172388244, "grad_norm": 0.4545253394863707, "learning_rate": 4.781243409286349e-06, "loss": 0.5225, "step": 7195 }, { "epoch": 2.826376883181032, "grad_norm": 0.439864191746294, "learning_rate": 4.781179312903509e-06, "loss": 0.5041, "step": 7196 }, { "epoch": 2.8267720424796243, "grad_norm": 0.45086076483594706, "learning_rate": 4.781115207561582e-06, "loss": 0.5045, "step": 7197 }, { "epoch": 2.827167201778217, "grad_norm": 0.46994834299009564, "learning_rate": 4.781051093260819e-06, "loss": 0.4944, "step": 7198 }, { "epoch": 2.8275623610768092, "grad_norm": 0.4455482853226493, "learning_rate": 4.7809869700014726e-06, "loss": 0.5059, "step": 7199 }, { "epoch": 2.8279575203754015, "grad_norm": 0.45802471378032417, "learning_rate": 4.7809228377837934e-06, "loss": 0.5053, "step": 7200 }, { "epoch": 2.8283526796739937, "grad_norm": 0.4642886398985806, "learning_rate": 4.780858696608036e-06, "loss": 0.5204, "step": 7201 }, { "epoch": 2.828747838972586, "grad_norm": 0.4489929183982641, "learning_rate": 4.78079454647445e-06, "loss": 0.5073, "step": 7202 }, { "epoch": 2.829142998271178, "grad_norm": 0.4626978965499321, "learning_rate": 4.7807303873832875e-06, "loss": 0.5068, "step": 7203 }, { "epoch": 2.8295381575697705, "grad_norm": 0.45282255975863683, "learning_rate": 4.780666219334802e-06, "loss": 0.5095, "step": 7204 }, { "epoch": 2.8299333168683627, "grad_norm": 0.45313611105031126, "learning_rate": 4.780602042329244e-06, "loss": 0.5198, "step": 7205 }, { "epoch": 2.830328476166955, "grad_norm": 0.4584691584839706, "learning_rate": 4.7805378563668655e-06, "loss": 0.5147, "step": 7206 }, { "epoch": 2.830723635465547, "grad_norm": 0.46044793656981825, "learning_rate": 4.780473661447921e-06, "loss": 0.5205, "step": 7207 }, { "epoch": 2.8311187947641394, "grad_norm": 0.4583164398209612, "learning_rate": 4.7804094575726585e-06, "loss": 0.5222, "step": 7208 }, { "epoch": 2.8315139540627317, "grad_norm": 0.4498029349438353, "learning_rate": 4.780345244741333e-06, "loss": 0.4927, "step": 7209 }, { "epoch": 2.831909113361324, "grad_norm": 0.44328976919231206, "learning_rate": 4.780281022954196e-06, "loss": 0.5102, "step": 7210 }, { "epoch": 2.832304272659916, "grad_norm": 0.44810747427037423, "learning_rate": 4.7802167922115e-06, "loss": 0.5071, "step": 7211 }, { "epoch": 2.8326994319585084, "grad_norm": 0.44384368117197937, "learning_rate": 4.780152552513499e-06, "loss": 0.4983, "step": 7212 }, { "epoch": 2.8330945912571006, "grad_norm": 0.44676501579869293, "learning_rate": 4.7800883038604404e-06, "loss": 0.5002, "step": 7213 }, { "epoch": 2.833489750555693, "grad_norm": 0.44242213343088016, "learning_rate": 4.780024046252581e-06, "loss": 0.5114, "step": 7214 }, { "epoch": 2.833884909854285, "grad_norm": 0.44041006597665694, "learning_rate": 4.779959779690171e-06, "loss": 0.5059, "step": 7215 }, { "epoch": 2.8342800691528773, "grad_norm": 0.4440298358961055, "learning_rate": 4.779895504173464e-06, "loss": 0.5156, "step": 7216 }, { "epoch": 2.8346752284514696, "grad_norm": 0.45776869409256077, "learning_rate": 4.779831219702712e-06, "loss": 0.5051, "step": 7217 }, { "epoch": 2.835070387750062, "grad_norm": 0.43770623591530705, "learning_rate": 4.7797669262781665e-06, "loss": 0.4988, "step": 7218 }, { "epoch": 2.835465547048654, "grad_norm": 0.4442467675600192, "learning_rate": 4.779702623900082e-06, "loss": 0.5167, "step": 7219 }, { "epoch": 2.8358607063472463, "grad_norm": 0.4453421691195409, "learning_rate": 4.779638312568708e-06, "loss": 0.498, "step": 7220 }, { "epoch": 2.8362558656458385, "grad_norm": 0.43148125749848565, "learning_rate": 4.779573992284301e-06, "loss": 0.4994, "step": 7221 }, { "epoch": 2.8366510249444308, "grad_norm": 0.4384294284300938, "learning_rate": 4.779509663047111e-06, "loss": 0.4924, "step": 7222 }, { "epoch": 2.837046184243023, "grad_norm": 0.450005448688682, "learning_rate": 4.779445324857391e-06, "loss": 0.5015, "step": 7223 }, { "epoch": 2.8374413435416153, "grad_norm": 0.4383831955520918, "learning_rate": 4.779380977715394e-06, "loss": 0.5075, "step": 7224 }, { "epoch": 2.8378365028402075, "grad_norm": 0.44872433012548524, "learning_rate": 4.7793166216213725e-06, "loss": 0.4907, "step": 7225 }, { "epoch": 2.8382316621387997, "grad_norm": 0.4486414248911323, "learning_rate": 4.77925225657558e-06, "loss": 0.5103, "step": 7226 }, { "epoch": 2.838626821437392, "grad_norm": 0.4505470486129958, "learning_rate": 4.7791878825782675e-06, "loss": 0.4949, "step": 7227 }, { "epoch": 2.839021980735984, "grad_norm": 0.45187161145250815, "learning_rate": 4.77912349962969e-06, "loss": 0.5004, "step": 7228 }, { "epoch": 2.8394171400345765, "grad_norm": 0.4532129642345992, "learning_rate": 4.779059107730099e-06, "loss": 0.4977, "step": 7229 }, { "epoch": 2.8398122993331687, "grad_norm": 0.4463390504441417, "learning_rate": 4.7789947068797474e-06, "loss": 0.5049, "step": 7230 }, { "epoch": 2.840207458631761, "grad_norm": 0.44255699499296886, "learning_rate": 4.7789302970788895e-06, "loss": 0.4965, "step": 7231 }, { "epoch": 2.840602617930353, "grad_norm": 0.46126441571913374, "learning_rate": 4.7788658783277765e-06, "loss": 0.5196, "step": 7232 }, { "epoch": 2.8409977772289454, "grad_norm": 0.4545844577709667, "learning_rate": 4.778801450626662e-06, "loss": 0.5099, "step": 7233 }, { "epoch": 2.8413929365275377, "grad_norm": 0.45959479224584476, "learning_rate": 4.7787370139758e-06, "loss": 0.5083, "step": 7234 }, { "epoch": 2.84178809582613, "grad_norm": 0.4506555503493617, "learning_rate": 4.7786725683754415e-06, "loss": 0.5267, "step": 7235 }, { "epoch": 2.842183255124722, "grad_norm": 0.4569111610608343, "learning_rate": 4.7786081138258414e-06, "loss": 0.5184, "step": 7236 }, { "epoch": 2.8425784144233144, "grad_norm": 0.4436242554559036, "learning_rate": 4.778543650327252e-06, "loss": 0.4825, "step": 7237 }, { "epoch": 2.8429735737219066, "grad_norm": 0.4452045626348084, "learning_rate": 4.778479177879928e-06, "loss": 0.5106, "step": 7238 }, { "epoch": 2.843368733020499, "grad_norm": 0.44570436675947306, "learning_rate": 4.77841469648412e-06, "loss": 0.5197, "step": 7239 }, { "epoch": 2.843763892319091, "grad_norm": 0.44104364743730273, "learning_rate": 4.778350206140083e-06, "loss": 0.4942, "step": 7240 }, { "epoch": 2.8441590516176833, "grad_norm": 0.470164189035705, "learning_rate": 4.77828570684807e-06, "loss": 0.52, "step": 7241 }, { "epoch": 2.8445542109162756, "grad_norm": 0.45844457094618113, "learning_rate": 4.778221198608333e-06, "loss": 0.4999, "step": 7242 }, { "epoch": 2.844949370214868, "grad_norm": 0.45188438120573965, "learning_rate": 4.778156681421129e-06, "loss": 0.5137, "step": 7243 }, { "epoch": 2.84534452951346, "grad_norm": 0.4865736628712067, "learning_rate": 4.778092155286707e-06, "loss": 0.52, "step": 7244 }, { "epoch": 2.8457396888120523, "grad_norm": 0.4618516637382695, "learning_rate": 4.778027620205323e-06, "loss": 0.5022, "step": 7245 }, { "epoch": 2.8461348481106445, "grad_norm": 0.4518757121486141, "learning_rate": 4.77796307617723e-06, "loss": 0.513, "step": 7246 }, { "epoch": 2.846530007409237, "grad_norm": 0.45286344906811393, "learning_rate": 4.777898523202681e-06, "loss": 0.4984, "step": 7247 }, { "epoch": 2.846925166707829, "grad_norm": 0.46241029465759526, "learning_rate": 4.777833961281929e-06, "loss": 0.5241, "step": 7248 }, { "epoch": 2.8473203260064213, "grad_norm": 0.4572057988626441, "learning_rate": 4.7777693904152295e-06, "loss": 0.4991, "step": 7249 }, { "epoch": 2.8477154853050135, "grad_norm": 0.46688359869533963, "learning_rate": 4.7777048106028345e-06, "loss": 0.5258, "step": 7250 }, { "epoch": 2.8481106446036057, "grad_norm": 0.4419843786987222, "learning_rate": 4.777640221844998e-06, "loss": 0.5003, "step": 7251 }, { "epoch": 2.848505803902198, "grad_norm": 0.4509604918984796, "learning_rate": 4.777575624141975e-06, "loss": 0.4943, "step": 7252 }, { "epoch": 2.8489009632007902, "grad_norm": 0.4544386957164005, "learning_rate": 4.777511017494017e-06, "loss": 0.5191, "step": 7253 }, { "epoch": 2.8492961224993825, "grad_norm": 0.46217731638209764, "learning_rate": 4.777446401901378e-06, "loss": 0.5026, "step": 7254 }, { "epoch": 2.8496912817979747, "grad_norm": 0.46198789235520654, "learning_rate": 4.777381777364314e-06, "loss": 0.5046, "step": 7255 }, { "epoch": 2.850086441096567, "grad_norm": 0.467501208212079, "learning_rate": 4.777317143883076e-06, "loss": 0.5054, "step": 7256 }, { "epoch": 2.850481600395159, "grad_norm": 0.45572805493163815, "learning_rate": 4.77725250145792e-06, "loss": 0.502, "step": 7257 }, { "epoch": 2.8508767596937514, "grad_norm": 0.4585262214570415, "learning_rate": 4.777187850089098e-06, "loss": 0.5001, "step": 7258 }, { "epoch": 2.8512719189923437, "grad_norm": 0.46257434246120355, "learning_rate": 4.777123189776865e-06, "loss": 0.4884, "step": 7259 }, { "epoch": 2.851667078290936, "grad_norm": 0.44787734910546845, "learning_rate": 4.777058520521476e-06, "loss": 0.4854, "step": 7260 }, { "epoch": 2.852062237589528, "grad_norm": 0.4679108827035021, "learning_rate": 4.7769938423231825e-06, "loss": 0.4984, "step": 7261 }, { "epoch": 2.8524573968881204, "grad_norm": 0.5824913189397212, "learning_rate": 4.776929155182241e-06, "loss": 0.5165, "step": 7262 }, { "epoch": 2.8528525561867126, "grad_norm": 0.47000334347932204, "learning_rate": 4.776864459098904e-06, "loss": 0.5063, "step": 7263 }, { "epoch": 2.853247715485305, "grad_norm": 0.46271998546318105, "learning_rate": 4.776799754073425e-06, "loss": 0.4995, "step": 7264 }, { "epoch": 2.853642874783897, "grad_norm": 0.46020642262681605, "learning_rate": 4.776735040106061e-06, "loss": 0.5026, "step": 7265 }, { "epoch": 2.8540380340824894, "grad_norm": 0.4570714611160204, "learning_rate": 4.776670317197063e-06, "loss": 0.4976, "step": 7266 }, { "epoch": 2.8544331933810816, "grad_norm": 0.45694522553571065, "learning_rate": 4.776605585346687e-06, "loss": 0.4947, "step": 7267 }, { "epoch": 2.854828352679674, "grad_norm": 0.45434827116015725, "learning_rate": 4.776540844555186e-06, "loss": 0.5041, "step": 7268 }, { "epoch": 2.8552235119782665, "grad_norm": 0.4449151544183041, "learning_rate": 4.776476094822815e-06, "loss": 0.4964, "step": 7269 }, { "epoch": 2.8556186712768588, "grad_norm": 0.455998711224259, "learning_rate": 4.7764113361498284e-06, "loss": 0.5014, "step": 7270 }, { "epoch": 2.856013830575451, "grad_norm": 0.4336150479227637, "learning_rate": 4.776346568536481e-06, "loss": 0.4975, "step": 7271 }, { "epoch": 2.8564089898740432, "grad_norm": 0.46393244313991583, "learning_rate": 4.776281791983026e-06, "loss": 0.5004, "step": 7272 }, { "epoch": 2.8568041491726355, "grad_norm": 0.4738720171848614, "learning_rate": 4.776217006489719e-06, "loss": 0.5105, "step": 7273 }, { "epoch": 2.8571993084712277, "grad_norm": 0.4533385874249988, "learning_rate": 4.776152212056813e-06, "loss": 0.4789, "step": 7274 }, { "epoch": 2.85759446776982, "grad_norm": 0.45076697220608347, "learning_rate": 4.7760874086845635e-06, "loss": 0.5222, "step": 7275 }, { "epoch": 2.857989627068412, "grad_norm": 0.45274267999042384, "learning_rate": 4.7760225963732255e-06, "loss": 0.5075, "step": 7276 }, { "epoch": 2.8583847863670044, "grad_norm": 0.4543131577956172, "learning_rate": 4.775957775123052e-06, "loss": 0.495, "step": 7277 }, { "epoch": 2.8587799456655967, "grad_norm": 0.4551171658276694, "learning_rate": 4.775892944934299e-06, "loss": 0.5094, "step": 7278 }, { "epoch": 2.859175104964189, "grad_norm": 0.45140156710288193, "learning_rate": 4.77582810580722e-06, "loss": 0.5102, "step": 7279 }, { "epoch": 2.859570264262781, "grad_norm": 0.45003747644498915, "learning_rate": 4.7757632577420696e-06, "loss": 0.4983, "step": 7280 }, { "epoch": 2.8599654235613734, "grad_norm": 0.45665797218831644, "learning_rate": 4.775698400739104e-06, "loss": 0.5201, "step": 7281 }, { "epoch": 2.8603605828599656, "grad_norm": 0.4621874107147548, "learning_rate": 4.775633534798576e-06, "loss": 0.5044, "step": 7282 }, { "epoch": 2.860755742158558, "grad_norm": 0.45736986361350984, "learning_rate": 4.775568659920742e-06, "loss": 0.4892, "step": 7283 }, { "epoch": 2.86115090145715, "grad_norm": 0.44664628991297056, "learning_rate": 4.775503776105857e-06, "loss": 0.5025, "step": 7284 }, { "epoch": 2.8615460607557424, "grad_norm": 0.44279821627241905, "learning_rate": 4.775438883354173e-06, "loss": 0.5267, "step": 7285 }, { "epoch": 2.8619412200543346, "grad_norm": 0.4555964442573869, "learning_rate": 4.775373981665949e-06, "loss": 0.5362, "step": 7286 }, { "epoch": 2.862336379352927, "grad_norm": 0.4616612464408454, "learning_rate": 4.775309071041435e-06, "loss": 0.5024, "step": 7287 }, { "epoch": 2.862731538651519, "grad_norm": 0.44173733289424433, "learning_rate": 4.7752441514808905e-06, "loss": 0.4976, "step": 7288 }, { "epoch": 2.8631266979501113, "grad_norm": 0.44729707048188977, "learning_rate": 4.775179222984568e-06, "loss": 0.5, "step": 7289 }, { "epoch": 2.8635218572487036, "grad_norm": 0.45155830390167095, "learning_rate": 4.775114285552723e-06, "loss": 0.5023, "step": 7290 }, { "epoch": 2.863917016547296, "grad_norm": 0.4777436940373346, "learning_rate": 4.7750493391856116e-06, "loss": 0.5002, "step": 7291 }, { "epoch": 2.864312175845888, "grad_norm": 0.45674967310156706, "learning_rate": 4.7749843838834865e-06, "loss": 0.5122, "step": 7292 }, { "epoch": 2.8647073351444803, "grad_norm": 0.4399469754211526, "learning_rate": 4.774919419646605e-06, "loss": 0.5023, "step": 7293 }, { "epoch": 2.8651024944430725, "grad_norm": 0.4405627072122233, "learning_rate": 4.774854446475221e-06, "loss": 0.4848, "step": 7294 }, { "epoch": 2.8654976537416648, "grad_norm": 0.4654307974697821, "learning_rate": 4.7747894643695904e-06, "loss": 0.5148, "step": 7295 }, { "epoch": 2.865892813040257, "grad_norm": 0.4502154850100158, "learning_rate": 4.774724473329968e-06, "loss": 0.4916, "step": 7296 }, { "epoch": 2.8662879723388492, "grad_norm": 0.45464786407421726, "learning_rate": 4.7746594733566085e-06, "loss": 0.5084, "step": 7297 }, { "epoch": 2.8666831316374415, "grad_norm": 0.46932700358577506, "learning_rate": 4.774594464449769e-06, "loss": 0.521, "step": 7298 }, { "epoch": 2.8670782909360337, "grad_norm": 0.455144441625113, "learning_rate": 4.774529446609703e-06, "loss": 0.5042, "step": 7299 }, { "epoch": 2.867473450234626, "grad_norm": 0.456638540958973, "learning_rate": 4.7744644198366665e-06, "loss": 0.5098, "step": 7300 }, { "epoch": 2.867868609533218, "grad_norm": 0.4731909071584723, "learning_rate": 4.774399384130916e-06, "loss": 0.4896, "step": 7301 }, { "epoch": 2.8682637688318104, "grad_norm": 0.45219579881485555, "learning_rate": 4.774334339492704e-06, "loss": 0.4995, "step": 7302 }, { "epoch": 2.8686589281304027, "grad_norm": 0.47541582012310035, "learning_rate": 4.774269285922289e-06, "loss": 0.5225, "step": 7303 }, { "epoch": 2.869054087428995, "grad_norm": 0.4562665607670454, "learning_rate": 4.774204223419925e-06, "loss": 0.4862, "step": 7304 }, { "epoch": 2.869449246727587, "grad_norm": 0.47218102712930754, "learning_rate": 4.774139151985867e-06, "loss": 0.5075, "step": 7305 }, { "epoch": 2.8698444060261794, "grad_norm": 0.4403127626196469, "learning_rate": 4.774074071620372e-06, "loss": 0.5105, "step": 7306 }, { "epoch": 2.8702395653247716, "grad_norm": 0.45708262550702466, "learning_rate": 4.7740089823236955e-06, "loss": 0.4972, "step": 7307 }, { "epoch": 2.870634724623364, "grad_norm": 0.46018852248502795, "learning_rate": 4.773943884096091e-06, "loss": 0.4945, "step": 7308 }, { "epoch": 2.871029883921956, "grad_norm": 0.4601257085296602, "learning_rate": 4.773878776937817e-06, "loss": 0.5051, "step": 7309 }, { "epoch": 2.8714250432205484, "grad_norm": 0.45920489045937124, "learning_rate": 4.7738136608491284e-06, "loss": 0.505, "step": 7310 }, { "epoch": 2.8718202025191406, "grad_norm": 0.45371392464155347, "learning_rate": 4.77374853583028e-06, "loss": 0.4837, "step": 7311 }, { "epoch": 2.872215361817733, "grad_norm": 0.45429850072636996, "learning_rate": 4.773683401881527e-06, "loss": 0.4857, "step": 7312 }, { "epoch": 2.872610521116325, "grad_norm": 0.4680648093877002, "learning_rate": 4.773618259003127e-06, "loss": 0.5074, "step": 7313 }, { "epoch": 2.8730056804149173, "grad_norm": 0.461576978903435, "learning_rate": 4.773553107195336e-06, "loss": 0.513, "step": 7314 }, { "epoch": 2.8734008397135096, "grad_norm": 0.5184320923986061, "learning_rate": 4.773487946458407e-06, "loss": 0.4973, "step": 7315 }, { "epoch": 2.873795999012102, "grad_norm": 0.43754112690923064, "learning_rate": 4.7734227767926e-06, "loss": 0.4991, "step": 7316 }, { "epoch": 2.874191158310694, "grad_norm": 0.4487036188195782, "learning_rate": 4.773357598198167e-06, "loss": 0.5125, "step": 7317 }, { "epoch": 2.8745863176092863, "grad_norm": 0.4574040806296256, "learning_rate": 4.773292410675366e-06, "loss": 0.5261, "step": 7318 }, { "epoch": 2.8749814769078785, "grad_norm": 0.4490619381032573, "learning_rate": 4.773227214224454e-06, "loss": 0.4986, "step": 7319 }, { "epoch": 2.8753766362064708, "grad_norm": 0.47037528439860465, "learning_rate": 4.773162008845685e-06, "loss": 0.4982, "step": 7320 }, { "epoch": 2.875771795505063, "grad_norm": 0.4506229178725294, "learning_rate": 4.773096794539317e-06, "loss": 0.4922, "step": 7321 }, { "epoch": 2.8761669548036553, "grad_norm": 0.43652933566594937, "learning_rate": 4.773031571305604e-06, "loss": 0.5004, "step": 7322 }, { "epoch": 2.8765621141022475, "grad_norm": 0.5540614478567485, "learning_rate": 4.7729663391448035e-06, "loss": 0.5186, "step": 7323 }, { "epoch": 2.8769572734008397, "grad_norm": 0.46194230756962723, "learning_rate": 4.772901098057172e-06, "loss": 0.5071, "step": 7324 }, { "epoch": 2.877352432699432, "grad_norm": 0.465341882765266, "learning_rate": 4.772835848042965e-06, "loss": 0.5202, "step": 7325 }, { "epoch": 2.877747591998024, "grad_norm": 0.4778559900906391, "learning_rate": 4.772770589102438e-06, "loss": 0.4969, "step": 7326 }, { "epoch": 2.8781427512966165, "grad_norm": 0.4405625019192915, "learning_rate": 4.772705321235849e-06, "loss": 0.4917, "step": 7327 }, { "epoch": 2.8785379105952087, "grad_norm": 0.4506373137457211, "learning_rate": 4.772640044443454e-06, "loss": 0.507, "step": 7328 }, { "epoch": 2.878933069893801, "grad_norm": 0.46863725137476087, "learning_rate": 4.772574758725507e-06, "loss": 0.4913, "step": 7329 }, { "epoch": 2.879328229192393, "grad_norm": 0.4987365734752338, "learning_rate": 4.772509464082269e-06, "loss": 0.5125, "step": 7330 }, { "epoch": 2.8797233884909854, "grad_norm": 0.45031884644226655, "learning_rate": 4.772444160513992e-06, "loss": 0.497, "step": 7331 }, { "epoch": 2.8801185477895777, "grad_norm": 0.453093686573893, "learning_rate": 4.772378848020935e-06, "loss": 0.5252, "step": 7332 }, { "epoch": 2.88051370708817, "grad_norm": 0.46311837414420265, "learning_rate": 4.772313526603354e-06, "loss": 0.5245, "step": 7333 }, { "epoch": 2.880908866386762, "grad_norm": 0.5599787194673955, "learning_rate": 4.772248196261504e-06, "loss": 0.492, "step": 7334 }, { "epoch": 2.8813040256853544, "grad_norm": 0.47600078027671755, "learning_rate": 4.7721828569956435e-06, "loss": 0.5181, "step": 7335 }, { "epoch": 2.8816991849839466, "grad_norm": 0.4672363357538037, "learning_rate": 4.772117508806029e-06, "loss": 0.5156, "step": 7336 }, { "epoch": 2.882094344282539, "grad_norm": 0.44982543608975994, "learning_rate": 4.7720521516929155e-06, "loss": 0.5017, "step": 7337 }, { "epoch": 2.882489503581131, "grad_norm": 0.47014208289943477, "learning_rate": 4.7719867856565615e-06, "loss": 0.5021, "step": 7338 }, { "epoch": 2.8828846628797233, "grad_norm": 0.4641826462044799, "learning_rate": 4.771921410697224e-06, "loss": 0.51, "step": 7339 }, { "epoch": 2.8832798221783156, "grad_norm": 0.4480770317445626, "learning_rate": 4.771856026815157e-06, "loss": 0.4978, "step": 7340 }, { "epoch": 2.883674981476908, "grad_norm": 0.4764205030319496, "learning_rate": 4.77179063401062e-06, "loss": 0.5137, "step": 7341 }, { "epoch": 2.8840701407755, "grad_norm": 0.44175592798798174, "learning_rate": 4.771725232283869e-06, "loss": 0.5218, "step": 7342 }, { "epoch": 2.8844653000740923, "grad_norm": 0.4522738770160621, "learning_rate": 4.771659821635161e-06, "loss": 0.5073, "step": 7343 }, { "epoch": 2.8848604593726845, "grad_norm": 0.45063177134700066, "learning_rate": 4.771594402064752e-06, "loss": 0.5103, "step": 7344 }, { "epoch": 2.885255618671277, "grad_norm": 0.4519278901335532, "learning_rate": 4.7715289735729e-06, "loss": 0.508, "step": 7345 }, { "epoch": 2.885650777969869, "grad_norm": 0.45610027675210985, "learning_rate": 4.771463536159861e-06, "loss": 0.4991, "step": 7346 }, { "epoch": 2.8860459372684613, "grad_norm": 0.4657657398587538, "learning_rate": 4.771398089825893e-06, "loss": 0.4981, "step": 7347 }, { "epoch": 2.8864410965670535, "grad_norm": 0.4504836071125689, "learning_rate": 4.771332634571252e-06, "loss": 0.5194, "step": 7348 }, { "epoch": 2.8868362558656457, "grad_norm": 0.4482132370137858, "learning_rate": 4.771267170396197e-06, "loss": 0.5019, "step": 7349 }, { "epoch": 2.887231415164238, "grad_norm": 0.44368133807741067, "learning_rate": 4.771201697300982e-06, "loss": 0.5046, "step": 7350 }, { "epoch": 2.88762657446283, "grad_norm": 0.4506387198321668, "learning_rate": 4.7711362152858665e-06, "loss": 0.5078, "step": 7351 }, { "epoch": 2.8880217337614225, "grad_norm": 0.4570475595802833, "learning_rate": 4.771070724351108e-06, "loss": 0.5087, "step": 7352 }, { "epoch": 2.8884168930600147, "grad_norm": 0.45887991966607716, "learning_rate": 4.771005224496962e-06, "loss": 0.5119, "step": 7353 }, { "epoch": 2.888812052358607, "grad_norm": 0.4556415441058092, "learning_rate": 4.770939715723686e-06, "loss": 0.5164, "step": 7354 }, { "epoch": 2.889207211657199, "grad_norm": 0.44832761334800575, "learning_rate": 4.7708741980315386e-06, "loss": 0.4931, "step": 7355 }, { "epoch": 2.8896023709557914, "grad_norm": 0.47312914667107786, "learning_rate": 4.770808671420775e-06, "loss": 0.5121, "step": 7356 }, { "epoch": 2.8899975302543837, "grad_norm": 0.6480164808981641, "learning_rate": 4.770743135891656e-06, "loss": 0.5064, "step": 7357 }, { "epoch": 2.890392689552976, "grad_norm": 0.4502180251195585, "learning_rate": 4.770677591444434e-06, "loss": 0.5109, "step": 7358 }, { "epoch": 2.890787848851568, "grad_norm": 0.44215136184674697, "learning_rate": 4.770612038079372e-06, "loss": 0.5187, "step": 7359 }, { "epoch": 2.8911830081501604, "grad_norm": 0.46977522141656114, "learning_rate": 4.770546475796724e-06, "loss": 0.5058, "step": 7360 }, { "epoch": 2.8915781674487526, "grad_norm": 0.44217645544646506, "learning_rate": 4.770480904596747e-06, "loss": 0.4908, "step": 7361 }, { "epoch": 2.891973326747345, "grad_norm": 0.45598608295802123, "learning_rate": 4.770415324479701e-06, "loss": 0.5157, "step": 7362 }, { "epoch": 2.892368486045937, "grad_norm": 0.4412753609610493, "learning_rate": 4.770349735445841e-06, "loss": 0.4907, "step": 7363 }, { "epoch": 2.8927636453445293, "grad_norm": 0.4688090294235565, "learning_rate": 4.770284137495428e-06, "loss": 0.5032, "step": 7364 }, { "epoch": 2.8931588046431216, "grad_norm": 0.46203038169470984, "learning_rate": 4.770218530628716e-06, "loss": 0.5207, "step": 7365 }, { "epoch": 2.893553963941714, "grad_norm": 0.45196862440782837, "learning_rate": 4.770152914845964e-06, "loss": 0.498, "step": 7366 }, { "epoch": 2.893949123240306, "grad_norm": 0.45872640459487846, "learning_rate": 4.77008729014743e-06, "loss": 0.5073, "step": 7367 }, { "epoch": 2.8943442825388983, "grad_norm": 0.4534048366751859, "learning_rate": 4.770021656533372e-06, "loss": 0.4985, "step": 7368 }, { "epoch": 2.8947394418374905, "grad_norm": 0.443264773459016, "learning_rate": 4.769956014004047e-06, "loss": 0.5142, "step": 7369 }, { "epoch": 2.895134601136083, "grad_norm": 0.4379839420016731, "learning_rate": 4.769890362559714e-06, "loss": 0.4825, "step": 7370 }, { "epoch": 2.895529760434675, "grad_norm": 0.4481743034206296, "learning_rate": 4.769824702200629e-06, "loss": 0.5016, "step": 7371 }, { "epoch": 2.8959249197332673, "grad_norm": 0.4451918949234217, "learning_rate": 4.769759032927051e-06, "loss": 0.4956, "step": 7372 }, { "epoch": 2.8963200790318595, "grad_norm": 0.5979190982944923, "learning_rate": 4.7696933547392375e-06, "loss": 0.5125, "step": 7373 }, { "epoch": 2.8967152383304517, "grad_norm": 0.4563147906713146, "learning_rate": 4.769627667637448e-06, "loss": 0.5096, "step": 7374 }, { "epoch": 2.897110397629044, "grad_norm": 0.46718974725304174, "learning_rate": 4.7695619716219384e-06, "loss": 0.5175, "step": 7375 }, { "epoch": 2.8975055569276362, "grad_norm": 0.44977361032355456, "learning_rate": 4.7694962666929674e-06, "loss": 0.4988, "step": 7376 }, { "epoch": 2.8979007162262285, "grad_norm": 0.4770742328128973, "learning_rate": 4.769430552850793e-06, "loss": 0.5208, "step": 7377 }, { "epoch": 2.8982958755248207, "grad_norm": 0.46699375367596374, "learning_rate": 4.769364830095674e-06, "loss": 0.5051, "step": 7378 }, { "epoch": 2.898691034823413, "grad_norm": 0.4793154561864407, "learning_rate": 4.769299098427868e-06, "loss": 0.505, "step": 7379 }, { "epoch": 2.899086194122005, "grad_norm": 0.45166395843198986, "learning_rate": 4.769233357847633e-06, "loss": 0.5283, "step": 7380 }, { "epoch": 2.8994813534205974, "grad_norm": 0.47128699616743297, "learning_rate": 4.769167608355227e-06, "loss": 0.516, "step": 7381 }, { "epoch": 2.8998765127191897, "grad_norm": 0.4582075932359091, "learning_rate": 4.769101849950909e-06, "loss": 0.5087, "step": 7382 }, { "epoch": 2.900271672017782, "grad_norm": 0.4557041584886058, "learning_rate": 4.7690360826349365e-06, "loss": 0.5215, "step": 7383 }, { "epoch": 2.900666831316374, "grad_norm": 0.45326360410121896, "learning_rate": 4.768970306407569e-06, "loss": 0.5068, "step": 7384 }, { "epoch": 2.9010619906149664, "grad_norm": 0.44111797702117767, "learning_rate": 4.7689045212690625e-06, "loss": 0.4926, "step": 7385 }, { "epoch": 2.9014571499135586, "grad_norm": 0.47593611801487395, "learning_rate": 4.7688387272196775e-06, "loss": 0.5163, "step": 7386 }, { "epoch": 2.901852309212151, "grad_norm": 0.4473189561304879, "learning_rate": 4.768772924259671e-06, "loss": 0.5233, "step": 7387 }, { "epoch": 2.9022474685107436, "grad_norm": 0.44995418334579357, "learning_rate": 4.768707112389303e-06, "loss": 0.5283, "step": 7388 }, { "epoch": 2.902642627809336, "grad_norm": 0.45948641092246273, "learning_rate": 4.768641291608831e-06, "loss": 0.5166, "step": 7389 }, { "epoch": 2.903037787107928, "grad_norm": 0.4697983204082374, "learning_rate": 4.768575461918513e-06, "loss": 0.5185, "step": 7390 }, { "epoch": 2.9034329464065203, "grad_norm": 0.4654187783348642, "learning_rate": 4.768509623318609e-06, "loss": 0.5268, "step": 7391 }, { "epoch": 2.9038281057051125, "grad_norm": 0.4558337696980285, "learning_rate": 4.768443775809376e-06, "loss": 0.5024, "step": 7392 }, { "epoch": 2.9042232650037048, "grad_norm": 0.4606281586552123, "learning_rate": 4.768377919391074e-06, "loss": 0.514, "step": 7393 }, { "epoch": 2.904618424302297, "grad_norm": 0.4613832493128262, "learning_rate": 4.768312054063961e-06, "loss": 0.5055, "step": 7394 }, { "epoch": 2.9050135836008892, "grad_norm": 0.455634197555287, "learning_rate": 4.768246179828295e-06, "loss": 0.5112, "step": 7395 }, { "epoch": 2.9054087428994815, "grad_norm": 0.45367728348365555, "learning_rate": 4.768180296684335e-06, "loss": 0.5233, "step": 7396 }, { "epoch": 2.9058039021980737, "grad_norm": 0.4571715325974132, "learning_rate": 4.768114404632341e-06, "loss": 0.5039, "step": 7397 }, { "epoch": 2.906199061496666, "grad_norm": 0.45459414076829735, "learning_rate": 4.768048503672571e-06, "loss": 0.4977, "step": 7398 }, { "epoch": 2.906594220795258, "grad_norm": 0.476609792116574, "learning_rate": 4.7679825938052825e-06, "loss": 0.4998, "step": 7399 }, { "epoch": 2.9069893800938504, "grad_norm": 0.44935073335968, "learning_rate": 4.7679166750307364e-06, "loss": 0.5068, "step": 7400 }, { "epoch": 2.9073845393924427, "grad_norm": 0.44816683458804896, "learning_rate": 4.767850747349191e-06, "loss": 0.5195, "step": 7401 }, { "epoch": 2.907779698691035, "grad_norm": 0.514040565789535, "learning_rate": 4.767784810760905e-06, "loss": 0.4983, "step": 7402 }, { "epoch": 2.908174857989627, "grad_norm": 0.4678938516683875, "learning_rate": 4.767718865266136e-06, "loss": 0.5241, "step": 7403 }, { "epoch": 2.9085700172882194, "grad_norm": 0.44463113513631825, "learning_rate": 4.767652910865146e-06, "loss": 0.4928, "step": 7404 }, { "epoch": 2.9089651765868116, "grad_norm": 0.44305031644764015, "learning_rate": 4.767586947558191e-06, "loss": 0.5274, "step": 7405 }, { "epoch": 2.909360335885404, "grad_norm": 0.4608795710666698, "learning_rate": 4.767520975345533e-06, "loss": 0.496, "step": 7406 }, { "epoch": 2.909755495183996, "grad_norm": 0.45454535548418995, "learning_rate": 4.767454994227428e-06, "loss": 0.5294, "step": 7407 }, { "epoch": 2.9101506544825884, "grad_norm": 0.4438403467716369, "learning_rate": 4.767389004204137e-06, "loss": 0.5058, "step": 7408 }, { "epoch": 2.9105458137811806, "grad_norm": 0.45975886229728474, "learning_rate": 4.76732300527592e-06, "loss": 0.4937, "step": 7409 }, { "epoch": 2.910940973079773, "grad_norm": 0.4679652680374663, "learning_rate": 4.767256997443034e-06, "loss": 0.5338, "step": 7410 }, { "epoch": 2.911336132378365, "grad_norm": 0.4443491416640828, "learning_rate": 4.767190980705739e-06, "loss": 0.5009, "step": 7411 }, { "epoch": 2.9117312916769573, "grad_norm": 0.4466231335445614, "learning_rate": 4.767124955064295e-06, "loss": 0.5008, "step": 7412 }, { "epoch": 2.9121264509755496, "grad_norm": 0.45416773779205705, "learning_rate": 4.767058920518961e-06, "loss": 0.5097, "step": 7413 }, { "epoch": 2.912521610274142, "grad_norm": 0.4407003267094362, "learning_rate": 4.766992877069996e-06, "loss": 0.5013, "step": 7414 }, { "epoch": 2.912916769572734, "grad_norm": 0.4526490214558496, "learning_rate": 4.76692682471766e-06, "loss": 0.5065, "step": 7415 }, { "epoch": 2.9133119288713263, "grad_norm": 0.4384526778991886, "learning_rate": 4.766860763462211e-06, "loss": 0.5202, "step": 7416 }, { "epoch": 2.9137070881699185, "grad_norm": 0.4554119386883933, "learning_rate": 4.76679469330391e-06, "loss": 0.4985, "step": 7417 }, { "epoch": 2.9141022474685108, "grad_norm": 0.4364078258328405, "learning_rate": 4.766728614243016e-06, "loss": 0.4893, "step": 7418 }, { "epoch": 2.914497406767103, "grad_norm": 0.4588538320640383, "learning_rate": 4.766662526279788e-06, "loss": 0.521, "step": 7419 }, { "epoch": 2.9148925660656952, "grad_norm": 0.45670544657223855, "learning_rate": 4.766596429414487e-06, "loss": 0.5111, "step": 7420 }, { "epoch": 2.9152877253642875, "grad_norm": 0.4417258587433614, "learning_rate": 4.76653032364737e-06, "loss": 0.5018, "step": 7421 }, { "epoch": 2.9156828846628797, "grad_norm": 0.4365589726517331, "learning_rate": 4.7664642089787e-06, "loss": 0.5026, "step": 7422 }, { "epoch": 2.916078043961472, "grad_norm": 0.4556876909416063, "learning_rate": 4.766398085408734e-06, "loss": 0.5208, "step": 7423 }, { "epoch": 2.916473203260064, "grad_norm": 0.4496719110515717, "learning_rate": 4.766331952937732e-06, "loss": 0.5158, "step": 7424 }, { "epoch": 2.9168683625586564, "grad_norm": 0.45674338189085323, "learning_rate": 4.7662658115659546e-06, "loss": 0.5155, "step": 7425 }, { "epoch": 2.9172635218572487, "grad_norm": 0.4365519880167459, "learning_rate": 4.766199661293662e-06, "loss": 0.5067, "step": 7426 }, { "epoch": 2.917658681155841, "grad_norm": 0.46044332574529667, "learning_rate": 4.766133502121113e-06, "loss": 0.509, "step": 7427 }, { "epoch": 2.918053840454433, "grad_norm": 0.4523808596214587, "learning_rate": 4.766067334048567e-06, "loss": 0.5011, "step": 7428 }, { "epoch": 2.9184489997530254, "grad_norm": 0.4552082949507314, "learning_rate": 4.766001157076284e-06, "loss": 0.515, "step": 7429 }, { "epoch": 2.9188441590516176, "grad_norm": 0.45129094467367264, "learning_rate": 4.765934971204526e-06, "loss": 0.5026, "step": 7430 }, { "epoch": 2.91923931835021, "grad_norm": 0.4615128953141365, "learning_rate": 4.765868776433551e-06, "loss": 0.5293, "step": 7431 }, { "epoch": 2.919634477648802, "grad_norm": 0.44445080798757497, "learning_rate": 4.765802572763619e-06, "loss": 0.5084, "step": 7432 }, { "epoch": 2.9200296369473944, "grad_norm": 0.5535941391787863, "learning_rate": 4.76573636019499e-06, "loss": 0.5165, "step": 7433 }, { "epoch": 2.9204247962459866, "grad_norm": 0.4647127083547014, "learning_rate": 4.765670138727925e-06, "loss": 0.5072, "step": 7434 }, { "epoch": 2.920819955544579, "grad_norm": 0.44196536115349694, "learning_rate": 4.765603908362683e-06, "loss": 0.5036, "step": 7435 }, { "epoch": 2.921215114843171, "grad_norm": 0.45968440262090315, "learning_rate": 4.765537669099525e-06, "loss": 0.4972, "step": 7436 }, { "epoch": 2.9216102741417633, "grad_norm": 0.444196485819374, "learning_rate": 4.765471420938711e-06, "loss": 0.5025, "step": 7437 }, { "epoch": 2.9220054334403556, "grad_norm": 0.4480022299436206, "learning_rate": 4.7654051638805e-06, "loss": 0.4847, "step": 7438 }, { "epoch": 2.922400592738948, "grad_norm": 0.4567439332123647, "learning_rate": 4.765338897925154e-06, "loss": 0.5128, "step": 7439 }, { "epoch": 2.92279575203754, "grad_norm": 0.458511120503876, "learning_rate": 4.765272623072932e-06, "loss": 0.5166, "step": 7440 }, { "epoch": 2.9231909113361323, "grad_norm": 0.44860217503885985, "learning_rate": 4.765206339324095e-06, "loss": 0.5045, "step": 7441 }, { "epoch": 2.9235860706347245, "grad_norm": 0.4546548390510289, "learning_rate": 4.765140046678903e-06, "loss": 0.4965, "step": 7442 }, { "epoch": 2.9239812299333168, "grad_norm": 0.4492830754296067, "learning_rate": 4.765073745137616e-06, "loss": 0.5065, "step": 7443 }, { "epoch": 2.924376389231909, "grad_norm": 0.4605719668288093, "learning_rate": 4.765007434700495e-06, "loss": 0.5078, "step": 7444 }, { "epoch": 2.9247715485305013, "grad_norm": 0.4640714655281598, "learning_rate": 4.7649411153678e-06, "loss": 0.523, "step": 7445 }, { "epoch": 2.9251667078290935, "grad_norm": 0.45659739128055815, "learning_rate": 4.764874787139792e-06, "loss": 0.4993, "step": 7446 }, { "epoch": 2.9255618671276857, "grad_norm": 0.4728553705365856, "learning_rate": 4.764808450016731e-06, "loss": 0.5352, "step": 7447 }, { "epoch": 2.925957026426278, "grad_norm": 0.43793250070587963, "learning_rate": 4.764742103998877e-06, "loss": 0.4935, "step": 7448 }, { "epoch": 2.92635218572487, "grad_norm": 0.4423604050440384, "learning_rate": 4.7646757490864926e-06, "loss": 0.5088, "step": 7449 }, { "epoch": 2.9267473450234625, "grad_norm": 0.4501666815149439, "learning_rate": 4.764609385279836e-06, "loss": 0.5102, "step": 7450 }, { "epoch": 2.9271425043220547, "grad_norm": 0.4574235898623384, "learning_rate": 4.764543012579169e-06, "loss": 0.5032, "step": 7451 }, { "epoch": 2.927537663620647, "grad_norm": 0.460475598434439, "learning_rate": 4.764476630984752e-06, "loss": 0.4958, "step": 7452 }, { "epoch": 2.927932822919239, "grad_norm": 0.43089611921617016, "learning_rate": 4.764410240496846e-06, "loss": 0.4941, "step": 7453 }, { "epoch": 2.9283279822178314, "grad_norm": 0.45244880455514785, "learning_rate": 4.764343841115712e-06, "loss": 0.5068, "step": 7454 }, { "epoch": 2.9287231415164237, "grad_norm": 0.4572924724707516, "learning_rate": 4.76427743284161e-06, "loss": 0.509, "step": 7455 }, { "epoch": 2.929118300815016, "grad_norm": 0.4535852769301378, "learning_rate": 4.764211015674801e-06, "loss": 0.5127, "step": 7456 }, { "epoch": 2.929513460113608, "grad_norm": 0.5430024075970933, "learning_rate": 4.764144589615547e-06, "loss": 0.5065, "step": 7457 }, { "epoch": 2.929908619412201, "grad_norm": 0.44556126301612464, "learning_rate": 4.764078154664107e-06, "loss": 0.5309, "step": 7458 }, { "epoch": 2.930303778710793, "grad_norm": 0.4710840587154262, "learning_rate": 4.764011710820743e-06, "loss": 0.5104, "step": 7459 }, { "epoch": 2.9306989380093853, "grad_norm": 0.45571321032211237, "learning_rate": 4.763945258085716e-06, "loss": 0.5092, "step": 7460 }, { "epoch": 2.9310940973079775, "grad_norm": 0.44102051744598414, "learning_rate": 4.763878796459287e-06, "loss": 0.5168, "step": 7461 }, { "epoch": 2.93148925660657, "grad_norm": 0.4470640541086543, "learning_rate": 4.7638123259417166e-06, "loss": 0.5013, "step": 7462 }, { "epoch": 2.931884415905162, "grad_norm": 0.43554949182172414, "learning_rate": 4.763745846533265e-06, "loss": 0.4982, "step": 7463 }, { "epoch": 2.9322795752037543, "grad_norm": 0.4720396479178599, "learning_rate": 4.763679358234196e-06, "loss": 0.5106, "step": 7464 }, { "epoch": 2.9326747345023465, "grad_norm": 0.4602272736857191, "learning_rate": 4.763612861044768e-06, "loss": 0.5108, "step": 7465 }, { "epoch": 2.9330698938009387, "grad_norm": 0.4552466120844723, "learning_rate": 4.763546354965244e-06, "loss": 0.5034, "step": 7466 }, { "epoch": 2.933465053099531, "grad_norm": 0.4575835047170144, "learning_rate": 4.763479839995883e-06, "loss": 0.5096, "step": 7467 }, { "epoch": 2.9338602123981232, "grad_norm": 0.4678916204593777, "learning_rate": 4.763413316136949e-06, "loss": 0.5274, "step": 7468 }, { "epoch": 2.9342553716967155, "grad_norm": 0.4452559996447758, "learning_rate": 4.7633467833887015e-06, "loss": 0.4957, "step": 7469 }, { "epoch": 2.9346505309953077, "grad_norm": 0.44208744364576574, "learning_rate": 4.763280241751402e-06, "loss": 0.495, "step": 7470 }, { "epoch": 2.9350456902939, "grad_norm": 0.46246549564900696, "learning_rate": 4.763213691225313e-06, "loss": 0.5021, "step": 7471 }, { "epoch": 2.935440849592492, "grad_norm": 0.44656255715995435, "learning_rate": 4.763147131810693e-06, "loss": 0.5128, "step": 7472 }, { "epoch": 2.9358360088910844, "grad_norm": 0.4612831877698752, "learning_rate": 4.7630805635078065e-06, "loss": 0.5001, "step": 7473 }, { "epoch": 2.9362311681896767, "grad_norm": 0.4956157029905282, "learning_rate": 4.763013986316914e-06, "loss": 0.5342, "step": 7474 }, { "epoch": 2.936626327488269, "grad_norm": 0.4505671941107815, "learning_rate": 4.762947400238276e-06, "loss": 0.4923, "step": 7475 }, { "epoch": 2.937021486786861, "grad_norm": 0.45390395120855775, "learning_rate": 4.762880805272155e-06, "loss": 0.5247, "step": 7476 }, { "epoch": 2.9374166460854534, "grad_norm": 0.44602033162244886, "learning_rate": 4.762814201418813e-06, "loss": 0.4961, "step": 7477 }, { "epoch": 2.9378118053840456, "grad_norm": 0.45061560756472163, "learning_rate": 4.76274758867851e-06, "loss": 0.5152, "step": 7478 }, { "epoch": 2.938206964682638, "grad_norm": 0.4472180927534863, "learning_rate": 4.762680967051509e-06, "loss": 0.5113, "step": 7479 }, { "epoch": 2.93860212398123, "grad_norm": 0.4582451501185086, "learning_rate": 4.762614336538071e-06, "loss": 0.5157, "step": 7480 }, { "epoch": 2.9389972832798223, "grad_norm": 0.4383818777857067, "learning_rate": 4.762547697138458e-06, "loss": 0.4909, "step": 7481 }, { "epoch": 2.9393924425784146, "grad_norm": 0.46044036455758336, "learning_rate": 4.762481048852931e-06, "loss": 0.5176, "step": 7482 }, { "epoch": 2.939787601877007, "grad_norm": 0.4559104153811503, "learning_rate": 4.762414391681753e-06, "loss": 0.5117, "step": 7483 }, { "epoch": 2.940182761175599, "grad_norm": 0.4345018162819212, "learning_rate": 4.762347725625185e-06, "loss": 0.4939, "step": 7484 }, { "epoch": 2.9405779204741913, "grad_norm": 0.44329288256437194, "learning_rate": 4.7622810506834885e-06, "loss": 0.4959, "step": 7485 }, { "epoch": 2.9409730797727835, "grad_norm": 0.4719182053165318, "learning_rate": 4.762214366856925e-06, "loss": 0.5199, "step": 7486 }, { "epoch": 2.941368239071376, "grad_norm": 0.500449067523404, "learning_rate": 4.762147674145759e-06, "loss": 0.5036, "step": 7487 }, { "epoch": 2.941763398369968, "grad_norm": 0.44559700179627093, "learning_rate": 4.762080972550249e-06, "loss": 0.5127, "step": 7488 }, { "epoch": 2.9421585576685603, "grad_norm": 0.45054565116407974, "learning_rate": 4.762014262070659e-06, "loss": 0.5329, "step": 7489 }, { "epoch": 2.9425537169671525, "grad_norm": 0.4530812436143374, "learning_rate": 4.761947542707251e-06, "loss": 0.4968, "step": 7490 }, { "epoch": 2.9429488762657448, "grad_norm": 0.45123498001248574, "learning_rate": 4.761880814460286e-06, "loss": 0.5331, "step": 7491 }, { "epoch": 2.943344035564337, "grad_norm": 0.46119671535259266, "learning_rate": 4.761814077330027e-06, "loss": 0.5299, "step": 7492 }, { "epoch": 2.9437391948629292, "grad_norm": 0.4371101220621986, "learning_rate": 4.7617473313167365e-06, "loss": 0.4896, "step": 7493 }, { "epoch": 2.9441343541615215, "grad_norm": 0.45650104982890227, "learning_rate": 4.761680576420674e-06, "loss": 0.5266, "step": 7494 }, { "epoch": 2.9445295134601137, "grad_norm": 0.45022327882961727, "learning_rate": 4.761613812642105e-06, "loss": 0.5307, "step": 7495 }, { "epoch": 2.944924672758706, "grad_norm": 0.45404058586100055, "learning_rate": 4.76154703998129e-06, "loss": 0.5091, "step": 7496 }, { "epoch": 2.945319832057298, "grad_norm": 0.45618375047378157, "learning_rate": 4.761480258438491e-06, "loss": 0.5167, "step": 7497 }, { "epoch": 2.9457149913558904, "grad_norm": 0.44164866519004375, "learning_rate": 4.761413468013972e-06, "loss": 0.4942, "step": 7498 }, { "epoch": 2.9461101506544827, "grad_norm": 0.43860964167840893, "learning_rate": 4.761346668707993e-06, "loss": 0.5087, "step": 7499 }, { "epoch": 2.946505309953075, "grad_norm": 0.4453511072236731, "learning_rate": 4.7612798605208175e-06, "loss": 0.5224, "step": 7500 }, { "epoch": 2.946900469251667, "grad_norm": 0.44709440039596826, "learning_rate": 4.761213043452708e-06, "loss": 0.5303, "step": 7501 }, { "epoch": 2.9472956285502594, "grad_norm": 0.454881212603844, "learning_rate": 4.761146217503927e-06, "loss": 0.523, "step": 7502 }, { "epoch": 2.9476907878488516, "grad_norm": 0.43851122242734974, "learning_rate": 4.761079382674737e-06, "loss": 0.5026, "step": 7503 }, { "epoch": 2.948085947147444, "grad_norm": 0.47892329084918284, "learning_rate": 4.761012538965399e-06, "loss": 0.5178, "step": 7504 }, { "epoch": 2.948481106446036, "grad_norm": 0.4698801672891979, "learning_rate": 4.760945686376178e-06, "loss": 0.5098, "step": 7505 }, { "epoch": 2.9488762657446284, "grad_norm": 0.48150219567951025, "learning_rate": 4.760878824907335e-06, "loss": 0.4947, "step": 7506 }, { "epoch": 2.9492714250432206, "grad_norm": 0.4417389041761658, "learning_rate": 4.7608119545591326e-06, "loss": 0.5044, "step": 7507 }, { "epoch": 2.949666584341813, "grad_norm": 0.44156128051674065, "learning_rate": 4.760745075331833e-06, "loss": 0.4993, "step": 7508 }, { "epoch": 2.950061743640405, "grad_norm": 0.43631000583026247, "learning_rate": 4.7606781872257e-06, "loss": 0.4978, "step": 7509 }, { "epoch": 2.9504569029389973, "grad_norm": 0.4570064542158102, "learning_rate": 4.760611290240996e-06, "loss": 0.5007, "step": 7510 }, { "epoch": 2.9508520622375896, "grad_norm": 0.45615671034916505, "learning_rate": 4.760544384377984e-06, "loss": 0.4908, "step": 7511 }, { "epoch": 2.951247221536182, "grad_norm": 0.4343490552276528, "learning_rate": 4.760477469636926e-06, "loss": 0.4896, "step": 7512 }, { "epoch": 2.951642380834774, "grad_norm": 0.4459408199880296, "learning_rate": 4.760410546018085e-06, "loss": 0.5091, "step": 7513 }, { "epoch": 2.9520375401333663, "grad_norm": 0.4636503598906805, "learning_rate": 4.760343613521724e-06, "loss": 0.4895, "step": 7514 }, { "epoch": 2.9524326994319585, "grad_norm": 0.4473832087140672, "learning_rate": 4.7602766721481055e-06, "loss": 0.5059, "step": 7515 }, { "epoch": 2.9528278587305508, "grad_norm": 0.45903466878021015, "learning_rate": 4.760209721897493e-06, "loss": 0.5259, "step": 7516 }, { "epoch": 2.953223018029143, "grad_norm": 0.4389672467155343, "learning_rate": 4.76014276277015e-06, "loss": 0.5145, "step": 7517 }, { "epoch": 2.9536181773277352, "grad_norm": 0.44946325074519894, "learning_rate": 4.760075794766338e-06, "loss": 0.5092, "step": 7518 }, { "epoch": 2.9540133366263275, "grad_norm": 0.44012358398055595, "learning_rate": 4.76000881788632e-06, "loss": 0.4943, "step": 7519 }, { "epoch": 2.9544084959249197, "grad_norm": 0.45946199113007924, "learning_rate": 4.75994183213036e-06, "loss": 0.5014, "step": 7520 }, { "epoch": 2.954803655223512, "grad_norm": 0.45502527214353133, "learning_rate": 4.759874837498721e-06, "loss": 0.4964, "step": 7521 }, { "epoch": 2.955198814522104, "grad_norm": 0.465012979637925, "learning_rate": 4.759807833991667e-06, "loss": 0.4897, "step": 7522 }, { "epoch": 2.9555939738206964, "grad_norm": 0.4467396954982866, "learning_rate": 4.759740821609459e-06, "loss": 0.5003, "step": 7523 }, { "epoch": 2.9559891331192887, "grad_norm": 0.4425638158476036, "learning_rate": 4.759673800352362e-06, "loss": 0.4933, "step": 7524 }, { "epoch": 2.956384292417881, "grad_norm": 0.4616979621992214, "learning_rate": 4.759606770220638e-06, "loss": 0.5142, "step": 7525 }, { "epoch": 2.956779451716473, "grad_norm": 0.4722218812872345, "learning_rate": 4.759539731214549e-06, "loss": 0.5087, "step": 7526 }, { "epoch": 2.9571746110150654, "grad_norm": 0.439973697685221, "learning_rate": 4.759472683334362e-06, "loss": 0.4875, "step": 7527 }, { "epoch": 2.9575697703136576, "grad_norm": 0.4571911157675649, "learning_rate": 4.759405626580338e-06, "loss": 0.513, "step": 7528 }, { "epoch": 2.95796492961225, "grad_norm": 0.4657839715911801, "learning_rate": 4.7593385609527406e-06, "loss": 0.5174, "step": 7529 }, { "epoch": 2.958360088910842, "grad_norm": 0.4530863288631989, "learning_rate": 4.759271486451833e-06, "loss": 0.4956, "step": 7530 }, { "epoch": 2.9587552482094344, "grad_norm": 0.4722880955783831, "learning_rate": 4.759204403077879e-06, "loss": 0.5187, "step": 7531 }, { "epoch": 2.9591504075080266, "grad_norm": 0.4567472933113083, "learning_rate": 4.7591373108311425e-06, "loss": 0.5272, "step": 7532 }, { "epoch": 2.959545566806619, "grad_norm": 0.46118458955762015, "learning_rate": 4.759070209711886e-06, "loss": 0.515, "step": 7533 }, { "epoch": 2.959940726105211, "grad_norm": 0.4337420420723696, "learning_rate": 4.759003099720373e-06, "loss": 0.4868, "step": 7534 }, { "epoch": 2.9603358854038033, "grad_norm": 0.4597390934949171, "learning_rate": 4.758935980856868e-06, "loss": 0.524, "step": 7535 }, { "epoch": 2.9607310447023956, "grad_norm": 0.4520311626789291, "learning_rate": 4.758868853121635e-06, "loss": 0.5164, "step": 7536 }, { "epoch": 2.961126204000988, "grad_norm": 0.4604721101236667, "learning_rate": 4.758801716514935e-06, "loss": 0.5022, "step": 7537 }, { "epoch": 2.96152136329958, "grad_norm": 0.4419481364317015, "learning_rate": 4.758734571037035e-06, "loss": 0.4752, "step": 7538 }, { "epoch": 2.9619165225981723, "grad_norm": 0.4449236247292902, "learning_rate": 4.758667416688197e-06, "loss": 0.5091, "step": 7539 }, { "epoch": 2.9623116818967645, "grad_norm": 0.4553128748466083, "learning_rate": 4.758600253468684e-06, "loss": 0.5, "step": 7540 }, { "epoch": 2.9627068411953568, "grad_norm": 0.46612998739450867, "learning_rate": 4.758533081378762e-06, "loss": 0.5018, "step": 7541 }, { "epoch": 2.963102000493949, "grad_norm": 0.46551416452290106, "learning_rate": 4.7584659004186924e-06, "loss": 0.508, "step": 7542 }, { "epoch": 2.9634971597925412, "grad_norm": 0.452825185453952, "learning_rate": 4.758398710588741e-06, "loss": 0.4992, "step": 7543 }, { "epoch": 2.9638923190911335, "grad_norm": 0.4604901561144933, "learning_rate": 4.758331511889171e-06, "loss": 0.5071, "step": 7544 }, { "epoch": 2.9642874783897257, "grad_norm": 0.4700510709171447, "learning_rate": 4.7582643043202445e-06, "loss": 0.5159, "step": 7545 }, { "epoch": 2.964682637688318, "grad_norm": 0.4702968346285271, "learning_rate": 4.758197087882228e-06, "loss": 0.5211, "step": 7546 }, { "epoch": 2.96507779698691, "grad_norm": 0.45518608070060007, "learning_rate": 4.758129862575386e-06, "loss": 0.4996, "step": 7547 }, { "epoch": 2.9654729562855024, "grad_norm": 0.46985798766213305, "learning_rate": 4.758062628399979e-06, "loss": 0.5043, "step": 7548 }, { "epoch": 2.9658681155840947, "grad_norm": 0.47941413887771617, "learning_rate": 4.7579953853562744e-06, "loss": 0.5225, "step": 7549 }, { "epoch": 2.966263274882687, "grad_norm": 0.44370397690809943, "learning_rate": 4.757928133444534e-06, "loss": 0.508, "step": 7550 }, { "epoch": 2.966658434181279, "grad_norm": 0.5281064344206505, "learning_rate": 4.757860872665024e-06, "loss": 0.5019, "step": 7551 }, { "epoch": 2.9670535934798714, "grad_norm": 0.46395863123158015, "learning_rate": 4.757793603018007e-06, "loss": 0.5176, "step": 7552 }, { "epoch": 2.9674487527784636, "grad_norm": 0.4640272614666952, "learning_rate": 4.757726324503749e-06, "loss": 0.5029, "step": 7553 }, { "epoch": 2.967843912077056, "grad_norm": 0.443178090717011, "learning_rate": 4.757659037122511e-06, "loss": 0.5143, "step": 7554 }, { "epoch": 2.968239071375648, "grad_norm": 0.4562032262581782, "learning_rate": 4.75759174087456e-06, "loss": 0.4894, "step": 7555 }, { "epoch": 2.9686342306742404, "grad_norm": 0.4505861613127547, "learning_rate": 4.75752443576016e-06, "loss": 0.511, "step": 7556 }, { "epoch": 2.9690293899728326, "grad_norm": 0.46040086407622155, "learning_rate": 4.757457121779575e-06, "loss": 0.5134, "step": 7557 }, { "epoch": 2.969424549271425, "grad_norm": 0.46009994634444334, "learning_rate": 4.757389798933069e-06, "loss": 0.4881, "step": 7558 }, { "epoch": 2.969819708570017, "grad_norm": 0.48161543752949787, "learning_rate": 4.757322467220906e-06, "loss": 0.4888, "step": 7559 }, { "epoch": 2.9702148678686093, "grad_norm": 0.4468364867174091, "learning_rate": 4.7572551266433506e-06, "loss": 0.5166, "step": 7560 }, { "epoch": 2.9706100271672016, "grad_norm": 0.451144658830222, "learning_rate": 4.757187777200669e-06, "loss": 0.497, "step": 7561 }, { "epoch": 2.971005186465794, "grad_norm": 0.4435308430537957, "learning_rate": 4.757120418893124e-06, "loss": 0.5151, "step": 7562 }, { "epoch": 2.971400345764386, "grad_norm": 0.4757327743330427, "learning_rate": 4.7570530517209815e-06, "loss": 0.5268, "step": 7563 }, { "epoch": 2.9717955050629783, "grad_norm": 0.46010095557816083, "learning_rate": 4.756985675684504e-06, "loss": 0.5048, "step": 7564 }, { "epoch": 2.9721906643615705, "grad_norm": 0.4457407014760488, "learning_rate": 4.756918290783957e-06, "loss": 0.5066, "step": 7565 }, { "epoch": 2.9725858236601628, "grad_norm": 0.4797320041985171, "learning_rate": 4.756850897019606e-06, "loss": 0.5166, "step": 7566 }, { "epoch": 2.972980982958755, "grad_norm": 0.44199710687789934, "learning_rate": 4.756783494391716e-06, "loss": 0.487, "step": 7567 }, { "epoch": 2.9733761422573473, "grad_norm": 0.4426609628978589, "learning_rate": 4.7567160829005496e-06, "loss": 0.5036, "step": 7568 }, { "epoch": 2.9737713015559395, "grad_norm": 0.4505400917951688, "learning_rate": 4.756648662546373e-06, "loss": 0.5125, "step": 7569 }, { "epoch": 2.9741664608545317, "grad_norm": 0.4676898947705984, "learning_rate": 4.756581233329451e-06, "loss": 0.529, "step": 7570 }, { "epoch": 2.974561620153124, "grad_norm": 0.44793604608650994, "learning_rate": 4.756513795250048e-06, "loss": 0.5314, "step": 7571 }, { "epoch": 2.974956779451716, "grad_norm": 0.4257182384070774, "learning_rate": 4.756446348308429e-06, "loss": 0.4938, "step": 7572 }, { "epoch": 2.9753519387503085, "grad_norm": 0.4541639462382242, "learning_rate": 4.7563788925048596e-06, "loss": 0.5049, "step": 7573 }, { "epoch": 2.9757470980489007, "grad_norm": 0.45049135883048075, "learning_rate": 4.7563114278396025e-06, "loss": 0.4962, "step": 7574 }, { "epoch": 2.976142257347493, "grad_norm": 0.44642306301580476, "learning_rate": 4.756243954312926e-06, "loss": 0.4906, "step": 7575 }, { "epoch": 2.976537416646085, "grad_norm": 0.4551265744121246, "learning_rate": 4.756176471925092e-06, "loss": 0.5114, "step": 7576 }, { "epoch": 2.976932575944678, "grad_norm": 0.44159910731274965, "learning_rate": 4.756108980676367e-06, "loss": 0.5182, "step": 7577 }, { "epoch": 2.97732773524327, "grad_norm": 0.43027421694295137, "learning_rate": 4.756041480567017e-06, "loss": 0.4889, "step": 7578 }, { "epoch": 2.9777228945418623, "grad_norm": 0.4337873859510924, "learning_rate": 4.755973971597305e-06, "loss": 0.4941, "step": 7579 }, { "epoch": 2.9781180538404546, "grad_norm": 0.45031945697341647, "learning_rate": 4.7559064537674975e-06, "loss": 0.4998, "step": 7580 }, { "epoch": 2.978513213139047, "grad_norm": 0.4712226590155607, "learning_rate": 4.755838927077859e-06, "loss": 0.5111, "step": 7581 }, { "epoch": 2.978908372437639, "grad_norm": 0.4409693860661984, "learning_rate": 4.755771391528655e-06, "loss": 0.5069, "step": 7582 }, { "epoch": 2.9793035317362313, "grad_norm": 0.4640142389222412, "learning_rate": 4.755703847120152e-06, "loss": 0.5164, "step": 7583 }, { "epoch": 2.9796986910348235, "grad_norm": 0.45381043122965947, "learning_rate": 4.7556362938526124e-06, "loss": 0.5041, "step": 7584 }, { "epoch": 2.980093850333416, "grad_norm": 0.4587590971637779, "learning_rate": 4.755568731726304e-06, "loss": 0.5203, "step": 7585 }, { "epoch": 2.980489009632008, "grad_norm": 0.4644178501879429, "learning_rate": 4.755501160741491e-06, "loss": 0.5328, "step": 7586 }, { "epoch": 2.9808841689306003, "grad_norm": 0.44776614224075106, "learning_rate": 4.755433580898439e-06, "loss": 0.5036, "step": 7587 }, { "epoch": 2.9812793282291925, "grad_norm": 0.43844012754058254, "learning_rate": 4.7553659921974134e-06, "loss": 0.4832, "step": 7588 }, { "epoch": 2.9816744875277847, "grad_norm": 0.45286284274574534, "learning_rate": 4.75529839463868e-06, "loss": 0.5185, "step": 7589 }, { "epoch": 2.982069646826377, "grad_norm": 0.46049052268894997, "learning_rate": 4.755230788222504e-06, "loss": 0.5378, "step": 7590 }, { "epoch": 2.9824648061249692, "grad_norm": 0.4485615523268441, "learning_rate": 4.755163172949151e-06, "loss": 0.5009, "step": 7591 }, { "epoch": 2.9828599654235615, "grad_norm": 0.45034777636007545, "learning_rate": 4.755095548818886e-06, "loss": 0.5236, "step": 7592 }, { "epoch": 2.9832551247221537, "grad_norm": 0.4531560958419335, "learning_rate": 4.755027915831975e-06, "loss": 0.5101, "step": 7593 }, { "epoch": 2.983650284020746, "grad_norm": 0.452406575756383, "learning_rate": 4.754960273988684e-06, "loss": 0.51, "step": 7594 }, { "epoch": 2.984045443319338, "grad_norm": 0.4404650508761369, "learning_rate": 4.754892623289279e-06, "loss": 0.498, "step": 7595 }, { "epoch": 2.9844406026179304, "grad_norm": 0.45105701449061847, "learning_rate": 4.754824963734024e-06, "loss": 0.5247, "step": 7596 }, { "epoch": 2.9848357619165227, "grad_norm": 0.4544785032506456, "learning_rate": 4.754757295323186e-06, "loss": 0.5006, "step": 7597 }, { "epoch": 2.985230921215115, "grad_norm": 0.44648615459844027, "learning_rate": 4.7546896180570305e-06, "loss": 0.5139, "step": 7598 }, { "epoch": 2.985626080513707, "grad_norm": 0.46035057899107057, "learning_rate": 4.754621931935823e-06, "loss": 0.5129, "step": 7599 }, { "epoch": 2.9860212398122994, "grad_norm": 0.468475157789722, "learning_rate": 4.75455423695983e-06, "loss": 0.5284, "step": 7600 }, { "epoch": 2.9864163991108916, "grad_norm": 0.455654479720452, "learning_rate": 4.7544865331293175e-06, "loss": 0.5141, "step": 7601 }, { "epoch": 2.986811558409484, "grad_norm": 0.43877314753613916, "learning_rate": 4.75441882044455e-06, "loss": 0.5102, "step": 7602 }, { "epoch": 2.987206717708076, "grad_norm": 0.44066884059445266, "learning_rate": 4.754351098905795e-06, "loss": 0.5096, "step": 7603 }, { "epoch": 2.9876018770066683, "grad_norm": 0.455561747753586, "learning_rate": 4.754283368513317e-06, "loss": 0.5194, "step": 7604 }, { "epoch": 2.9879970363052606, "grad_norm": 0.4532632517976643, "learning_rate": 4.754215629267384e-06, "loss": 0.5145, "step": 7605 }, { "epoch": 2.988392195603853, "grad_norm": 0.4469539479625391, "learning_rate": 4.75414788116826e-06, "loss": 0.5018, "step": 7606 }, { "epoch": 2.988787354902445, "grad_norm": 0.45541116390015846, "learning_rate": 4.754080124216212e-06, "loss": 0.4859, "step": 7607 }, { "epoch": 2.9891825142010373, "grad_norm": 0.4496311984557622, "learning_rate": 4.754012358411506e-06, "loss": 0.5018, "step": 7608 }, { "epoch": 2.9895776734996296, "grad_norm": 0.4530431443004678, "learning_rate": 4.753944583754408e-06, "loss": 0.489, "step": 7609 }, { "epoch": 2.989972832798222, "grad_norm": 0.4475247994930158, "learning_rate": 4.753876800245186e-06, "loss": 0.4946, "step": 7610 }, { "epoch": 2.990367992096814, "grad_norm": 0.44677019584511535, "learning_rate": 4.753809007884103e-06, "loss": 0.5155, "step": 7611 }, { "epoch": 2.9907631513954063, "grad_norm": 0.4491717613748345, "learning_rate": 4.753741206671426e-06, "loss": 0.489, "step": 7612 }, { "epoch": 2.9911583106939985, "grad_norm": 0.4718935616370409, "learning_rate": 4.753673396607423e-06, "loss": 0.5084, "step": 7613 }, { "epoch": 2.9915534699925908, "grad_norm": 0.4526066949527833, "learning_rate": 4.7536055776923596e-06, "loss": 0.5151, "step": 7614 }, { "epoch": 2.991948629291183, "grad_norm": 0.4506375450777332, "learning_rate": 4.753537749926502e-06, "loss": 0.5064, "step": 7615 }, { "epoch": 2.9923437885897752, "grad_norm": 0.45034114642880246, "learning_rate": 4.753469913310116e-06, "loss": 0.5166, "step": 7616 }, { "epoch": 2.9927389478883675, "grad_norm": 0.4407471329242633, "learning_rate": 4.753402067843469e-06, "loss": 0.5098, "step": 7617 }, { "epoch": 2.9931341071869597, "grad_norm": 0.4761783641018543, "learning_rate": 4.753334213526827e-06, "loss": 0.5045, "step": 7618 }, { "epoch": 2.993529266485552, "grad_norm": 0.4549004819674926, "learning_rate": 4.7532663503604566e-06, "loss": 0.5019, "step": 7619 }, { "epoch": 2.993924425784144, "grad_norm": 0.43524384467655386, "learning_rate": 4.753198478344624e-06, "loss": 0.4918, "step": 7620 }, { "epoch": 2.9943195850827364, "grad_norm": 0.45140783826694003, "learning_rate": 4.753130597479596e-06, "loss": 0.5067, "step": 7621 }, { "epoch": 2.9947147443813287, "grad_norm": 0.4481553706282452, "learning_rate": 4.753062707765639e-06, "loss": 0.5192, "step": 7622 }, { "epoch": 2.995109903679921, "grad_norm": 0.44777175097676636, "learning_rate": 4.7529948092030204e-06, "loss": 0.5129, "step": 7623 }, { "epoch": 2.995505062978513, "grad_norm": 0.4479442802590514, "learning_rate": 4.752926901792006e-06, "loss": 0.4877, "step": 7624 }, { "epoch": 2.9959002222771054, "grad_norm": 0.446793554178131, "learning_rate": 4.752858985532862e-06, "loss": 0.5011, "step": 7625 }, { "epoch": 2.9962953815756976, "grad_norm": 0.45259107436589663, "learning_rate": 4.7527910604258575e-06, "loss": 0.4992, "step": 7626 }, { "epoch": 2.99669054087429, "grad_norm": 0.4445697317032285, "learning_rate": 4.752723126471257e-06, "loss": 0.5161, "step": 7627 }, { "epoch": 2.997085700172882, "grad_norm": 0.46322756286869876, "learning_rate": 4.752655183669327e-06, "loss": 0.5021, "step": 7628 }, { "epoch": 2.9974808594714744, "grad_norm": 0.4483882532172694, "learning_rate": 4.752587232020337e-06, "loss": 0.4896, "step": 7629 }, { "epoch": 2.9978760187700666, "grad_norm": 0.4474447067182486, "learning_rate": 4.7525192715245505e-06, "loss": 0.5, "step": 7630 }, { "epoch": 2.998271178068659, "grad_norm": 0.46092387125951806, "learning_rate": 4.752451302182237e-06, "loss": 0.5081, "step": 7631 }, { "epoch": 2.998666337367251, "grad_norm": 0.44451538632392057, "learning_rate": 4.752383323993663e-06, "loss": 0.506, "step": 7632 }, { "epoch": 2.9990614966658433, "grad_norm": 0.44559816658293144, "learning_rate": 4.752315336959094e-06, "loss": 0.4877, "step": 7633 }, { "epoch": 2.9994566559644356, "grad_norm": 0.4729115645935803, "learning_rate": 4.752247341078798e-06, "loss": 0.5108, "step": 7634 }, { "epoch": 2.999851815263028, "grad_norm": 0.4634195899356373, "learning_rate": 4.752179336353043e-06, "loss": 0.507, "step": 7635 }, { "epoch": 3.0002469745616205, "grad_norm": 0.44336385846437987, "learning_rate": 4.752111322782095e-06, "loss": 0.4778, "step": 7636 }, { "epoch": 3.0006421338602127, "grad_norm": 0.4516001642956355, "learning_rate": 4.752043300366222e-06, "loss": 0.5019, "step": 7637 }, { "epoch": 3.001037293158805, "grad_norm": 0.45717790797056945, "learning_rate": 4.751975269105689e-06, "loss": 0.5165, "step": 7638 }, { "epoch": 3.001432452457397, "grad_norm": 0.4584460619343716, "learning_rate": 4.751907229000765e-06, "loss": 0.5139, "step": 7639 }, { "epoch": 3.0018276117559894, "grad_norm": 0.4505928965434931, "learning_rate": 4.751839180051717e-06, "loss": 0.4879, "step": 7640 }, { "epoch": 3.0022227710545817, "grad_norm": 0.4550585108130313, "learning_rate": 4.751771122258812e-06, "loss": 0.518, "step": 7641 }, { "epoch": 3.002617930353174, "grad_norm": 0.4641841812808131, "learning_rate": 4.751703055622317e-06, "loss": 0.5098, "step": 7642 }, { "epoch": 3.003013089651766, "grad_norm": 0.4619224304919582, "learning_rate": 4.7516349801424995e-06, "loss": 0.492, "step": 7643 }, { "epoch": 3.0034082489503584, "grad_norm": 0.45126132444360667, "learning_rate": 4.751566895819628e-06, "loss": 0.5081, "step": 7644 }, { "epoch": 3.0038034082489506, "grad_norm": 0.4397768300217093, "learning_rate": 4.7514988026539686e-06, "loss": 0.4917, "step": 7645 }, { "epoch": 3.004198567547543, "grad_norm": 0.45619992289347067, "learning_rate": 4.75143070064579e-06, "loss": 0.4798, "step": 7646 }, { "epoch": 3.004593726846135, "grad_norm": 0.4498728600811053, "learning_rate": 4.751362589795358e-06, "loss": 0.4953, "step": 7647 }, { "epoch": 3.0049888861447274, "grad_norm": 0.451458140159821, "learning_rate": 4.751294470102941e-06, "loss": 0.4945, "step": 7648 }, { "epoch": 3.0053840454433196, "grad_norm": 0.44656047500039336, "learning_rate": 4.751226341568806e-06, "loss": 0.4945, "step": 7649 }, { "epoch": 3.005779204741912, "grad_norm": 0.44674554953771023, "learning_rate": 4.75115820419322e-06, "loss": 0.5066, "step": 7650 }, { "epoch": 3.006174364040504, "grad_norm": 0.5042336016390171, "learning_rate": 4.751090057976453e-06, "loss": 0.5143, "step": 7651 }, { "epoch": 3.0065695233390963, "grad_norm": 0.4703862669838775, "learning_rate": 4.751021902918771e-06, "loss": 0.5049, "step": 7652 }, { "epoch": 3.0069646826376886, "grad_norm": 0.44280468476812773, "learning_rate": 4.750953739020441e-06, "loss": 0.5, "step": 7653 }, { "epoch": 3.007359841936281, "grad_norm": 0.4423726279571279, "learning_rate": 4.7508855662817325e-06, "loss": 0.5138, "step": 7654 }, { "epoch": 3.007755001234873, "grad_norm": 0.4668415881963999, "learning_rate": 4.750817384702912e-06, "loss": 0.5075, "step": 7655 }, { "epoch": 3.0081501605334653, "grad_norm": 0.4395316160725124, "learning_rate": 4.750749194284248e-06, "loss": 0.4892, "step": 7656 }, { "epoch": 3.0085453198320575, "grad_norm": 0.4523097575996378, "learning_rate": 4.750680995026007e-06, "loss": 0.5155, "step": 7657 }, { "epoch": 3.0089404791306498, "grad_norm": 0.43939023061801286, "learning_rate": 4.7506127869284585e-06, "loss": 0.4957, "step": 7658 }, { "epoch": 3.009335638429242, "grad_norm": 0.46277152916188, "learning_rate": 4.7505445699918695e-06, "loss": 0.4936, "step": 7659 }, { "epoch": 3.0097307977278343, "grad_norm": 0.46615632583881483, "learning_rate": 4.750476344216508e-06, "loss": 0.5201, "step": 7660 }, { "epoch": 3.0101259570264265, "grad_norm": 0.4914314812774924, "learning_rate": 4.750408109602641e-06, "loss": 0.5254, "step": 7661 }, { "epoch": 3.0105211163250187, "grad_norm": 0.459305044150223, "learning_rate": 4.7503398661505386e-06, "loss": 0.5076, "step": 7662 }, { "epoch": 3.010916275623611, "grad_norm": 0.46075158979928715, "learning_rate": 4.750271613860468e-06, "loss": 0.5264, "step": 7663 }, { "epoch": 3.011311434922203, "grad_norm": 0.450653814827898, "learning_rate": 4.750203352732696e-06, "loss": 0.5151, "step": 7664 }, { "epoch": 3.0117065942207955, "grad_norm": 0.458283864427654, "learning_rate": 4.750135082767492e-06, "loss": 0.5008, "step": 7665 }, { "epoch": 3.0121017535193877, "grad_norm": 0.4685611930993023, "learning_rate": 4.750066803965124e-06, "loss": 0.5187, "step": 7666 }, { "epoch": 3.01249691281798, "grad_norm": 0.47791128196843446, "learning_rate": 4.749998516325859e-06, "loss": 0.5019, "step": 7667 }, { "epoch": 3.012892072116572, "grad_norm": 0.4872509174602875, "learning_rate": 4.749930219849967e-06, "loss": 0.5231, "step": 7668 }, { "epoch": 3.0132872314151644, "grad_norm": 0.4684674472270326, "learning_rate": 4.749861914537715e-06, "loss": 0.5076, "step": 7669 }, { "epoch": 3.0136823907137567, "grad_norm": 0.4421540054512149, "learning_rate": 4.749793600389372e-06, "loss": 0.528, "step": 7670 }, { "epoch": 3.014077550012349, "grad_norm": 0.46434385398269223, "learning_rate": 4.749725277405205e-06, "loss": 0.5189, "step": 7671 }, { "epoch": 3.014472709310941, "grad_norm": 0.45238786433034117, "learning_rate": 4.749656945585484e-06, "loss": 0.4985, "step": 7672 }, { "epoch": 3.0148678686095334, "grad_norm": 0.4740826575920035, "learning_rate": 4.749588604930476e-06, "loss": 0.4961, "step": 7673 }, { "epoch": 3.0152630279081256, "grad_norm": 0.4356051895472756, "learning_rate": 4.749520255440451e-06, "loss": 0.5006, "step": 7674 }, { "epoch": 3.015658187206718, "grad_norm": 0.44354071567582887, "learning_rate": 4.749451897115675e-06, "loss": 0.4959, "step": 7675 }, { "epoch": 3.01605334650531, "grad_norm": 0.4561647234991173, "learning_rate": 4.749383529956419e-06, "loss": 0.4928, "step": 7676 }, { "epoch": 3.0164485058039023, "grad_norm": 0.4883457852318272, "learning_rate": 4.74931515396295e-06, "loss": 0.5144, "step": 7677 }, { "epoch": 3.0168436651024946, "grad_norm": 0.44882770949870965, "learning_rate": 4.749246769135537e-06, "loss": 0.4993, "step": 7678 }, { "epoch": 3.000222277105458, "grad_norm": 0.7010342589000016, "learning_rate": 4.749178375474448e-06, "loss": 0.473, "step": 7679 }, { "epoch": 3.0006174364040503, "grad_norm": 0.915590098727133, "learning_rate": 4.749109972979953e-06, "loss": 0.45, "step": 7680 }, { "epoch": 3.0010125957026426, "grad_norm": 0.6897379840405359, "learning_rate": 4.74904156165232e-06, "loss": 0.4431, "step": 7681 }, { "epoch": 3.001407755001235, "grad_norm": 0.5249346162970593, "learning_rate": 4.748973141491816e-06, "loss": 0.4328, "step": 7682 }, { "epoch": 3.001802914299827, "grad_norm": 0.9709224152272217, "learning_rate": 4.748904712498712e-06, "loss": 0.4507, "step": 7683 }, { "epoch": 3.0021980735984193, "grad_norm": 0.9257083172881498, "learning_rate": 4.748836274673275e-06, "loss": 0.4171, "step": 7684 }, { "epoch": 3.0025932328970115, "grad_norm": 0.6654293170865759, "learning_rate": 4.748767828015777e-06, "loss": 0.4453, "step": 7685 }, { "epoch": 3.0029883921956038, "grad_norm": 0.5713696408874206, "learning_rate": 4.7486993725264824e-06, "loss": 0.4407, "step": 7686 }, { "epoch": 3.003383551494196, "grad_norm": 0.6629984546742934, "learning_rate": 4.748630908205663e-06, "loss": 0.4266, "step": 7687 }, { "epoch": 3.0037787107927882, "grad_norm": 0.7031964288055295, "learning_rate": 4.748562435053587e-06, "loss": 0.4571, "step": 7688 }, { "epoch": 3.0041738700913805, "grad_norm": 0.8005034174097257, "learning_rate": 4.748493953070522e-06, "loss": 0.441, "step": 7689 }, { "epoch": 3.0045690293899727, "grad_norm": 0.5656503152080794, "learning_rate": 4.748425462256739e-06, "loss": 0.4482, "step": 7690 }, { "epoch": 3.004964188688565, "grad_norm": 0.5805979657928287, "learning_rate": 4.748356962612506e-06, "loss": 0.4308, "step": 7691 }, { "epoch": 3.005359347987157, "grad_norm": 0.6569090685400837, "learning_rate": 4.7482884541380915e-06, "loss": 0.4519, "step": 7692 }, { "epoch": 3.0057545072857494, "grad_norm": 0.6217734324861036, "learning_rate": 4.748219936833766e-06, "loss": 0.4349, "step": 7693 }, { "epoch": 3.0061496665843417, "grad_norm": 0.5796522323129869, "learning_rate": 4.748151410699798e-06, "loss": 0.4479, "step": 7694 }, { "epoch": 3.006544825882934, "grad_norm": 0.5356679945336663, "learning_rate": 4.7480828757364555e-06, "loss": 0.4209, "step": 7695 }, { "epoch": 3.006939985181526, "grad_norm": 0.5544815975849889, "learning_rate": 4.7480143319440094e-06, "loss": 0.4321, "step": 7696 }, { "epoch": 3.0073351444801184, "grad_norm": 0.5615701121311352, "learning_rate": 4.747945779322727e-06, "loss": 0.4498, "step": 7697 }, { "epoch": 3.0077303037787106, "grad_norm": 0.5591511570043203, "learning_rate": 4.747877217872879e-06, "loss": 0.4434, "step": 7698 }, { "epoch": 3.008125463077303, "grad_norm": 0.5231502732146598, "learning_rate": 4.747808647594735e-06, "loss": 0.4454, "step": 7699 }, { "epoch": 3.008520622375895, "grad_norm": 0.5749999982521911, "learning_rate": 4.747740068488563e-06, "loss": 0.4335, "step": 7700 }, { "epoch": 3.0089157816744874, "grad_norm": 0.5594741958286711, "learning_rate": 4.747671480554633e-06, "loss": 0.4479, "step": 7701 }, { "epoch": 3.0093109409730796, "grad_norm": 0.5229530513831508, "learning_rate": 4.747602883793215e-06, "loss": 0.4206, "step": 7702 }, { "epoch": 3.009706100271672, "grad_norm": 0.5269098553306883, "learning_rate": 4.747534278204576e-06, "loss": 0.4259, "step": 7703 }, { "epoch": 3.010101259570264, "grad_norm": 0.5743914914071026, "learning_rate": 4.747465663788989e-06, "loss": 0.4304, "step": 7704 }, { "epoch": 3.0104964188688563, "grad_norm": 0.5032330836219913, "learning_rate": 4.747397040546721e-06, "loss": 0.4369, "step": 7705 }, { "epoch": 3.0108915781674486, "grad_norm": 0.5170294237714139, "learning_rate": 4.747328408478042e-06, "loss": 0.442, "step": 7706 }, { "epoch": 3.011286737466041, "grad_norm": 0.5325099151723756, "learning_rate": 4.747259767583221e-06, "loss": 0.4423, "step": 7707 }, { "epoch": 3.011681896764633, "grad_norm": 0.5263448352266229, "learning_rate": 4.7471911178625285e-06, "loss": 0.4553, "step": 7708 }, { "epoch": 3.0120770560632253, "grad_norm": 0.5935074925411176, "learning_rate": 4.747122459316235e-06, "loss": 0.4524, "step": 7709 }, { "epoch": 3.0124722153618175, "grad_norm": 0.5162191886079706, "learning_rate": 4.747053791944607e-06, "loss": 0.4536, "step": 7710 }, { "epoch": 3.0128673746604098, "grad_norm": 0.48679105836545394, "learning_rate": 4.746985115747918e-06, "loss": 0.4128, "step": 7711 }, { "epoch": 3.013262533959002, "grad_norm": 0.5158017050260368, "learning_rate": 4.746916430726435e-06, "loss": 0.4413, "step": 7712 }, { "epoch": 3.0136576932575943, "grad_norm": 0.5255387088096938, "learning_rate": 4.746847736880429e-06, "loss": 0.4331, "step": 7713 }, { "epoch": 3.014052852556187, "grad_norm": 0.49833669861127644, "learning_rate": 4.746779034210169e-06, "loss": 0.4546, "step": 7714 }, { "epoch": 3.014448011854779, "grad_norm": 0.5013002795411056, "learning_rate": 4.746710322715926e-06, "loss": 0.4338, "step": 7715 }, { "epoch": 3.0148431711533714, "grad_norm": 0.47380997163306915, "learning_rate": 4.746641602397969e-06, "loss": 0.452, "step": 7716 }, { "epoch": 3.0152383304519637, "grad_norm": 0.48815835539987246, "learning_rate": 4.746572873256568e-06, "loss": 0.4325, "step": 7717 }, { "epoch": 3.015633489750556, "grad_norm": 0.5234825947101446, "learning_rate": 4.746504135291992e-06, "loss": 0.4627, "step": 7718 }, { "epoch": 3.016028649049148, "grad_norm": 0.46528150114040756, "learning_rate": 4.746435388504513e-06, "loss": 0.4135, "step": 7719 }, { "epoch": 3.0164238083477404, "grad_norm": 0.5186401009350103, "learning_rate": 4.746366632894399e-06, "loss": 0.4434, "step": 7720 }, { "epoch": 3.0168189676463326, "grad_norm": 0.48449546596605336, "learning_rate": 4.746297868461922e-06, "loss": 0.4275, "step": 7721 }, { "epoch": 3.017214126944925, "grad_norm": 0.4972566211579247, "learning_rate": 4.74622909520735e-06, "loss": 0.4352, "step": 7722 }, { "epoch": 3.017609286243517, "grad_norm": 0.49817262550108005, "learning_rate": 4.746160313130955e-06, "loss": 0.4427, "step": 7723 }, { "epoch": 3.0180044455421093, "grad_norm": 0.4807900493796438, "learning_rate": 4.746091522233006e-06, "loss": 0.4458, "step": 7724 }, { "epoch": 3.0183996048407016, "grad_norm": 0.486178408026208, "learning_rate": 4.746022722513772e-06, "loss": 0.4375, "step": 7725 }, { "epoch": 3.018794764139294, "grad_norm": 0.47609321433727503, "learning_rate": 4.745953913973526e-06, "loss": 0.439, "step": 7726 }, { "epoch": 3.019189923437886, "grad_norm": 0.48600695149007456, "learning_rate": 4.745885096612537e-06, "loss": 0.427, "step": 7727 }, { "epoch": 3.0195850827364783, "grad_norm": 0.509073376294911, "learning_rate": 4.745816270431075e-06, "loss": 0.4388, "step": 7728 }, { "epoch": 3.0199802420350705, "grad_norm": 0.5057289776617495, "learning_rate": 4.74574743542941e-06, "loss": 0.4388, "step": 7729 }, { "epoch": 3.020375401333663, "grad_norm": 0.506257402966116, "learning_rate": 4.745678591607813e-06, "loss": 0.4337, "step": 7730 }, { "epoch": 3.020770560632255, "grad_norm": 0.5051592228444768, "learning_rate": 4.745609738966554e-06, "loss": 0.4439, "step": 7731 }, { "epoch": 3.0211657199308473, "grad_norm": 0.5014567224089678, "learning_rate": 4.745540877505904e-06, "loss": 0.4341, "step": 7732 }, { "epoch": 3.0215608792294395, "grad_norm": 0.5016734580118337, "learning_rate": 4.745472007226133e-06, "loss": 0.437, "step": 7733 }, { "epoch": 3.0219560385280317, "grad_norm": 0.5017266865893028, "learning_rate": 4.7454031281275105e-06, "loss": 0.4297, "step": 7734 }, { "epoch": 3.022351197826624, "grad_norm": 0.4925195971174838, "learning_rate": 4.745334240210309e-06, "loss": 0.4397, "step": 7735 }, { "epoch": 3.022746357125216, "grad_norm": 0.47846300333645714, "learning_rate": 4.745265343474797e-06, "loss": 0.4587, "step": 7736 }, { "epoch": 3.0231415164238085, "grad_norm": 0.4619490312270768, "learning_rate": 4.745196437921247e-06, "loss": 0.4376, "step": 7737 }, { "epoch": 3.0235366757224007, "grad_norm": 0.4863155493658263, "learning_rate": 4.745127523549928e-06, "loss": 0.486, "step": 7738 }, { "epoch": 3.023931835020993, "grad_norm": 0.5009948111270373, "learning_rate": 4.7450586003611124e-06, "loss": 0.4433, "step": 7739 }, { "epoch": 3.024326994319585, "grad_norm": 0.49175225322505095, "learning_rate": 4.744989668355069e-06, "loss": 0.4466, "step": 7740 }, { "epoch": 3.0247221536181774, "grad_norm": 0.4997137132091148, "learning_rate": 4.744920727532069e-06, "loss": 0.4422, "step": 7741 }, { "epoch": 3.0251173129167697, "grad_norm": 0.49782214688948523, "learning_rate": 4.744851777892386e-06, "loss": 0.4215, "step": 7742 }, { "epoch": 3.025512472215362, "grad_norm": 0.4878799150602817, "learning_rate": 4.744782819436287e-06, "loss": 0.4689, "step": 7743 }, { "epoch": 3.025907631513954, "grad_norm": 0.4801197425799182, "learning_rate": 4.7447138521640435e-06, "loss": 0.4133, "step": 7744 }, { "epoch": 3.0263027908125464, "grad_norm": 0.5153722734142074, "learning_rate": 4.744644876075926e-06, "loss": 0.4328, "step": 7745 }, { "epoch": 3.0266979501111386, "grad_norm": 0.48115731489986924, "learning_rate": 4.744575891172209e-06, "loss": 0.4477, "step": 7746 }, { "epoch": 3.027093109409731, "grad_norm": 0.5030409292416705, "learning_rate": 4.74450689745316e-06, "loss": 0.458, "step": 7747 }, { "epoch": 3.027488268708323, "grad_norm": 0.47695935750014723, "learning_rate": 4.7444378949190505e-06, "loss": 0.4647, "step": 7748 }, { "epoch": 3.0278834280069153, "grad_norm": 0.4898314891709876, "learning_rate": 4.744368883570152e-06, "loss": 0.4581, "step": 7749 }, { "epoch": 3.0282785873055076, "grad_norm": 0.5003360236778067, "learning_rate": 4.7442998634067356e-06, "loss": 0.4249, "step": 7750 }, { "epoch": 3.0286737466041, "grad_norm": 0.5091015290593945, "learning_rate": 4.744230834429071e-06, "loss": 0.4614, "step": 7751 }, { "epoch": 3.029068905902692, "grad_norm": 0.49509617467245814, "learning_rate": 4.744161796637432e-06, "loss": 0.4538, "step": 7752 }, { "epoch": 3.0294640652012843, "grad_norm": 0.49300299029182093, "learning_rate": 4.7440927500320875e-06, "loss": 0.4303, "step": 7753 }, { "epoch": 3.0298592244998765, "grad_norm": 0.49820644043525975, "learning_rate": 4.74402369461331e-06, "loss": 0.4286, "step": 7754 }, { "epoch": 3.030254383798469, "grad_norm": 0.5079411624836448, "learning_rate": 4.743954630381369e-06, "loss": 0.4414, "step": 7755 }, { "epoch": 3.030649543097061, "grad_norm": 0.4842722586826298, "learning_rate": 4.743885557336537e-06, "loss": 0.4296, "step": 7756 }, { "epoch": 3.0310447023956533, "grad_norm": 0.48422624836220324, "learning_rate": 4.743816475479086e-06, "loss": 0.4456, "step": 7757 }, { "epoch": 3.0314398616942455, "grad_norm": 0.5143163370660467, "learning_rate": 4.743747384809286e-06, "loss": 0.4369, "step": 7758 }, { "epoch": 3.0318350209928377, "grad_norm": 0.48054033529354206, "learning_rate": 4.743678285327409e-06, "loss": 0.4414, "step": 7759 }, { "epoch": 3.03223018029143, "grad_norm": 0.47545118742637443, "learning_rate": 4.743609177033725e-06, "loss": 0.4391, "step": 7760 }, { "epoch": 3.0326253395900222, "grad_norm": 0.500071350352214, "learning_rate": 4.7435400599285075e-06, "loss": 0.4591, "step": 7761 }, { "epoch": 3.0330204988886145, "grad_norm": 0.49736275171993133, "learning_rate": 4.743470934012026e-06, "loss": 0.4451, "step": 7762 }, { "epoch": 3.0334156581872067, "grad_norm": 0.48902933971503654, "learning_rate": 4.7434017992845536e-06, "loss": 0.444, "step": 7763 }, { "epoch": 3.033810817485799, "grad_norm": 0.5017317318287491, "learning_rate": 4.743332655746362e-06, "loss": 0.4433, "step": 7764 }, { "epoch": 3.034205976784391, "grad_norm": 0.49852541132644085, "learning_rate": 4.743263503397721e-06, "loss": 0.443, "step": 7765 }, { "epoch": 3.0346011360829834, "grad_norm": 0.49514495883125453, "learning_rate": 4.743194342238904e-06, "loss": 0.4434, "step": 7766 }, { "epoch": 3.0349962953815757, "grad_norm": 0.5065607691043884, "learning_rate": 4.743125172270181e-06, "loss": 0.4557, "step": 7767 }, { "epoch": 3.035391454680168, "grad_norm": 0.500892295601084, "learning_rate": 4.743055993491824e-06, "loss": 0.438, "step": 7768 }, { "epoch": 3.03578661397876, "grad_norm": 0.4840852701954542, "learning_rate": 4.7429868059041065e-06, "loss": 0.4334, "step": 7769 }, { "epoch": 3.0361817732773524, "grad_norm": 0.5112220036213672, "learning_rate": 4.742917609507298e-06, "loss": 0.4411, "step": 7770 }, { "epoch": 3.0365769325759446, "grad_norm": 0.48860572596776, "learning_rate": 4.742848404301671e-06, "loss": 0.4502, "step": 7771 }, { "epoch": 3.036972091874537, "grad_norm": 0.4819193894547075, "learning_rate": 4.742779190287497e-06, "loss": 0.4429, "step": 7772 }, { "epoch": 3.037367251173129, "grad_norm": 0.4943547849084416, "learning_rate": 4.742709967465049e-06, "loss": 0.4421, "step": 7773 }, { "epoch": 3.0377624104717214, "grad_norm": 0.4917250752903298, "learning_rate": 4.742640735834599e-06, "loss": 0.4425, "step": 7774 }, { "epoch": 3.0381575697703136, "grad_norm": 0.46606336140539156, "learning_rate": 4.742571495396415e-06, "loss": 0.4378, "step": 7775 }, { "epoch": 3.038552729068906, "grad_norm": 0.47545852725279386, "learning_rate": 4.742502246150775e-06, "loss": 0.4333, "step": 7776 }, { "epoch": 3.038947888367498, "grad_norm": 0.48667928367880847, "learning_rate": 4.742432988097946e-06, "loss": 0.4436, "step": 7777 }, { "epoch": 3.0393430476660903, "grad_norm": 0.4984876343406036, "learning_rate": 4.742363721238203e-06, "loss": 0.4413, "step": 7778 }, { "epoch": 3.0397382069646826, "grad_norm": 0.4810472084691766, "learning_rate": 4.742294445571817e-06, "loss": 0.4297, "step": 7779 }, { "epoch": 3.040133366263275, "grad_norm": 0.49611193782024826, "learning_rate": 4.742225161099059e-06, "loss": 0.4536, "step": 7780 }, { "epoch": 3.040528525561867, "grad_norm": 0.5043827439085407, "learning_rate": 4.7421558678202025e-06, "loss": 0.4358, "step": 7781 }, { "epoch": 3.0409236848604593, "grad_norm": 0.48913922839386753, "learning_rate": 4.7420865657355195e-06, "loss": 0.4348, "step": 7782 }, { "epoch": 3.0413188441590515, "grad_norm": 0.5045463746963759, "learning_rate": 4.742017254845282e-06, "loss": 0.4381, "step": 7783 }, { "epoch": 3.0417140034576438, "grad_norm": 0.4955607123203069, "learning_rate": 4.741947935149762e-06, "loss": 0.439, "step": 7784 }, { "epoch": 3.042109162756236, "grad_norm": 0.49656116436176884, "learning_rate": 4.741878606649232e-06, "loss": 0.4373, "step": 7785 }, { "epoch": 3.0425043220548282, "grad_norm": 0.48383805086599924, "learning_rate": 4.741809269343964e-06, "loss": 0.4417, "step": 7786 }, { "epoch": 3.0428994813534205, "grad_norm": 0.48644485198914195, "learning_rate": 4.741739923234231e-06, "loss": 0.4348, "step": 7787 }, { "epoch": 3.0432946406520127, "grad_norm": 0.5051982041050768, "learning_rate": 4.741670568320304e-06, "loss": 0.4462, "step": 7788 }, { "epoch": 3.043689799950605, "grad_norm": 0.5048908640256319, "learning_rate": 4.741601204602457e-06, "loss": 0.4518, "step": 7789 }, { "epoch": 3.044084959249197, "grad_norm": 0.4944058502455239, "learning_rate": 4.741531832080961e-06, "loss": 0.4481, "step": 7790 }, { "epoch": 3.0444801185477894, "grad_norm": 0.5375482439420921, "learning_rate": 4.741462450756089e-06, "loss": 0.4556, "step": 7791 }, { "epoch": 3.0448752778463817, "grad_norm": 0.483762343858191, "learning_rate": 4.741393060628115e-06, "loss": 0.4258, "step": 7792 }, { "epoch": 3.045270437144974, "grad_norm": 0.47995611425569656, "learning_rate": 4.741323661697308e-06, "loss": 0.4364, "step": 7793 }, { "epoch": 3.045665596443566, "grad_norm": 0.47885796380315865, "learning_rate": 4.741254253963944e-06, "loss": 0.4335, "step": 7794 }, { "epoch": 3.0460607557421584, "grad_norm": 0.5094045890304211, "learning_rate": 4.741184837428294e-06, "loss": 0.4501, "step": 7795 }, { "epoch": 3.0464559150407506, "grad_norm": 0.4896089467966352, "learning_rate": 4.741115412090631e-06, "loss": 0.448, "step": 7796 }, { "epoch": 3.046851074339343, "grad_norm": 0.4870799703211307, "learning_rate": 4.7410459779512276e-06, "loss": 0.4455, "step": 7797 }, { "epoch": 3.047246233637935, "grad_norm": 0.5102806429622584, "learning_rate": 4.740976535010355e-06, "loss": 0.431, "step": 7798 }, { "epoch": 3.0476413929365274, "grad_norm": 0.4899387354627377, "learning_rate": 4.740907083268289e-06, "loss": 0.4552, "step": 7799 }, { "epoch": 3.0480365522351196, "grad_norm": 0.4979089977106902, "learning_rate": 4.740837622725301e-06, "loss": 0.4383, "step": 7800 }, { "epoch": 3.048431711533712, "grad_norm": 0.49833292526111833, "learning_rate": 4.7407681533816624e-06, "loss": 0.4373, "step": 7801 }, { "epoch": 3.048826870832304, "grad_norm": 0.49821399658843996, "learning_rate": 4.7406986752376475e-06, "loss": 0.4479, "step": 7802 }, { "epoch": 3.0492220301308963, "grad_norm": 0.4744560438026494, "learning_rate": 4.740629188293529e-06, "loss": 0.4276, "step": 7803 }, { "epoch": 3.0496171894294886, "grad_norm": 0.47806331822046966, "learning_rate": 4.740559692549579e-06, "loss": 0.4253, "step": 7804 }, { "epoch": 3.050012348728081, "grad_norm": 0.48603559179075806, "learning_rate": 4.7404901880060725e-06, "loss": 0.4541, "step": 7805 }, { "epoch": 3.050407508026673, "grad_norm": 0.4910984242297495, "learning_rate": 4.74042067466328e-06, "loss": 0.4299, "step": 7806 }, { "epoch": 3.0508026673252653, "grad_norm": 0.4851618865150965, "learning_rate": 4.740351152521475e-06, "loss": 0.4374, "step": 7807 }, { "epoch": 3.0511978266238575, "grad_norm": 0.4889763206502265, "learning_rate": 4.740281621580932e-06, "loss": 0.436, "step": 7808 }, { "epoch": 3.0515929859224498, "grad_norm": 0.5012899815645088, "learning_rate": 4.740212081841924e-06, "loss": 0.4457, "step": 7809 }, { "epoch": 3.0519881452210424, "grad_norm": 0.5082796466392632, "learning_rate": 4.7401425333047215e-06, "loss": 0.4409, "step": 7810 }, { "epoch": 3.0523833045196347, "grad_norm": 0.4982145445419337, "learning_rate": 4.7400729759696e-06, "loss": 0.4436, "step": 7811 }, { "epoch": 3.052778463818227, "grad_norm": 0.4956899805603572, "learning_rate": 4.7400034098368325e-06, "loss": 0.4269, "step": 7812 }, { "epoch": 3.053173623116819, "grad_norm": 0.48480193413501993, "learning_rate": 4.739933834906692e-06, "loss": 0.4479, "step": 7813 }, { "epoch": 3.0535687824154114, "grad_norm": 0.4806050679449309, "learning_rate": 4.73986425117945e-06, "loss": 0.4367, "step": 7814 }, { "epoch": 3.0539639417140036, "grad_norm": 0.4855626404004344, "learning_rate": 4.739794658655383e-06, "loss": 0.4482, "step": 7815 }, { "epoch": 3.054359101012596, "grad_norm": 0.5103646887287417, "learning_rate": 4.739725057334762e-06, "loss": 0.4485, "step": 7816 }, { "epoch": 3.054754260311188, "grad_norm": 0.48745678622976013, "learning_rate": 4.7396554472178615e-06, "loss": 0.4548, "step": 7817 }, { "epoch": 3.0551494196097804, "grad_norm": 0.512946784611527, "learning_rate": 4.739585828304953e-06, "loss": 0.4438, "step": 7818 }, { "epoch": 3.0555445789083726, "grad_norm": 0.49787233339609754, "learning_rate": 4.739516200596313e-06, "loss": 0.4412, "step": 7819 }, { "epoch": 3.055939738206965, "grad_norm": 0.4923640029971268, "learning_rate": 4.739446564092213e-06, "loss": 0.4266, "step": 7820 }, { "epoch": 3.056334897505557, "grad_norm": 0.4922770070573321, "learning_rate": 4.739376918792926e-06, "loss": 0.4234, "step": 7821 }, { "epoch": 3.0567300568041493, "grad_norm": 0.48256515830579066, "learning_rate": 4.7393072646987266e-06, "loss": 0.4283, "step": 7822 }, { "epoch": 3.0571252161027416, "grad_norm": 0.4871301212135088, "learning_rate": 4.739237601809889e-06, "loss": 0.4347, "step": 7823 }, { "epoch": 3.057520375401334, "grad_norm": 0.48509606892345153, "learning_rate": 4.739167930126684e-06, "loss": 0.4345, "step": 7824 }, { "epoch": 3.057915534699926, "grad_norm": 0.6165399120475508, "learning_rate": 4.739098249649388e-06, "loss": 0.4507, "step": 7825 }, { "epoch": 3.0583106939985183, "grad_norm": 0.4921339621782017, "learning_rate": 4.739028560378274e-06, "loss": 0.4406, "step": 7826 }, { "epoch": 3.0587058532971105, "grad_norm": 0.5054560093119399, "learning_rate": 4.738958862313615e-06, "loss": 0.4542, "step": 7827 }, { "epoch": 3.0591010125957028, "grad_norm": 0.4886778814315178, "learning_rate": 4.7388891554556845e-06, "loss": 0.4393, "step": 7828 }, { "epoch": 3.059496171894295, "grad_norm": 0.4860239144323546, "learning_rate": 4.7388194398047585e-06, "loss": 0.4282, "step": 7829 }, { "epoch": 3.0598913311928873, "grad_norm": 0.4924325835486165, "learning_rate": 4.738749715361108e-06, "loss": 0.4533, "step": 7830 }, { "epoch": 3.0602864904914795, "grad_norm": 0.48932648303037984, "learning_rate": 4.738679982125008e-06, "loss": 0.4453, "step": 7831 }, { "epoch": 3.0606816497900717, "grad_norm": 0.48237789375682943, "learning_rate": 4.738610240096733e-06, "loss": 0.4447, "step": 7832 }, { "epoch": 3.061076809088664, "grad_norm": 0.506439179071494, "learning_rate": 4.7385404892765565e-06, "loss": 0.4499, "step": 7833 }, { "epoch": 3.061471968387256, "grad_norm": 0.4977813662126531, "learning_rate": 4.738470729664753e-06, "loss": 0.4369, "step": 7834 }, { "epoch": 3.0618671276858485, "grad_norm": 0.5122710738027177, "learning_rate": 4.738400961261594e-06, "loss": 0.4395, "step": 7835 }, { "epoch": 3.0622622869844407, "grad_norm": 0.4846403050899975, "learning_rate": 4.7383311840673565e-06, "loss": 0.4371, "step": 7836 }, { "epoch": 3.062657446283033, "grad_norm": 0.5032833387552864, "learning_rate": 4.738261398082313e-06, "loss": 0.4389, "step": 7837 }, { "epoch": 3.063052605581625, "grad_norm": 0.4923216050566678, "learning_rate": 4.738191603306738e-06, "loss": 0.4339, "step": 7838 }, { "epoch": 3.0634477648802174, "grad_norm": 0.4909668236344491, "learning_rate": 4.738121799740904e-06, "loss": 0.4379, "step": 7839 }, { "epoch": 3.0638429241788097, "grad_norm": 0.5040118071240738, "learning_rate": 4.738051987385088e-06, "loss": 0.44, "step": 7840 }, { "epoch": 3.064238083477402, "grad_norm": 0.5125484179414564, "learning_rate": 4.737982166239563e-06, "loss": 0.4341, "step": 7841 }, { "epoch": 3.064633242775994, "grad_norm": 0.5251112266930515, "learning_rate": 4.737912336304602e-06, "loss": 0.4307, "step": 7842 }, { "epoch": 3.0650284020745864, "grad_norm": 0.481537716871614, "learning_rate": 4.737842497580482e-06, "loss": 0.4342, "step": 7843 }, { "epoch": 3.0654235613731786, "grad_norm": 0.48948821736693127, "learning_rate": 4.737772650067474e-06, "loss": 0.439, "step": 7844 }, { "epoch": 3.065818720671771, "grad_norm": 0.5023038703887029, "learning_rate": 4.737702793765855e-06, "loss": 0.4525, "step": 7845 }, { "epoch": 3.066213879970363, "grad_norm": 0.494523096453894, "learning_rate": 4.737632928675897e-06, "loss": 0.4439, "step": 7846 }, { "epoch": 3.0666090392689553, "grad_norm": 0.48065114804411224, "learning_rate": 4.7375630547978764e-06, "loss": 0.4238, "step": 7847 }, { "epoch": 3.0670041985675476, "grad_norm": 0.49790523236786666, "learning_rate": 4.737493172132067e-06, "loss": 0.4309, "step": 7848 }, { "epoch": 3.06739935786614, "grad_norm": 0.4989996884805779, "learning_rate": 4.737423280678742e-06, "loss": 0.4512, "step": 7849 }, { "epoch": 3.067794517164732, "grad_norm": 0.5016208814260822, "learning_rate": 4.737353380438178e-06, "loss": 0.4539, "step": 7850 }, { "epoch": 3.0681896764633243, "grad_norm": 0.5134079709987914, "learning_rate": 4.737283471410649e-06, "loss": 0.457, "step": 7851 }, { "epoch": 3.0685848357619165, "grad_norm": 0.5067457933406915, "learning_rate": 4.737213553596428e-06, "loss": 0.4481, "step": 7852 }, { "epoch": 3.068979995060509, "grad_norm": 0.4776716218044217, "learning_rate": 4.73714362699579e-06, "loss": 0.4383, "step": 7853 }, { "epoch": 3.069375154359101, "grad_norm": 0.48957989485980835, "learning_rate": 4.737073691609012e-06, "loss": 0.4357, "step": 7854 }, { "epoch": 3.0697703136576933, "grad_norm": 0.47676778659524477, "learning_rate": 4.737003747436366e-06, "loss": 0.4386, "step": 7855 }, { "epoch": 3.0701654729562855, "grad_norm": 0.6191504502491905, "learning_rate": 4.736933794478128e-06, "loss": 0.4543, "step": 7856 }, { "epoch": 3.0705606322548777, "grad_norm": 0.4942925858350992, "learning_rate": 4.736863832734573e-06, "loss": 0.4353, "step": 7857 }, { "epoch": 3.07095579155347, "grad_norm": 0.5216216205160417, "learning_rate": 4.736793862205974e-06, "loss": 0.4269, "step": 7858 }, { "epoch": 3.0713509508520622, "grad_norm": 0.4838627707703042, "learning_rate": 4.7367238828926075e-06, "loss": 0.4415, "step": 7859 }, { "epoch": 3.0717461101506545, "grad_norm": 0.48668009920048644, "learning_rate": 4.736653894794748e-06, "loss": 0.4303, "step": 7860 }, { "epoch": 3.0721412694492467, "grad_norm": 0.5167979871026906, "learning_rate": 4.7365838979126696e-06, "loss": 0.4665, "step": 7861 }, { "epoch": 3.072536428747839, "grad_norm": 0.5067480201831851, "learning_rate": 4.736513892246648e-06, "loss": 0.4592, "step": 7862 }, { "epoch": 3.072931588046431, "grad_norm": 0.503091144854992, "learning_rate": 4.736443877796959e-06, "loss": 0.4509, "step": 7863 }, { "epoch": 3.0733267473450234, "grad_norm": 0.4966903825101766, "learning_rate": 4.736373854563875e-06, "loss": 0.4393, "step": 7864 }, { "epoch": 3.0737219066436157, "grad_norm": 0.5053933985734216, "learning_rate": 4.736303822547673e-06, "loss": 0.4505, "step": 7865 }, { "epoch": 3.074117065942208, "grad_norm": 0.47840381289455225, "learning_rate": 4.736233781748627e-06, "loss": 0.4346, "step": 7866 }, { "epoch": 3.0745122252408, "grad_norm": 0.5006357088048857, "learning_rate": 4.736163732167014e-06, "loss": 0.4538, "step": 7867 }, { "epoch": 3.0749073845393924, "grad_norm": 0.47835652192235645, "learning_rate": 4.736093673803106e-06, "loss": 0.4365, "step": 7868 }, { "epoch": 3.0753025438379846, "grad_norm": 0.5171050130456775, "learning_rate": 4.736023606657181e-06, "loss": 0.4422, "step": 7869 }, { "epoch": 3.075697703136577, "grad_norm": 0.4937403544438971, "learning_rate": 4.735953530729514e-06, "loss": 0.4316, "step": 7870 }, { "epoch": 3.076092862435169, "grad_norm": 0.4959244380471904, "learning_rate": 4.735883446020377e-06, "loss": 0.4553, "step": 7871 }, { "epoch": 3.0764880217337613, "grad_norm": 0.5016759159070576, "learning_rate": 4.7358133525300484e-06, "loss": 0.4445, "step": 7872 }, { "epoch": 3.0768831810323536, "grad_norm": 0.48705287229754113, "learning_rate": 4.735743250258803e-06, "loss": 0.4469, "step": 7873 }, { "epoch": 3.077278340330946, "grad_norm": 0.5092392088990833, "learning_rate": 4.735673139206915e-06, "loss": 0.4592, "step": 7874 }, { "epoch": 3.077673499629538, "grad_norm": 0.47669040144749475, "learning_rate": 4.735603019374661e-06, "loss": 0.4275, "step": 7875 }, { "epoch": 3.0780686589281303, "grad_norm": 0.4924439478857504, "learning_rate": 4.735532890762316e-06, "loss": 0.4395, "step": 7876 }, { "epoch": 3.0784638182267225, "grad_norm": 0.5123885580239934, "learning_rate": 4.735462753370156e-06, "loss": 0.4501, "step": 7877 }, { "epoch": 3.078858977525315, "grad_norm": 0.4823099934221924, "learning_rate": 4.735392607198455e-06, "loss": 0.4275, "step": 7878 }, { "epoch": 3.079254136823907, "grad_norm": 0.4857701642431153, "learning_rate": 4.735322452247489e-06, "loss": 0.4373, "step": 7879 }, { "epoch": 3.0796492961224993, "grad_norm": 0.49163839390605096, "learning_rate": 4.7352522885175345e-06, "loss": 0.4455, "step": 7880 }, { "epoch": 3.0800444554210915, "grad_norm": 0.4884031270257798, "learning_rate": 4.735182116008866e-06, "loss": 0.4582, "step": 7881 }, { "epoch": 3.0804396147196837, "grad_norm": 0.4899553724792646, "learning_rate": 4.7351119347217585e-06, "loss": 0.4355, "step": 7882 }, { "epoch": 3.080834774018276, "grad_norm": 0.4949749498592674, "learning_rate": 4.73504174465649e-06, "loss": 0.4525, "step": 7883 }, { "epoch": 3.0812299333168682, "grad_norm": 0.4988182734709485, "learning_rate": 4.734971545813334e-06, "loss": 0.4498, "step": 7884 }, { "epoch": 3.0816250926154605, "grad_norm": 0.49792382572578914, "learning_rate": 4.734901338192567e-06, "loss": 0.4562, "step": 7885 }, { "epoch": 3.0820202519140527, "grad_norm": 0.4915536561332589, "learning_rate": 4.734831121794464e-06, "loss": 0.4463, "step": 7886 }, { "epoch": 3.082415411212645, "grad_norm": 0.4970561396674575, "learning_rate": 4.734760896619302e-06, "loss": 0.4476, "step": 7887 }, { "epoch": 3.082810570511237, "grad_norm": 0.4997419639637845, "learning_rate": 4.734690662667356e-06, "loss": 0.4337, "step": 7888 }, { "epoch": 3.0832057298098294, "grad_norm": 0.48823979322917044, "learning_rate": 4.734620419938902e-06, "loss": 0.4684, "step": 7889 }, { "epoch": 3.0836008891084217, "grad_norm": 0.5036100358314696, "learning_rate": 4.734550168434216e-06, "loss": 0.4457, "step": 7890 }, { "epoch": 3.083996048407014, "grad_norm": 0.4941812609213103, "learning_rate": 4.734479908153574e-06, "loss": 0.4448, "step": 7891 }, { "epoch": 3.084391207705606, "grad_norm": 0.5061906057278033, "learning_rate": 4.734409639097253e-06, "loss": 0.4486, "step": 7892 }, { "epoch": 3.0847863670041984, "grad_norm": 0.5100975450233716, "learning_rate": 4.734339361265526e-06, "loss": 0.4425, "step": 7893 }, { "epoch": 3.0851815263027906, "grad_norm": 0.49861233440246766, "learning_rate": 4.7342690746586714e-06, "loss": 0.4356, "step": 7894 }, { "epoch": 3.085576685601383, "grad_norm": 0.574904471137688, "learning_rate": 4.734198779276964e-06, "loss": 0.4409, "step": 7895 }, { "epoch": 3.085971844899975, "grad_norm": 0.4873731771876203, "learning_rate": 4.734128475120681e-06, "loss": 0.4405, "step": 7896 }, { "epoch": 3.0863670041985674, "grad_norm": 0.4951592433754978, "learning_rate": 4.7340581621900985e-06, "loss": 0.4501, "step": 7897 }, { "epoch": 3.0867621634971596, "grad_norm": 0.4766631688452397, "learning_rate": 4.733987840485491e-06, "loss": 0.4333, "step": 7898 }, { "epoch": 3.087157322795752, "grad_norm": 0.4923906837500623, "learning_rate": 4.733917510007137e-06, "loss": 0.4383, "step": 7899 }, { "epoch": 3.087552482094344, "grad_norm": 0.48241702014723586, "learning_rate": 4.73384717075531e-06, "loss": 0.4313, "step": 7900 }, { "epoch": 3.0879476413929363, "grad_norm": 0.48868586859443763, "learning_rate": 4.733776822730289e-06, "loss": 0.4409, "step": 7901 }, { "epoch": 3.088342800691529, "grad_norm": 0.5123299558820024, "learning_rate": 4.733706465932349e-06, "loss": 0.4523, "step": 7902 }, { "epoch": 3.0887379599901212, "grad_norm": 0.4933673322520625, "learning_rate": 4.733636100361766e-06, "loss": 0.4312, "step": 7903 }, { "epoch": 3.0891331192887135, "grad_norm": 0.5081997308648453, "learning_rate": 4.733565726018817e-06, "loss": 0.4357, "step": 7904 }, { "epoch": 3.0895282785873057, "grad_norm": 0.4975799368385473, "learning_rate": 4.733495342903778e-06, "loss": 0.4392, "step": 7905 }, { "epoch": 3.089923437885898, "grad_norm": 0.5025137130985103, "learning_rate": 4.733424951016925e-06, "loss": 0.4734, "step": 7906 }, { "epoch": 3.09031859718449, "grad_norm": 0.4977488175268691, "learning_rate": 4.733354550358536e-06, "loss": 0.4412, "step": 7907 }, { "epoch": 3.0907137564830824, "grad_norm": 0.48503718006898866, "learning_rate": 4.733284140928886e-06, "loss": 0.4577, "step": 7908 }, { "epoch": 3.0911089157816747, "grad_norm": 0.49140511027996614, "learning_rate": 4.733213722728251e-06, "loss": 0.4249, "step": 7909 }, { "epoch": 3.091504075080267, "grad_norm": 0.4854895181626003, "learning_rate": 4.73314329575691e-06, "loss": 0.4417, "step": 7910 }, { "epoch": 3.091899234378859, "grad_norm": 0.49472508088997874, "learning_rate": 4.733072860015138e-06, "loss": 0.4447, "step": 7911 }, { "epoch": 3.0922943936774514, "grad_norm": 0.5209744346067812, "learning_rate": 4.7330024155032115e-06, "loss": 0.4517, "step": 7912 }, { "epoch": 3.0926895529760436, "grad_norm": 0.5376195020380707, "learning_rate": 4.732931962221407e-06, "loss": 0.4473, "step": 7913 }, { "epoch": 3.093084712274636, "grad_norm": 0.48849504260879484, "learning_rate": 4.732861500170003e-06, "loss": 0.4362, "step": 7914 }, { "epoch": 3.093479871573228, "grad_norm": 0.5121173737122551, "learning_rate": 4.732791029349274e-06, "loss": 0.4364, "step": 7915 }, { "epoch": 3.0938750308718204, "grad_norm": 0.5013991610345373, "learning_rate": 4.7327205497594975e-06, "loss": 0.4413, "step": 7916 }, { "epoch": 3.0942701901704126, "grad_norm": 0.5132348413742815, "learning_rate": 4.732650061400951e-06, "loss": 0.4609, "step": 7917 }, { "epoch": 3.094665349469005, "grad_norm": 0.5140445000952766, "learning_rate": 4.73257956427391e-06, "loss": 0.4384, "step": 7918 }, { "epoch": 3.095060508767597, "grad_norm": 0.4805026819173038, "learning_rate": 4.732509058378653e-06, "loss": 0.4353, "step": 7919 }, { "epoch": 3.0954556680661893, "grad_norm": 0.4998603496611781, "learning_rate": 4.732438543715456e-06, "loss": 0.4513, "step": 7920 }, { "epoch": 3.0958508273647816, "grad_norm": 0.5345924785899526, "learning_rate": 4.732368020284596e-06, "loss": 0.4452, "step": 7921 }, { "epoch": 3.096245986663374, "grad_norm": 0.5074168698408299, "learning_rate": 4.732297488086349e-06, "loss": 0.4556, "step": 7922 }, { "epoch": 3.096641145961966, "grad_norm": 0.4959304495710035, "learning_rate": 4.732226947120995e-06, "loss": 0.4633, "step": 7923 }, { "epoch": 3.0970363052605583, "grad_norm": 0.5729612706103528, "learning_rate": 4.732156397388807e-06, "loss": 0.4323, "step": 7924 }, { "epoch": 3.0974314645591505, "grad_norm": 0.48429647430521955, "learning_rate": 4.732085838890064e-06, "loss": 0.4401, "step": 7925 }, { "epoch": 3.0978266238577428, "grad_norm": 0.49722917342119355, "learning_rate": 4.732015271625045e-06, "loss": 0.4468, "step": 7926 }, { "epoch": 3.098221783156335, "grad_norm": 0.5045870249115458, "learning_rate": 4.731944695594024e-06, "loss": 0.4511, "step": 7927 }, { "epoch": 3.0986169424549272, "grad_norm": 0.5022592905161292, "learning_rate": 4.731874110797281e-06, "loss": 0.4506, "step": 7928 }, { "epoch": 3.0990121017535195, "grad_norm": 0.5111603683659854, "learning_rate": 4.73180351723509e-06, "loss": 0.4524, "step": 7929 }, { "epoch": 3.0994072610521117, "grad_norm": 0.5267537367598453, "learning_rate": 4.731732914907731e-06, "loss": 0.4576, "step": 7930 }, { "epoch": 3.099802420350704, "grad_norm": 0.5723867018529993, "learning_rate": 4.731662303815479e-06, "loss": 0.4464, "step": 7931 }, { "epoch": 3.100197579649296, "grad_norm": 0.5002231014728444, "learning_rate": 4.7315916839586144e-06, "loss": 0.4518, "step": 7932 }, { "epoch": 3.1005927389478884, "grad_norm": 0.49456541023282674, "learning_rate": 4.731521055337412e-06, "loss": 0.4395, "step": 7933 }, { "epoch": 3.1009878982464807, "grad_norm": 0.48028211739122306, "learning_rate": 4.73145041795215e-06, "loss": 0.4491, "step": 7934 }, { "epoch": 3.101383057545073, "grad_norm": 0.49265412815653176, "learning_rate": 4.731379771803106e-06, "loss": 0.4479, "step": 7935 }, { "epoch": 3.101778216843665, "grad_norm": 0.488033887759252, "learning_rate": 4.731309116890556e-06, "loss": 0.433, "step": 7936 }, { "epoch": 3.1021733761422574, "grad_norm": 0.4979653413020749, "learning_rate": 4.731238453214781e-06, "loss": 0.4585, "step": 7937 }, { "epoch": 3.1025685354408497, "grad_norm": 0.5024424180826177, "learning_rate": 4.731167780776055e-06, "loss": 0.4695, "step": 7938 }, { "epoch": 3.102963694739442, "grad_norm": 0.49050531548527476, "learning_rate": 4.731097099574656e-06, "loss": 0.4434, "step": 7939 }, { "epoch": 3.103358854038034, "grad_norm": 0.4993603539723156, "learning_rate": 4.731026409610863e-06, "loss": 0.4562, "step": 7940 }, { "epoch": 3.1037540133366264, "grad_norm": 0.4852146754915784, "learning_rate": 4.7309557108849535e-06, "loss": 0.4528, "step": 7941 }, { "epoch": 3.1041491726352186, "grad_norm": 0.47989325470415545, "learning_rate": 4.730885003397204e-06, "loss": 0.4435, "step": 7942 }, { "epoch": 3.104544331933811, "grad_norm": 0.5012231199309242, "learning_rate": 4.730814287147893e-06, "loss": 0.4349, "step": 7943 }, { "epoch": 3.104939491232403, "grad_norm": 0.5077310439859108, "learning_rate": 4.730743562137299e-06, "loss": 0.4444, "step": 7944 }, { "epoch": 3.1053346505309953, "grad_norm": 0.5353226020763748, "learning_rate": 4.7306728283656976e-06, "loss": 0.4208, "step": 7945 }, { "epoch": 3.1057298098295876, "grad_norm": 0.4789749092687642, "learning_rate": 4.7306020858333685e-06, "loss": 0.4351, "step": 7946 }, { "epoch": 3.10612496912818, "grad_norm": 0.49123192197023, "learning_rate": 4.730531334540589e-06, "loss": 0.4511, "step": 7947 }, { "epoch": 3.106520128426772, "grad_norm": 0.503885581742825, "learning_rate": 4.730460574487636e-06, "loss": 0.4593, "step": 7948 }, { "epoch": 3.1069152877253643, "grad_norm": 0.4910430506949811, "learning_rate": 4.7303898056747895e-06, "loss": 0.4606, "step": 7949 }, { "epoch": 3.1073104470239565, "grad_norm": 0.4997712623504369, "learning_rate": 4.730319028102326e-06, "loss": 0.4524, "step": 7950 }, { "epoch": 3.1077056063225488, "grad_norm": 0.4998843657763657, "learning_rate": 4.730248241770523e-06, "loss": 0.4575, "step": 7951 }, { "epoch": 3.108100765621141, "grad_norm": 0.5030067437860954, "learning_rate": 4.730177446679659e-06, "loss": 0.4403, "step": 7952 }, { "epoch": 3.1084959249197333, "grad_norm": 0.5010628496154773, "learning_rate": 4.730106642830013e-06, "loss": 0.4491, "step": 7953 }, { "epoch": 3.1088910842183255, "grad_norm": 0.49750569778277126, "learning_rate": 4.730035830221862e-06, "loss": 0.4488, "step": 7954 }, { "epoch": 3.1092862435169177, "grad_norm": 0.5133141372969336, "learning_rate": 4.729965008855485e-06, "loss": 0.4502, "step": 7955 }, { "epoch": 3.10968140281551, "grad_norm": 0.47517558280691385, "learning_rate": 4.729894178731159e-06, "loss": 0.4254, "step": 7956 }, { "epoch": 3.110076562114102, "grad_norm": 0.5159606553817537, "learning_rate": 4.7298233398491625e-06, "loss": 0.4435, "step": 7957 }, { "epoch": 3.1104717214126945, "grad_norm": 0.4979703809694994, "learning_rate": 4.729752492209774e-06, "loss": 0.459, "step": 7958 }, { "epoch": 3.1108668807112867, "grad_norm": 0.49169143247666647, "learning_rate": 4.729681635813272e-06, "loss": 0.454, "step": 7959 }, { "epoch": 3.111262040009879, "grad_norm": 0.5197648875864174, "learning_rate": 4.729610770659934e-06, "loss": 0.4627, "step": 7960 }, { "epoch": 3.111657199308471, "grad_norm": 0.5146456770573503, "learning_rate": 4.729539896750039e-06, "loss": 0.4386, "step": 7961 }, { "epoch": 3.1120523586070634, "grad_norm": 0.5152953570832932, "learning_rate": 4.729469014083865e-06, "loss": 0.4338, "step": 7962 }, { "epoch": 3.1124475179056557, "grad_norm": 0.5029822022584626, "learning_rate": 4.729398122661692e-06, "loss": 0.4484, "step": 7963 }, { "epoch": 3.112842677204248, "grad_norm": 0.48762300399924735, "learning_rate": 4.729327222483795e-06, "loss": 0.4572, "step": 7964 }, { "epoch": 3.11323783650284, "grad_norm": 0.5078757083645274, "learning_rate": 4.7292563135504545e-06, "loss": 0.4435, "step": 7965 }, { "epoch": 3.1136329958014324, "grad_norm": 0.4895329614863369, "learning_rate": 4.72918539586195e-06, "loss": 0.4424, "step": 7966 }, { "epoch": 3.1140281551000246, "grad_norm": 0.48685840804033426, "learning_rate": 4.729114469418559e-06, "loss": 0.4529, "step": 7967 }, { "epoch": 3.114423314398617, "grad_norm": 0.5011426297857062, "learning_rate": 4.729043534220559e-06, "loss": 0.4412, "step": 7968 }, { "epoch": 3.114818473697209, "grad_norm": 0.5086734828450132, "learning_rate": 4.728972590268229e-06, "loss": 0.4444, "step": 7969 }, { "epoch": 3.1152136329958013, "grad_norm": 0.4872906939792074, "learning_rate": 4.728901637561849e-06, "loss": 0.4318, "step": 7970 }, { "epoch": 3.1156087922943936, "grad_norm": 0.4894151559799161, "learning_rate": 4.7288306761016976e-06, "loss": 0.4377, "step": 7971 }, { "epoch": 3.116003951592986, "grad_norm": 0.5055954518080974, "learning_rate": 4.7287597058880516e-06, "loss": 0.4496, "step": 7972 }, { "epoch": 3.116399110891578, "grad_norm": 0.5148624398253207, "learning_rate": 4.728688726921191e-06, "loss": 0.4341, "step": 7973 }, { "epoch": 3.1167942701901703, "grad_norm": 0.5155539975890019, "learning_rate": 4.728617739201396e-06, "loss": 0.4545, "step": 7974 }, { "epoch": 3.1171894294887625, "grad_norm": 0.4864073054526971, "learning_rate": 4.728546742728941e-06, "loss": 0.4356, "step": 7975 }, { "epoch": 3.117584588787355, "grad_norm": 0.48348184096928454, "learning_rate": 4.728475737504109e-06, "loss": 0.4346, "step": 7976 }, { "epoch": 3.117979748085947, "grad_norm": 0.4769632382044736, "learning_rate": 4.728404723527178e-06, "loss": 0.4325, "step": 7977 }, { "epoch": 3.1183749073845393, "grad_norm": 0.4977904405800571, "learning_rate": 4.728333700798427e-06, "loss": 0.4431, "step": 7978 }, { "epoch": 3.1187700666831315, "grad_norm": 0.4786988797880768, "learning_rate": 4.728262669318133e-06, "loss": 0.4377, "step": 7979 }, { "epoch": 3.1191652259817237, "grad_norm": 0.49376051451383535, "learning_rate": 4.728191629086576e-06, "loss": 0.4405, "step": 7980 }, { "epoch": 3.119560385280316, "grad_norm": 0.49204273770267093, "learning_rate": 4.728120580104036e-06, "loss": 0.4542, "step": 7981 }, { "epoch": 3.1199555445789082, "grad_norm": 0.47099976061807647, "learning_rate": 4.728049522370791e-06, "loss": 0.4385, "step": 7982 }, { "epoch": 3.1203507038775005, "grad_norm": 0.5025481626978274, "learning_rate": 4.727978455887121e-06, "loss": 0.462, "step": 7983 }, { "epoch": 3.1207458631760927, "grad_norm": 0.5017733662110474, "learning_rate": 4.727907380653305e-06, "loss": 0.4428, "step": 7984 }, { "epoch": 3.121141022474685, "grad_norm": 0.483833290731428, "learning_rate": 4.7278362966696204e-06, "loss": 0.431, "step": 7985 }, { "epoch": 3.121536181773277, "grad_norm": 0.5083971021184884, "learning_rate": 4.727765203936348e-06, "loss": 0.4337, "step": 7986 }, { "epoch": 3.1219313410718694, "grad_norm": 0.49782182938515346, "learning_rate": 4.727694102453767e-06, "loss": 0.4353, "step": 7987 }, { "epoch": 3.1223265003704617, "grad_norm": 0.5092377334599689, "learning_rate": 4.727622992222156e-06, "loss": 0.4453, "step": 7988 }, { "epoch": 3.122721659669054, "grad_norm": 0.4911234394591529, "learning_rate": 4.7275518732417945e-06, "loss": 0.4348, "step": 7989 }, { "epoch": 3.123116818967646, "grad_norm": 0.4745444703266985, "learning_rate": 4.727480745512962e-06, "loss": 0.4433, "step": 7990 }, { "epoch": 3.1235119782662384, "grad_norm": 0.4882371574102391, "learning_rate": 4.727409609035938e-06, "loss": 0.444, "step": 7991 }, { "epoch": 3.1239071375648306, "grad_norm": 0.49239501433303656, "learning_rate": 4.727338463811002e-06, "loss": 0.4373, "step": 7992 }, { "epoch": 3.124302296863423, "grad_norm": 0.48170385902308394, "learning_rate": 4.727267309838432e-06, "loss": 0.442, "step": 7993 }, { "epoch": 3.124697456162015, "grad_norm": 0.49440788201479674, "learning_rate": 4.727196147118509e-06, "loss": 0.4366, "step": 7994 }, { "epoch": 3.1250926154606073, "grad_norm": 0.48255932560969295, "learning_rate": 4.727124975651512e-06, "loss": 0.4361, "step": 7995 }, { "epoch": 3.1254877747591996, "grad_norm": 0.49476276790071294, "learning_rate": 4.727053795437721e-06, "loss": 0.4573, "step": 7996 }, { "epoch": 3.125882934057792, "grad_norm": 0.49896678550201234, "learning_rate": 4.726982606477414e-06, "loss": 0.4553, "step": 7997 }, { "epoch": 3.126278093356384, "grad_norm": 0.47886145773353866, "learning_rate": 4.7269114087708714e-06, "loss": 0.4255, "step": 7998 }, { "epoch": 3.1266732526549763, "grad_norm": 0.50038796905005, "learning_rate": 4.7268402023183736e-06, "loss": 0.444, "step": 7999 }, { "epoch": 3.1270684119535685, "grad_norm": 0.49176360838395583, "learning_rate": 4.7267689871201995e-06, "loss": 0.4473, "step": 8000 }, { "epoch": 3.1274635712521612, "grad_norm": 0.4995031801626146, "learning_rate": 4.72669776317663e-06, "loss": 0.4583, "step": 8001 }, { "epoch": 3.1278587305507535, "grad_norm": 0.48056503087598096, "learning_rate": 4.726626530487943e-06, "loss": 0.4377, "step": 8002 }, { "epoch": 3.1282538898493457, "grad_norm": 0.5014416609905591, "learning_rate": 4.726555289054419e-06, "loss": 0.4422, "step": 8003 }, { "epoch": 3.128649049147938, "grad_norm": 0.49552073766113186, "learning_rate": 4.726484038876338e-06, "loss": 0.4479, "step": 8004 }, { "epoch": 3.12904420844653, "grad_norm": 0.5238464019017981, "learning_rate": 4.726412779953979e-06, "loss": 0.4569, "step": 8005 }, { "epoch": 3.1294393677451224, "grad_norm": 0.5077356869403138, "learning_rate": 4.726341512287623e-06, "loss": 0.4641, "step": 8006 }, { "epoch": 3.1298345270437147, "grad_norm": 0.5100490606005, "learning_rate": 4.72627023587755e-06, "loss": 0.4478, "step": 8007 }, { "epoch": 3.130229686342307, "grad_norm": 0.48960448306534565, "learning_rate": 4.726198950724039e-06, "loss": 0.437, "step": 8008 }, { "epoch": 3.130624845640899, "grad_norm": 0.483619465818084, "learning_rate": 4.726127656827371e-06, "loss": 0.4476, "step": 8009 }, { "epoch": 3.1310200049394914, "grad_norm": 0.5085324589140995, "learning_rate": 4.726056354187825e-06, "loss": 0.4336, "step": 8010 }, { "epoch": 3.1314151642380836, "grad_norm": 0.5078606194501115, "learning_rate": 4.725985042805681e-06, "loss": 0.4465, "step": 8011 }, { "epoch": 3.131810323536676, "grad_norm": 0.49723065197368327, "learning_rate": 4.725913722681219e-06, "loss": 0.4538, "step": 8012 }, { "epoch": 3.132205482835268, "grad_norm": 0.5007007217832884, "learning_rate": 4.72584239381472e-06, "loss": 0.4738, "step": 8013 }, { "epoch": 3.1326006421338604, "grad_norm": 0.4898205436347479, "learning_rate": 4.725771056206464e-06, "loss": 0.4344, "step": 8014 }, { "epoch": 3.1329958014324526, "grad_norm": 0.5166831822498245, "learning_rate": 4.725699709856731e-06, "loss": 0.4412, "step": 8015 }, { "epoch": 3.133390960731045, "grad_norm": 0.48591617847536656, "learning_rate": 4.7256283547658e-06, "loss": 0.4383, "step": 8016 }, { "epoch": 3.133786120029637, "grad_norm": 0.49374057323016374, "learning_rate": 4.725556990933953e-06, "loss": 0.4509, "step": 8017 }, { "epoch": 3.1341812793282293, "grad_norm": 0.48914627951452694, "learning_rate": 4.72548561836147e-06, "loss": 0.432, "step": 8018 }, { "epoch": 3.1345764386268216, "grad_norm": 0.48980290819549344, "learning_rate": 4.72541423704863e-06, "loss": 0.4362, "step": 8019 }, { "epoch": 3.134971597925414, "grad_norm": 0.49924757675061804, "learning_rate": 4.7253428469957144e-06, "loss": 0.4405, "step": 8020 }, { "epoch": 3.135366757224006, "grad_norm": 0.5080074429751118, "learning_rate": 4.725271448203003e-06, "loss": 0.4477, "step": 8021 }, { "epoch": 3.1357619165225983, "grad_norm": 0.5140176692672513, "learning_rate": 4.7252000406707775e-06, "loss": 0.4527, "step": 8022 }, { "epoch": 3.1361570758211905, "grad_norm": 0.506032454034494, "learning_rate": 4.725128624399318e-06, "loss": 0.4488, "step": 8023 }, { "epoch": 3.1365522351197828, "grad_norm": 0.49308027132547305, "learning_rate": 4.725057199388903e-06, "loss": 0.4351, "step": 8024 }, { "epoch": 3.136947394418375, "grad_norm": 0.5104007123637837, "learning_rate": 4.724985765639815e-06, "loss": 0.4602, "step": 8025 }, { "epoch": 3.1373425537169672, "grad_norm": 0.48895688295994194, "learning_rate": 4.7249143231523345e-06, "loss": 0.4409, "step": 8026 }, { "epoch": 3.1377377130155595, "grad_norm": 0.5064246682765903, "learning_rate": 4.724842871926741e-06, "loss": 0.4379, "step": 8027 }, { "epoch": 3.1381328723141517, "grad_norm": 0.5069265300363764, "learning_rate": 4.724771411963316e-06, "loss": 0.4647, "step": 8028 }, { "epoch": 3.138528031612744, "grad_norm": 0.5030257723209636, "learning_rate": 4.72469994326234e-06, "loss": 0.4447, "step": 8029 }, { "epoch": 3.138923190911336, "grad_norm": 0.5051475543956928, "learning_rate": 4.724628465824093e-06, "loss": 0.4457, "step": 8030 }, { "epoch": 3.1393183502099284, "grad_norm": 0.47809073280195014, "learning_rate": 4.724556979648856e-06, "loss": 0.4483, "step": 8031 }, { "epoch": 3.1397135095085207, "grad_norm": 0.5238988459465759, "learning_rate": 4.724485484736911e-06, "loss": 0.4503, "step": 8032 }, { "epoch": 3.140108668807113, "grad_norm": 0.4964702713402975, "learning_rate": 4.724413981088537e-06, "loss": 0.4453, "step": 8033 }, { "epoch": 3.140503828105705, "grad_norm": 0.49721437049653233, "learning_rate": 4.724342468704016e-06, "loss": 0.4476, "step": 8034 }, { "epoch": 3.1408989874042974, "grad_norm": 0.47752057571805373, "learning_rate": 4.724270947583628e-06, "loss": 0.4351, "step": 8035 }, { "epoch": 3.1412941467028896, "grad_norm": 0.4937090612310495, "learning_rate": 4.724199417727654e-06, "loss": 0.4579, "step": 8036 }, { "epoch": 3.141689306001482, "grad_norm": 0.48947682743996235, "learning_rate": 4.724127879136377e-06, "loss": 0.4379, "step": 8037 }, { "epoch": 3.142084465300074, "grad_norm": 0.5001718495358742, "learning_rate": 4.7240563318100755e-06, "loss": 0.438, "step": 8038 }, { "epoch": 3.1424796245986664, "grad_norm": 0.5014707511961164, "learning_rate": 4.723984775749031e-06, "loss": 0.4457, "step": 8039 }, { "epoch": 3.1428747838972586, "grad_norm": 0.5028261244621974, "learning_rate": 4.7239132109535245e-06, "loss": 0.4604, "step": 8040 }, { "epoch": 3.143269943195851, "grad_norm": 0.4934107578955984, "learning_rate": 4.723841637423837e-06, "loss": 0.4377, "step": 8041 }, { "epoch": 3.143665102494443, "grad_norm": 0.48165968429853356, "learning_rate": 4.723770055160251e-06, "loss": 0.4519, "step": 8042 }, { "epoch": 3.1440602617930353, "grad_norm": 0.49320305911875595, "learning_rate": 4.723698464163046e-06, "loss": 0.4551, "step": 8043 }, { "epoch": 3.1444554210916276, "grad_norm": 0.5151414275313321, "learning_rate": 4.723626864432504e-06, "loss": 0.4411, "step": 8044 }, { "epoch": 3.14485058039022, "grad_norm": 0.48841016631789297, "learning_rate": 4.723555255968906e-06, "loss": 0.4375, "step": 8045 }, { "epoch": 3.145245739688812, "grad_norm": 0.49755737281596757, "learning_rate": 4.723483638772532e-06, "loss": 0.4511, "step": 8046 }, { "epoch": 3.1456408989874043, "grad_norm": 0.5133698387807278, "learning_rate": 4.723412012843666e-06, "loss": 0.4507, "step": 8047 }, { "epoch": 3.1460360582859965, "grad_norm": 0.5113782564797575, "learning_rate": 4.723340378182587e-06, "loss": 0.4471, "step": 8048 }, { "epoch": 3.1464312175845888, "grad_norm": 0.5025143915593622, "learning_rate": 4.7232687347895775e-06, "loss": 0.4506, "step": 8049 }, { "epoch": 3.146826376883181, "grad_norm": 0.5090332482162311, "learning_rate": 4.7231970826649185e-06, "loss": 0.4601, "step": 8050 }, { "epoch": 3.1472215361817732, "grad_norm": 0.5129767234895644, "learning_rate": 4.7231254218088906e-06, "loss": 0.4339, "step": 8051 }, { "epoch": 3.1476166954803655, "grad_norm": 0.4931726199572382, "learning_rate": 4.723053752221777e-06, "loss": 0.4628, "step": 8052 }, { "epoch": 3.1480118547789577, "grad_norm": 0.4761021361758315, "learning_rate": 4.722982073903857e-06, "loss": 0.4111, "step": 8053 }, { "epoch": 3.14840701407755, "grad_norm": 0.49121750684116305, "learning_rate": 4.722910386855414e-06, "loss": 0.4332, "step": 8054 }, { "epoch": 3.148802173376142, "grad_norm": 0.49386756521790587, "learning_rate": 4.722838691076729e-06, "loss": 0.432, "step": 8055 }, { "epoch": 3.1491973326747345, "grad_norm": 0.4962985239368982, "learning_rate": 4.722766986568083e-06, "loss": 0.4472, "step": 8056 }, { "epoch": 3.1495924919733267, "grad_norm": 0.49652014240893927, "learning_rate": 4.722695273329758e-06, "loss": 0.4585, "step": 8057 }, { "epoch": 3.149987651271919, "grad_norm": 0.4984442502510313, "learning_rate": 4.722623551362036e-06, "loss": 0.4285, "step": 8058 }, { "epoch": 3.150382810570511, "grad_norm": 0.4923794585945551, "learning_rate": 4.7225518206651975e-06, "loss": 0.451, "step": 8059 }, { "epoch": 3.1507779698691034, "grad_norm": 0.492056259052196, "learning_rate": 4.722480081239527e-06, "loss": 0.4423, "step": 8060 }, { "epoch": 3.1511731291676957, "grad_norm": 0.4936357538899125, "learning_rate": 4.7224083330853025e-06, "loss": 0.4449, "step": 8061 }, { "epoch": 3.151568288466288, "grad_norm": 0.4891054280607878, "learning_rate": 4.722336576202808e-06, "loss": 0.4607, "step": 8062 }, { "epoch": 3.15196344776488, "grad_norm": 0.5006228316341875, "learning_rate": 4.722264810592325e-06, "loss": 0.4578, "step": 8063 }, { "epoch": 3.1523586070634724, "grad_norm": 0.49583466507150603, "learning_rate": 4.722193036254135e-06, "loss": 0.4505, "step": 8064 }, { "epoch": 3.1527537663620646, "grad_norm": 0.5029343842600947, "learning_rate": 4.722121253188521e-06, "loss": 0.4449, "step": 8065 }, { "epoch": 3.153148925660657, "grad_norm": 0.5118215295573837, "learning_rate": 4.722049461395763e-06, "loss": 0.434, "step": 8066 }, { "epoch": 3.153544084959249, "grad_norm": 0.4861987832676822, "learning_rate": 4.721977660876144e-06, "loss": 0.4308, "step": 8067 }, { "epoch": 3.1539392442578413, "grad_norm": 0.5021670172513321, "learning_rate": 4.721905851629947e-06, "loss": 0.447, "step": 8068 }, { "epoch": 3.1543344035564336, "grad_norm": 0.4869993622406486, "learning_rate": 4.721834033657452e-06, "loss": 0.4596, "step": 8069 }, { "epoch": 3.154729562855026, "grad_norm": 0.515713215408121, "learning_rate": 4.721762206958943e-06, "loss": 0.4579, "step": 8070 }, { "epoch": 3.155124722153618, "grad_norm": 0.5056235944038723, "learning_rate": 4.7216903715347005e-06, "loss": 0.4692, "step": 8071 }, { "epoch": 3.1555198814522103, "grad_norm": 0.506279979442091, "learning_rate": 4.721618527385008e-06, "loss": 0.4411, "step": 8072 }, { "epoch": 3.1559150407508025, "grad_norm": 0.507576731306934, "learning_rate": 4.721546674510146e-06, "loss": 0.4476, "step": 8073 }, { "epoch": 3.1563102000493948, "grad_norm": 0.4959496994776162, "learning_rate": 4.721474812910398e-06, "loss": 0.4491, "step": 8074 }, { "epoch": 3.156705359347987, "grad_norm": 0.48307075497150315, "learning_rate": 4.721402942586046e-06, "loss": 0.4518, "step": 8075 }, { "epoch": 3.1571005186465793, "grad_norm": 0.5009750695914092, "learning_rate": 4.721331063537372e-06, "loss": 0.4293, "step": 8076 }, { "epoch": 3.1574956779451715, "grad_norm": 0.5279332021896078, "learning_rate": 4.721259175764659e-06, "loss": 0.4506, "step": 8077 }, { "epoch": 3.1578908372437637, "grad_norm": 0.5166606359510496, "learning_rate": 4.721187279268189e-06, "loss": 0.4785, "step": 8078 }, { "epoch": 3.158285996542356, "grad_norm": 0.4957762206244525, "learning_rate": 4.721115374048243e-06, "loss": 0.4429, "step": 8079 }, { "epoch": 3.158681155840948, "grad_norm": 0.5130296220814176, "learning_rate": 4.721043460105106e-06, "loss": 0.4582, "step": 8080 }, { "epoch": 3.1590763151395405, "grad_norm": 0.5084021122302967, "learning_rate": 4.720971537439058e-06, "loss": 0.4468, "step": 8081 }, { "epoch": 3.1594714744381327, "grad_norm": 0.5024239703569898, "learning_rate": 4.720899606050382e-06, "loss": 0.4391, "step": 8082 }, { "epoch": 3.159866633736725, "grad_norm": 0.49038806705570115, "learning_rate": 4.720827665939362e-06, "loss": 0.4273, "step": 8083 }, { "epoch": 3.160261793035317, "grad_norm": 0.5006231223006984, "learning_rate": 4.720755717106278e-06, "loss": 0.4549, "step": 8084 }, { "epoch": 3.1606569523339094, "grad_norm": 0.5146917436143081, "learning_rate": 4.7206837595514155e-06, "loss": 0.4446, "step": 8085 }, { "epoch": 3.1610521116325017, "grad_norm": 0.502995584917966, "learning_rate": 4.720611793275055e-06, "loss": 0.4453, "step": 8086 }, { "epoch": 3.161447270931094, "grad_norm": 0.49610937331042165, "learning_rate": 4.7205398182774806e-06, "loss": 0.4585, "step": 8087 }, { "epoch": 3.161842430229686, "grad_norm": 0.4843164971279493, "learning_rate": 4.720467834558973e-06, "loss": 0.4481, "step": 8088 }, { "epoch": 3.1622375895282784, "grad_norm": 0.5128044968276422, "learning_rate": 4.720395842119817e-06, "loss": 0.4687, "step": 8089 }, { "epoch": 3.162632748826871, "grad_norm": 0.48798274809495723, "learning_rate": 4.7203238409602936e-06, "loss": 0.4476, "step": 8090 }, { "epoch": 3.1630279081254633, "grad_norm": 0.4928931238418614, "learning_rate": 4.720251831080687e-06, "loss": 0.4345, "step": 8091 }, { "epoch": 3.1634230674240555, "grad_norm": 0.5669879530273516, "learning_rate": 4.720179812481279e-06, "loss": 0.4412, "step": 8092 }, { "epoch": 3.163818226722648, "grad_norm": 0.5067692284751482, "learning_rate": 4.720107785162353e-06, "loss": 0.4427, "step": 8093 }, { "epoch": 3.16421338602124, "grad_norm": 0.48670841000596843, "learning_rate": 4.7200357491241925e-06, "loss": 0.4391, "step": 8094 }, { "epoch": 3.1646085453198323, "grad_norm": 0.49696643464950435, "learning_rate": 4.71996370436708e-06, "loss": 0.4545, "step": 8095 }, { "epoch": 3.1650037046184245, "grad_norm": 0.5074530215661209, "learning_rate": 4.719891650891296e-06, "loss": 0.4583, "step": 8096 }, { "epoch": 3.1653988639170167, "grad_norm": 0.49354704231252805, "learning_rate": 4.719819588697127e-06, "loss": 0.4461, "step": 8097 }, { "epoch": 3.165794023215609, "grad_norm": 0.4963673335562395, "learning_rate": 4.719747517784854e-06, "loss": 0.4349, "step": 8098 }, { "epoch": 3.1661891825142012, "grad_norm": 0.4904154243050707, "learning_rate": 4.719675438154761e-06, "loss": 0.4493, "step": 8099 }, { "epoch": 3.1665843418127935, "grad_norm": 0.47953064206220075, "learning_rate": 4.719603349807132e-06, "loss": 0.4484, "step": 8100 }, { "epoch": 3.1669795011113857, "grad_norm": 0.47907940594638293, "learning_rate": 4.719531252742246e-06, "loss": 0.4405, "step": 8101 }, { "epoch": 3.167374660409978, "grad_norm": 0.4898293576798917, "learning_rate": 4.7194591469603915e-06, "loss": 0.4465, "step": 8102 }, { "epoch": 3.16776981970857, "grad_norm": 0.4894519791280581, "learning_rate": 4.7193870324618486e-06, "loss": 0.4411, "step": 8103 }, { "epoch": 3.1681649790071624, "grad_norm": 0.5194593246729222, "learning_rate": 4.7193149092469e-06, "loss": 0.4674, "step": 8104 }, { "epoch": 3.1685601383057547, "grad_norm": 0.4838702824968695, "learning_rate": 4.719242777315831e-06, "loss": 0.4505, "step": 8105 }, { "epoch": 3.168955297604347, "grad_norm": 0.506659585187492, "learning_rate": 4.719170636668924e-06, "loss": 0.4573, "step": 8106 }, { "epoch": 3.169350456902939, "grad_norm": 0.487503557571576, "learning_rate": 4.719098487306463e-06, "loss": 0.4403, "step": 8107 }, { "epoch": 3.1697456162015314, "grad_norm": 0.4876179319034556, "learning_rate": 4.7190263292287296e-06, "loss": 0.4379, "step": 8108 }, { "epoch": 3.1701407755001236, "grad_norm": 0.5106552416620477, "learning_rate": 4.718954162436008e-06, "loss": 0.4282, "step": 8109 }, { "epoch": 3.170535934798716, "grad_norm": 0.48302060495170895, "learning_rate": 4.718881986928583e-06, "loss": 0.4424, "step": 8110 }, { "epoch": 3.170931094097308, "grad_norm": 0.4958854915432569, "learning_rate": 4.718809802706736e-06, "loss": 0.4507, "step": 8111 }, { "epoch": 3.1713262533959004, "grad_norm": 0.4932554091257428, "learning_rate": 4.7187376097707515e-06, "loss": 0.4481, "step": 8112 }, { "epoch": 3.1717214126944926, "grad_norm": 0.48904571684029674, "learning_rate": 4.718665408120913e-06, "loss": 0.4415, "step": 8113 }, { "epoch": 3.172116571993085, "grad_norm": 0.5168961069991919, "learning_rate": 4.718593197757505e-06, "loss": 0.4599, "step": 8114 }, { "epoch": 3.172511731291677, "grad_norm": 0.5063108372391634, "learning_rate": 4.718520978680809e-06, "loss": 0.4517, "step": 8115 }, { "epoch": 3.1729068905902693, "grad_norm": 0.5007293842416772, "learning_rate": 4.71844875089111e-06, "loss": 0.4267, "step": 8116 }, { "epoch": 3.1733020498888616, "grad_norm": 0.48180519881194206, "learning_rate": 4.718376514388691e-06, "loss": 0.4348, "step": 8117 }, { "epoch": 3.173697209187454, "grad_norm": 0.5089904727964285, "learning_rate": 4.718304269173837e-06, "loss": 0.4541, "step": 8118 }, { "epoch": 3.174092368486046, "grad_norm": 0.48877774632457816, "learning_rate": 4.718232015246831e-06, "loss": 0.4376, "step": 8119 }, { "epoch": 3.1744875277846383, "grad_norm": 0.4891031842232021, "learning_rate": 4.718159752607955e-06, "loss": 0.4461, "step": 8120 }, { "epoch": 3.1748826870832305, "grad_norm": 0.5040292533478185, "learning_rate": 4.718087481257496e-06, "loss": 0.4475, "step": 8121 }, { "epoch": 3.1752778463818228, "grad_norm": 0.4981159124676147, "learning_rate": 4.718015201195736e-06, "loss": 0.45, "step": 8122 }, { "epoch": 3.175673005680415, "grad_norm": 0.489987198323066, "learning_rate": 4.717942912422958e-06, "loss": 0.4469, "step": 8123 }, { "epoch": 3.1760681649790072, "grad_norm": 0.5132570158383588, "learning_rate": 4.717870614939449e-06, "loss": 0.4671, "step": 8124 }, { "epoch": 3.1764633242775995, "grad_norm": 0.4838179306406595, "learning_rate": 4.717798308745489e-06, "loss": 0.4483, "step": 8125 }, { "epoch": 3.1768584835761917, "grad_norm": 0.49436482089246664, "learning_rate": 4.717725993841366e-06, "loss": 0.4505, "step": 8126 }, { "epoch": 3.177253642874784, "grad_norm": 0.4927294185258886, "learning_rate": 4.71765367022736e-06, "loss": 0.4607, "step": 8127 }, { "epoch": 3.177648802173376, "grad_norm": 0.5023211446663597, "learning_rate": 4.717581337903759e-06, "loss": 0.4412, "step": 8128 }, { "epoch": 3.1780439614719684, "grad_norm": 0.4962184544429156, "learning_rate": 4.717508996870843e-06, "loss": 0.4472, "step": 8129 }, { "epoch": 3.1784391207705607, "grad_norm": 0.5060665727054257, "learning_rate": 4.717436647128899e-06, "loss": 0.4483, "step": 8130 }, { "epoch": 3.178834280069153, "grad_norm": 0.4856256691010014, "learning_rate": 4.717364288678211e-06, "loss": 0.4531, "step": 8131 }, { "epoch": 3.179229439367745, "grad_norm": 0.5031699859568227, "learning_rate": 4.717291921519062e-06, "loss": 0.4601, "step": 8132 }, { "epoch": 3.1796245986663374, "grad_norm": 0.49801437420123507, "learning_rate": 4.717219545651736e-06, "loss": 0.4482, "step": 8133 }, { "epoch": 3.1800197579649296, "grad_norm": 0.5050121211282538, "learning_rate": 4.71714716107652e-06, "loss": 0.4642, "step": 8134 }, { "epoch": 3.180414917263522, "grad_norm": 0.5261624095797713, "learning_rate": 4.717074767793695e-06, "loss": 0.4363, "step": 8135 }, { "epoch": 3.180810076562114, "grad_norm": 0.5003002025261412, "learning_rate": 4.717002365803547e-06, "loss": 0.443, "step": 8136 }, { "epoch": 3.1812052358607064, "grad_norm": 0.5162727913171745, "learning_rate": 4.716929955106359e-06, "loss": 0.4556, "step": 8137 }, { "epoch": 3.1816003951592986, "grad_norm": 0.50383251369054, "learning_rate": 4.716857535702417e-06, "loss": 0.4476, "step": 8138 }, { "epoch": 3.181995554457891, "grad_norm": 0.4989731953152965, "learning_rate": 4.716785107592005e-06, "loss": 0.4505, "step": 8139 }, { "epoch": 3.182390713756483, "grad_norm": 0.4985898123702961, "learning_rate": 4.716712670775407e-06, "loss": 0.4513, "step": 8140 }, { "epoch": 3.1827858730550753, "grad_norm": 0.4949371574614923, "learning_rate": 4.7166402252529075e-06, "loss": 0.4486, "step": 8141 }, { "epoch": 3.1831810323536676, "grad_norm": 0.5050331893640804, "learning_rate": 4.716567771024792e-06, "loss": 0.457, "step": 8142 }, { "epoch": 3.18357619165226, "grad_norm": 0.4860144273306709, "learning_rate": 4.716495308091343e-06, "loss": 0.4353, "step": 8143 }, { "epoch": 3.183971350950852, "grad_norm": 0.48399721239140636, "learning_rate": 4.716422836452846e-06, "loss": 0.4373, "step": 8144 }, { "epoch": 3.1843665102494443, "grad_norm": 0.4806840238583811, "learning_rate": 4.716350356109588e-06, "loss": 0.4395, "step": 8145 }, { "epoch": 3.1847616695480365, "grad_norm": 0.4905696278198881, "learning_rate": 4.716277867061851e-06, "loss": 0.4521, "step": 8146 }, { "epoch": 3.1851568288466288, "grad_norm": 0.5052154287634858, "learning_rate": 4.716205369309919e-06, "loss": 0.4654, "step": 8147 }, { "epoch": 3.185551988145221, "grad_norm": 0.4976670668769747, "learning_rate": 4.716132862854079e-06, "loss": 0.4656, "step": 8148 }, { "epoch": 3.1859471474438132, "grad_norm": 0.4856606394398254, "learning_rate": 4.716060347694615e-06, "loss": 0.4312, "step": 8149 }, { "epoch": 3.1863423067424055, "grad_norm": 0.47616575166124564, "learning_rate": 4.7159878238318116e-06, "loss": 0.4321, "step": 8150 }, { "epoch": 3.1867374660409977, "grad_norm": 0.4993342307580878, "learning_rate": 4.715915291265954e-06, "loss": 0.4534, "step": 8151 }, { "epoch": 3.18713262533959, "grad_norm": 0.5057600356168487, "learning_rate": 4.715842749997325e-06, "loss": 0.454, "step": 8152 }, { "epoch": 3.187527784638182, "grad_norm": 0.5076197913058856, "learning_rate": 4.715770200026213e-06, "loss": 0.449, "step": 8153 }, { "epoch": 3.1879229439367744, "grad_norm": 0.47904704822140237, "learning_rate": 4.7156976413529e-06, "loss": 0.4405, "step": 8154 }, { "epoch": 3.1883181032353667, "grad_norm": 0.478740457872185, "learning_rate": 4.7156250739776725e-06, "loss": 0.4527, "step": 8155 }, { "epoch": 3.188713262533959, "grad_norm": 0.4865375368517737, "learning_rate": 4.715552497900815e-06, "loss": 0.4664, "step": 8156 }, { "epoch": 3.189108421832551, "grad_norm": 0.49057170271705297, "learning_rate": 4.715479913122613e-06, "loss": 0.4498, "step": 8157 }, { "epoch": 3.1895035811311434, "grad_norm": 0.48932211501442835, "learning_rate": 4.715407319643352e-06, "loss": 0.438, "step": 8158 }, { "epoch": 3.1898987404297356, "grad_norm": 0.4986907746805224, "learning_rate": 4.715334717463314e-06, "loss": 0.4437, "step": 8159 }, { "epoch": 3.190293899728328, "grad_norm": 0.5107324129303034, "learning_rate": 4.715262106582788e-06, "loss": 0.4663, "step": 8160 }, { "epoch": 3.19068905902692, "grad_norm": 0.4816038125829416, "learning_rate": 4.715189487002057e-06, "loss": 0.4459, "step": 8161 }, { "epoch": 3.1910842183255124, "grad_norm": 0.48589022124625236, "learning_rate": 4.715116858721408e-06, "loss": 0.4596, "step": 8162 }, { "epoch": 3.1914793776241046, "grad_norm": 0.5059117225587179, "learning_rate": 4.715044221741125e-06, "loss": 0.4424, "step": 8163 }, { "epoch": 3.191874536922697, "grad_norm": 0.4902259551321021, "learning_rate": 4.714971576061492e-06, "loss": 0.4436, "step": 8164 }, { "epoch": 3.192269696221289, "grad_norm": 0.49921816165596244, "learning_rate": 4.714898921682797e-06, "loss": 0.4341, "step": 8165 }, { "epoch": 3.1926648555198813, "grad_norm": 0.4873589691710802, "learning_rate": 4.714826258605323e-06, "loss": 0.447, "step": 8166 }, { "epoch": 3.1930600148184736, "grad_norm": 0.4947509154503643, "learning_rate": 4.714753586829357e-06, "loss": 0.4531, "step": 8167 }, { "epoch": 3.193455174117066, "grad_norm": 0.49979557568227245, "learning_rate": 4.714680906355184e-06, "loss": 0.4499, "step": 8168 }, { "epoch": 3.193850333415658, "grad_norm": 0.47965466768323384, "learning_rate": 4.714608217183088e-06, "loss": 0.4294, "step": 8169 }, { "epoch": 3.1942454927142503, "grad_norm": 0.5380227850408551, "learning_rate": 4.7145355193133566e-06, "loss": 0.44, "step": 8170 }, { "epoch": 3.1946406520128425, "grad_norm": 0.4778666409695035, "learning_rate": 4.714462812746275e-06, "loss": 0.4518, "step": 8171 }, { "epoch": 3.1950358113114348, "grad_norm": 0.498563645161947, "learning_rate": 4.714390097482127e-06, "loss": 0.4512, "step": 8172 }, { "epoch": 3.195430970610027, "grad_norm": 0.502694986736014, "learning_rate": 4.714317373521199e-06, "loss": 0.4703, "step": 8173 }, { "epoch": 3.1958261299086193, "grad_norm": 0.4955421090319872, "learning_rate": 4.714244640863778e-06, "loss": 0.4601, "step": 8174 }, { "epoch": 3.1962212892072115, "grad_norm": 0.47703481341356674, "learning_rate": 4.714171899510148e-06, "loss": 0.4363, "step": 8175 }, { "epoch": 3.1966164485058037, "grad_norm": 0.488778683807109, "learning_rate": 4.714099149460596e-06, "loss": 0.4309, "step": 8176 }, { "epoch": 3.197011607804396, "grad_norm": 0.49243205733047013, "learning_rate": 4.714026390715407e-06, "loss": 0.4421, "step": 8177 }, { "epoch": 3.197406767102988, "grad_norm": 0.47901350686825167, "learning_rate": 4.7139536232748665e-06, "loss": 0.4475, "step": 8178 }, { "epoch": 3.1978019264015805, "grad_norm": 0.4873600139430021, "learning_rate": 4.71388084713926e-06, "loss": 0.4365, "step": 8179 }, { "epoch": 3.1981970857001727, "grad_norm": 0.4750978959852484, "learning_rate": 4.713808062308874e-06, "loss": 0.4329, "step": 8180 }, { "epoch": 3.198592244998765, "grad_norm": 0.49005747817347883, "learning_rate": 4.713735268783995e-06, "loss": 0.44, "step": 8181 }, { "epoch": 3.198987404297357, "grad_norm": 0.4897517319745767, "learning_rate": 4.713662466564908e-06, "loss": 0.4418, "step": 8182 }, { "epoch": 3.1993825635959494, "grad_norm": 0.4937199361984085, "learning_rate": 4.713589655651898e-06, "loss": 0.445, "step": 8183 }, { "epoch": 3.1997777228945417, "grad_norm": 0.5006257765582313, "learning_rate": 4.713516836045253e-06, "loss": 0.4578, "step": 8184 }, { "epoch": 3.200172882193134, "grad_norm": 0.5102363063233484, "learning_rate": 4.713444007745258e-06, "loss": 0.4649, "step": 8185 }, { "epoch": 3.200568041491726, "grad_norm": 0.5619926568257341, "learning_rate": 4.713371170752198e-06, "loss": 0.4512, "step": 8186 }, { "epoch": 3.2009632007903184, "grad_norm": 0.49017178589540195, "learning_rate": 4.713298325066361e-06, "loss": 0.4482, "step": 8187 }, { "epoch": 3.2013583600889106, "grad_norm": 0.5075841792186128, "learning_rate": 4.713225470688032e-06, "loss": 0.4637, "step": 8188 }, { "epoch": 3.201753519387503, "grad_norm": 0.4851504244889839, "learning_rate": 4.713152607617497e-06, "loss": 0.4354, "step": 8189 }, { "epoch": 3.2021486786860955, "grad_norm": 0.49603485658612206, "learning_rate": 4.713079735855043e-06, "loss": 0.4669, "step": 8190 }, { "epoch": 3.202543837984688, "grad_norm": 0.5131941929666564, "learning_rate": 4.713006855400955e-06, "loss": 0.4363, "step": 8191 }, { "epoch": 3.20293899728328, "grad_norm": 0.5327331196587508, "learning_rate": 4.712933966255521e-06, "loss": 0.4517, "step": 8192 }, { "epoch": 3.2033341565818723, "grad_norm": 0.47967217544059954, "learning_rate": 4.7128610684190255e-06, "loss": 0.4363, "step": 8193 }, { "epoch": 3.2037293158804645, "grad_norm": 0.5032954106990305, "learning_rate": 4.712788161891755e-06, "loss": 0.4555, "step": 8194 }, { "epoch": 3.2041244751790567, "grad_norm": 0.49443372271224756, "learning_rate": 4.712715246673997e-06, "loss": 0.4432, "step": 8195 }, { "epoch": 3.204519634477649, "grad_norm": 0.5048761667286308, "learning_rate": 4.712642322766037e-06, "loss": 0.4636, "step": 8196 }, { "epoch": 3.204914793776241, "grad_norm": 0.5114603132884818, "learning_rate": 4.71256939016816e-06, "loss": 0.4678, "step": 8197 }, { "epoch": 3.2053099530748335, "grad_norm": 0.4783233894068138, "learning_rate": 4.712496448880656e-06, "loss": 0.4546, "step": 8198 }, { "epoch": 3.2057051123734257, "grad_norm": 0.489295057789308, "learning_rate": 4.712423498903809e-06, "loss": 0.4391, "step": 8199 }, { "epoch": 3.206100271672018, "grad_norm": 0.4876753870656324, "learning_rate": 4.712350540237906e-06, "loss": 0.4552, "step": 8200 }, { "epoch": 3.20649543097061, "grad_norm": 0.4833416370700385, "learning_rate": 4.712277572883233e-06, "loss": 0.4461, "step": 8201 }, { "epoch": 3.2068905902692024, "grad_norm": 0.4966315262459833, "learning_rate": 4.712204596840077e-06, "loss": 0.464, "step": 8202 }, { "epoch": 3.2072857495677947, "grad_norm": 0.477804878380041, "learning_rate": 4.712131612108724e-06, "loss": 0.4378, "step": 8203 }, { "epoch": 3.207680908866387, "grad_norm": 0.4842718555370584, "learning_rate": 4.7120586186894626e-06, "loss": 0.4488, "step": 8204 }, { "epoch": 3.208076068164979, "grad_norm": 0.48752539718009136, "learning_rate": 4.711985616582578e-06, "loss": 0.4583, "step": 8205 }, { "epoch": 3.2084712274635714, "grad_norm": 0.493674287690774, "learning_rate": 4.711912605788357e-06, "loss": 0.4461, "step": 8206 }, { "epoch": 3.2088663867621636, "grad_norm": 0.5940079173022275, "learning_rate": 4.711839586307086e-06, "loss": 0.4796, "step": 8207 }, { "epoch": 3.209261546060756, "grad_norm": 0.4727016450653654, "learning_rate": 4.711766558139053e-06, "loss": 0.4313, "step": 8208 }, { "epoch": 3.209656705359348, "grad_norm": 0.4985753825854796, "learning_rate": 4.711693521284543e-06, "loss": 0.4622, "step": 8209 }, { "epoch": 3.2100518646579403, "grad_norm": 0.5947833225156005, "learning_rate": 4.711620475743845e-06, "loss": 0.4576, "step": 8210 }, { "epoch": 3.2104470239565326, "grad_norm": 0.4991842972271697, "learning_rate": 4.711547421517244e-06, "loss": 0.4429, "step": 8211 }, { "epoch": 3.210842183255125, "grad_norm": 0.49646570990011646, "learning_rate": 4.711474358605027e-06, "loss": 0.4589, "step": 8212 }, { "epoch": 3.211237342553717, "grad_norm": 0.49028663463091515, "learning_rate": 4.711401287007482e-06, "loss": 0.4321, "step": 8213 }, { "epoch": 3.2116325018523093, "grad_norm": 0.5228254931374301, "learning_rate": 4.711328206724897e-06, "loss": 0.457, "step": 8214 }, { "epoch": 3.2120276611509015, "grad_norm": 0.48805769076484595, "learning_rate": 4.7112551177575564e-06, "loss": 0.4406, "step": 8215 }, { "epoch": 3.212422820449494, "grad_norm": 0.48960247238176013, "learning_rate": 4.711182020105748e-06, "loss": 0.4567, "step": 8216 }, { "epoch": 3.212817979748086, "grad_norm": 0.4988948172821916, "learning_rate": 4.71110891376976e-06, "loss": 0.4662, "step": 8217 }, { "epoch": 3.2132131390466783, "grad_norm": 0.502843794988854, "learning_rate": 4.711035798749879e-06, "loss": 0.4696, "step": 8218 }, { "epoch": 3.2136082983452705, "grad_norm": 0.4867119474038113, "learning_rate": 4.710962675046392e-06, "loss": 0.4547, "step": 8219 }, { "epoch": 3.2140034576438627, "grad_norm": 0.496603147591944, "learning_rate": 4.710889542659586e-06, "loss": 0.4643, "step": 8220 }, { "epoch": 3.214398616942455, "grad_norm": 0.5108147174349558, "learning_rate": 4.710816401589748e-06, "loss": 0.4659, "step": 8221 }, { "epoch": 3.2147937762410472, "grad_norm": 0.5331702363488228, "learning_rate": 4.710743251837166e-06, "loss": 0.4392, "step": 8222 }, { "epoch": 3.2151889355396395, "grad_norm": 0.48329930143617106, "learning_rate": 4.710670093402127e-06, "loss": 0.4466, "step": 8223 }, { "epoch": 3.2155840948382317, "grad_norm": 0.5172967429519062, "learning_rate": 4.7105969262849185e-06, "loss": 0.4705, "step": 8224 }, { "epoch": 3.215979254136824, "grad_norm": 0.47907504810620555, "learning_rate": 4.710523750485827e-06, "loss": 0.435, "step": 8225 }, { "epoch": 3.216374413435416, "grad_norm": 0.4878844771396085, "learning_rate": 4.71045056600514e-06, "loss": 0.4418, "step": 8226 }, { "epoch": 3.2167695727340084, "grad_norm": 0.4937392208878633, "learning_rate": 4.710377372843147e-06, "loss": 0.4466, "step": 8227 }, { "epoch": 3.2171647320326007, "grad_norm": 0.5193745711511666, "learning_rate": 4.710304171000133e-06, "loss": 0.445, "step": 8228 }, { "epoch": 3.217559891331193, "grad_norm": 0.5017673107772458, "learning_rate": 4.7102309604763865e-06, "loss": 0.4535, "step": 8229 }, { "epoch": 3.217955050629785, "grad_norm": 0.47724701872972525, "learning_rate": 4.710157741272195e-06, "loss": 0.4236, "step": 8230 }, { "epoch": 3.2183502099283774, "grad_norm": 0.5029142492827028, "learning_rate": 4.710084513387846e-06, "loss": 0.46, "step": 8231 }, { "epoch": 3.2187453692269696, "grad_norm": 0.4976804393277097, "learning_rate": 4.710011276823627e-06, "loss": 0.4601, "step": 8232 }, { "epoch": 3.219140528525562, "grad_norm": 0.5261258641015674, "learning_rate": 4.709938031579825e-06, "loss": 0.4587, "step": 8233 }, { "epoch": 3.219535687824154, "grad_norm": 0.5200984670015669, "learning_rate": 4.70986477765673e-06, "loss": 0.4571, "step": 8234 }, { "epoch": 3.2199308471227464, "grad_norm": 0.4894476159865528, "learning_rate": 4.709791515054627e-06, "loss": 0.4442, "step": 8235 }, { "epoch": 3.2203260064213386, "grad_norm": 0.49521421527173004, "learning_rate": 4.709718243773805e-06, "loss": 0.4359, "step": 8236 }, { "epoch": 3.220721165719931, "grad_norm": 0.4927693522476051, "learning_rate": 4.7096449638145504e-06, "loss": 0.445, "step": 8237 }, { "epoch": 3.221116325018523, "grad_norm": 0.5172144682625665, "learning_rate": 4.709571675177154e-06, "loss": 0.4584, "step": 8238 }, { "epoch": 3.2215114843171153, "grad_norm": 0.5154003710753569, "learning_rate": 4.709498377861901e-06, "loss": 0.4433, "step": 8239 }, { "epoch": 3.2219066436157076, "grad_norm": 0.4855890206340475, "learning_rate": 4.709425071869079e-06, "loss": 0.4501, "step": 8240 }, { "epoch": 3.2223018029143, "grad_norm": 0.4830024442204952, "learning_rate": 4.709351757198979e-06, "loss": 0.4416, "step": 8241 }, { "epoch": 3.222696962212892, "grad_norm": 0.5008192768633752, "learning_rate": 4.7092784338518856e-06, "loss": 0.4403, "step": 8242 }, { "epoch": 3.2230921215114843, "grad_norm": 0.5174554129182379, "learning_rate": 4.7092051018280886e-06, "loss": 0.4436, "step": 8243 }, { "epoch": 3.2234872808100765, "grad_norm": 0.49631332057852817, "learning_rate": 4.709131761127875e-06, "loss": 0.466, "step": 8244 }, { "epoch": 3.2238824401086688, "grad_norm": 0.4879530471104053, "learning_rate": 4.709058411751533e-06, "loss": 0.4498, "step": 8245 }, { "epoch": 3.224277599407261, "grad_norm": 0.5093815291047132, "learning_rate": 4.708985053699351e-06, "loss": 0.4828, "step": 8246 }, { "epoch": 3.2246727587058532, "grad_norm": 0.48848100859749605, "learning_rate": 4.708911686971618e-06, "loss": 0.4388, "step": 8247 }, { "epoch": 3.2250679180044455, "grad_norm": 0.49097358270784774, "learning_rate": 4.708838311568621e-06, "loss": 0.4417, "step": 8248 }, { "epoch": 3.2254630773030377, "grad_norm": 0.5073726289639382, "learning_rate": 4.708764927490648e-06, "loss": 0.4715, "step": 8249 }, { "epoch": 3.22585823660163, "grad_norm": 0.49923080085782845, "learning_rate": 4.708691534737987e-06, "loss": 0.4672, "step": 8250 }, { "epoch": 3.226253395900222, "grad_norm": 0.4983917363183614, "learning_rate": 4.7086181333109286e-06, "loss": 0.4526, "step": 8251 }, { "epoch": 3.2266485551988144, "grad_norm": 0.5111375734969307, "learning_rate": 4.708544723209758e-06, "loss": 0.452, "step": 8252 }, { "epoch": 3.2270437144974067, "grad_norm": 0.48716751738116665, "learning_rate": 4.708471304434765e-06, "loss": 0.4564, "step": 8253 }, { "epoch": 3.227438873795999, "grad_norm": 0.5069026621643061, "learning_rate": 4.708397876986238e-06, "loss": 0.4528, "step": 8254 }, { "epoch": 3.227834033094591, "grad_norm": 0.5152378887836355, "learning_rate": 4.708324440864465e-06, "loss": 0.4355, "step": 8255 }, { "epoch": 3.2282291923931834, "grad_norm": 0.4962324262994311, "learning_rate": 4.7082509960697345e-06, "loss": 0.442, "step": 8256 }, { "epoch": 3.2286243516917756, "grad_norm": 0.5028634303397124, "learning_rate": 4.708177542602335e-06, "loss": 0.4553, "step": 8257 }, { "epoch": 3.229019510990368, "grad_norm": 0.5001078547401838, "learning_rate": 4.708104080462555e-06, "loss": 0.4557, "step": 8258 }, { "epoch": 3.22941467028896, "grad_norm": 0.4960036879535328, "learning_rate": 4.708030609650683e-06, "loss": 0.448, "step": 8259 }, { "epoch": 3.2298098295875524, "grad_norm": 0.5050479554627619, "learning_rate": 4.7079571301670076e-06, "loss": 0.4655, "step": 8260 }, { "epoch": 3.2302049888861446, "grad_norm": 0.5006203296479564, "learning_rate": 4.707883642011818e-06, "loss": 0.4653, "step": 8261 }, { "epoch": 3.230600148184737, "grad_norm": 0.4999078554437849, "learning_rate": 4.707810145185401e-06, "loss": 0.4574, "step": 8262 }, { "epoch": 3.230995307483329, "grad_norm": 0.47956978838455655, "learning_rate": 4.707736639688047e-06, "loss": 0.4366, "step": 8263 }, { "epoch": 3.2313904667819213, "grad_norm": 0.5070009335133143, "learning_rate": 4.707663125520044e-06, "loss": 0.4479, "step": 8264 }, { "epoch": 3.2317856260805136, "grad_norm": 0.5087445068694287, "learning_rate": 4.707589602681681e-06, "loss": 0.4596, "step": 8265 }, { "epoch": 3.232180785379106, "grad_norm": 0.5718442258693521, "learning_rate": 4.707516071173246e-06, "loss": 0.4542, "step": 8266 }, { "epoch": 3.232575944677698, "grad_norm": 0.5011292864269645, "learning_rate": 4.707442530995029e-06, "loss": 0.4525, "step": 8267 }, { "epoch": 3.2329711039762903, "grad_norm": 0.5026210431492106, "learning_rate": 4.707368982147318e-06, "loss": 0.4431, "step": 8268 }, { "epoch": 3.2333662632748825, "grad_norm": 0.5127765787850526, "learning_rate": 4.707295424630402e-06, "loss": 0.4665, "step": 8269 }, { "epoch": 3.2337614225734748, "grad_norm": 0.5086511264451964, "learning_rate": 4.707221858444569e-06, "loss": 0.45, "step": 8270 }, { "epoch": 3.234156581872067, "grad_norm": 0.5232352547690865, "learning_rate": 4.70714828359011e-06, "loss": 0.4685, "step": 8271 }, { "epoch": 3.2345517411706592, "grad_norm": 0.49163545831580857, "learning_rate": 4.707074700067312e-06, "loss": 0.4508, "step": 8272 }, { "epoch": 3.2349469004692515, "grad_norm": 0.5145261335572505, "learning_rate": 4.707001107876466e-06, "loss": 0.4693, "step": 8273 }, { "epoch": 3.2353420597678437, "grad_norm": 0.5026474655303093, "learning_rate": 4.7069275070178586e-06, "loss": 0.4629, "step": 8274 }, { "epoch": 3.235737219066436, "grad_norm": 0.486565673024086, "learning_rate": 4.706853897491781e-06, "loss": 0.4526, "step": 8275 }, { "epoch": 3.236132378365028, "grad_norm": 0.47935306570145486, "learning_rate": 4.70678027929852e-06, "loss": 0.4466, "step": 8276 }, { "epoch": 3.2365275376636204, "grad_norm": 0.4991778284300973, "learning_rate": 4.706706652438368e-06, "loss": 0.4549, "step": 8277 }, { "epoch": 3.2369226969622127, "grad_norm": 0.5067651241856679, "learning_rate": 4.706633016911611e-06, "loss": 0.4634, "step": 8278 }, { "epoch": 3.2373178562608054, "grad_norm": 0.49959265144088416, "learning_rate": 4.70655937271854e-06, "loss": 0.4477, "step": 8279 }, { "epoch": 3.2377130155593976, "grad_norm": 0.4730123377567548, "learning_rate": 4.706485719859443e-06, "loss": 0.4348, "step": 8280 }, { "epoch": 3.23810817485799, "grad_norm": 0.5011163271286927, "learning_rate": 4.706412058334611e-06, "loss": 0.469, "step": 8281 }, { "epoch": 3.238503334156582, "grad_norm": 0.5118563019208421, "learning_rate": 4.706338388144331e-06, "loss": 0.4536, "step": 8282 }, { "epoch": 3.2388984934551743, "grad_norm": 0.49785560194791934, "learning_rate": 4.706264709288894e-06, "loss": 0.4398, "step": 8283 }, { "epoch": 3.2392936527537666, "grad_norm": 0.48918931904116053, "learning_rate": 4.7061910217685895e-06, "loss": 0.458, "step": 8284 }, { "epoch": 3.239688812052359, "grad_norm": 0.4991786615442405, "learning_rate": 4.7061173255837054e-06, "loss": 0.4522, "step": 8285 }, { "epoch": 3.240083971350951, "grad_norm": 0.48813754093841744, "learning_rate": 4.706043620734533e-06, "loss": 0.454, "step": 8286 }, { "epoch": 3.2404791306495433, "grad_norm": 0.6162776390081206, "learning_rate": 4.705969907221361e-06, "loss": 0.4252, "step": 8287 }, { "epoch": 3.2408742899481355, "grad_norm": 0.4916252788914689, "learning_rate": 4.705896185044477e-06, "loss": 0.4541, "step": 8288 }, { "epoch": 3.2412694492467278, "grad_norm": 0.49006414916965146, "learning_rate": 4.705822454204173e-06, "loss": 0.4498, "step": 8289 }, { "epoch": 3.24166460854532, "grad_norm": 0.5058325689652357, "learning_rate": 4.705748714700739e-06, "loss": 0.45, "step": 8290 }, { "epoch": 3.2420597678439123, "grad_norm": 0.4876491731015844, "learning_rate": 4.705674966534462e-06, "loss": 0.4163, "step": 8291 }, { "epoch": 3.2424549271425045, "grad_norm": 0.5021867760606261, "learning_rate": 4.705601209705635e-06, "loss": 0.441, "step": 8292 }, { "epoch": 3.2428500864410967, "grad_norm": 0.5038352555082803, "learning_rate": 4.705527444214543e-06, "loss": 0.4469, "step": 8293 }, { "epoch": 3.243245245739689, "grad_norm": 0.5013117536018146, "learning_rate": 4.705453670061481e-06, "loss": 0.4492, "step": 8294 }, { "epoch": 3.243640405038281, "grad_norm": 0.47282887919141153, "learning_rate": 4.705379887246735e-06, "loss": 0.4431, "step": 8295 }, { "epoch": 3.2440355643368735, "grad_norm": 0.49337905075174315, "learning_rate": 4.705306095770596e-06, "loss": 0.4572, "step": 8296 }, { "epoch": 3.2444307236354657, "grad_norm": 0.49412301657250673, "learning_rate": 4.705232295633355e-06, "loss": 0.4463, "step": 8297 }, { "epoch": 3.244825882934058, "grad_norm": 0.5031343987855506, "learning_rate": 4.7051584868353e-06, "loss": 0.4502, "step": 8298 }, { "epoch": 3.24522104223265, "grad_norm": 0.5181152188198329, "learning_rate": 4.705084669376721e-06, "loss": 0.4397, "step": 8299 }, { "epoch": 3.2456162015312424, "grad_norm": 0.4882928530529042, "learning_rate": 4.7050108432579075e-06, "loss": 0.4515, "step": 8300 }, { "epoch": 3.2460113608298347, "grad_norm": 0.6015511647794045, "learning_rate": 4.704937008479152e-06, "loss": 0.4449, "step": 8301 }, { "epoch": 3.246406520128427, "grad_norm": 0.5137381986143884, "learning_rate": 4.704863165040742e-06, "loss": 0.4537, "step": 8302 }, { "epoch": 3.246801679427019, "grad_norm": 0.5065385080859512, "learning_rate": 4.704789312942969e-06, "loss": 0.4507, "step": 8303 }, { "epoch": 3.2471968387256114, "grad_norm": 0.5006646215975414, "learning_rate": 4.704715452186122e-06, "loss": 0.4413, "step": 8304 }, { "epoch": 3.2475919980242036, "grad_norm": 0.4925281165489277, "learning_rate": 4.704641582770492e-06, "loss": 0.4418, "step": 8305 }, { "epoch": 3.247987157322796, "grad_norm": 0.4935778234372788, "learning_rate": 4.704567704696368e-06, "loss": 0.465, "step": 8306 }, { "epoch": 3.248382316621388, "grad_norm": 0.5077023224379424, "learning_rate": 4.704493817964041e-06, "loss": 0.4327, "step": 8307 }, { "epoch": 3.2487774759199803, "grad_norm": 0.5015739358541178, "learning_rate": 4.7044199225738e-06, "loss": 0.4464, "step": 8308 }, { "epoch": 3.2491726352185726, "grad_norm": 0.5022006581016725, "learning_rate": 4.704346018525937e-06, "loss": 0.4532, "step": 8309 }, { "epoch": 3.249567794517165, "grad_norm": 0.5067055633348441, "learning_rate": 4.704272105820741e-06, "loss": 0.4597, "step": 8310 }, { "epoch": 3.249962953815757, "grad_norm": 0.4829044817680482, "learning_rate": 4.704198184458503e-06, "loss": 0.4245, "step": 8311 }, { "epoch": 3.2503581131143493, "grad_norm": 0.502784983843347, "learning_rate": 4.704124254439512e-06, "loss": 0.4682, "step": 8312 }, { "epoch": 3.2507532724129415, "grad_norm": 0.5025327631494758, "learning_rate": 4.704050315764062e-06, "loss": 0.4532, "step": 8313 }, { "epoch": 3.251148431711534, "grad_norm": 0.4827438502841128, "learning_rate": 4.703976368432438e-06, "loss": 0.4582, "step": 8314 }, { "epoch": 3.251543591010126, "grad_norm": 0.5001402118199502, "learning_rate": 4.703902412444935e-06, "loss": 0.4512, "step": 8315 }, { "epoch": 3.2519387503087183, "grad_norm": 0.5073201743090425, "learning_rate": 4.7038284478018405e-06, "loss": 0.458, "step": 8316 }, { "epoch": 3.2523339096073105, "grad_norm": 0.5044450071518284, "learning_rate": 4.703754474503446e-06, "loss": 0.4518, "step": 8317 }, { "epoch": 3.2527290689059027, "grad_norm": 0.5019360105195412, "learning_rate": 4.703680492550043e-06, "loss": 0.4544, "step": 8318 }, { "epoch": 3.253124228204495, "grad_norm": 0.49174623590569816, "learning_rate": 4.703606501941921e-06, "loss": 0.4361, "step": 8319 }, { "epoch": 3.2535193875030872, "grad_norm": 0.5000322070806993, "learning_rate": 4.70353250267937e-06, "loss": 0.4647, "step": 8320 }, { "epoch": 3.2539145468016795, "grad_norm": 0.4949672092349716, "learning_rate": 4.7034584947626815e-06, "loss": 0.4498, "step": 8321 }, { "epoch": 3.2543097061002717, "grad_norm": 0.48215269520295684, "learning_rate": 4.703384478192146e-06, "loss": 0.4461, "step": 8322 }, { "epoch": 3.254704865398864, "grad_norm": 0.4874422999560892, "learning_rate": 4.703310452968055e-06, "loss": 0.4434, "step": 8323 }, { "epoch": 3.255100024697456, "grad_norm": 0.4842366003915545, "learning_rate": 4.703236419090699e-06, "loss": 0.4338, "step": 8324 }, { "epoch": 3.2554951839960484, "grad_norm": 0.47835189607454215, "learning_rate": 4.7031623765603665e-06, "loss": 0.4463, "step": 8325 }, { "epoch": 3.2558903432946407, "grad_norm": 0.48308358621451114, "learning_rate": 4.7030883253773504e-06, "loss": 0.455, "step": 8326 }, { "epoch": 3.256285502593233, "grad_norm": 0.5094922118559343, "learning_rate": 4.703014265541942e-06, "loss": 0.447, "step": 8327 }, { "epoch": 3.256680661891825, "grad_norm": 0.49550690331076896, "learning_rate": 4.702940197054431e-06, "loss": 0.4543, "step": 8328 }, { "epoch": 3.2570758211904174, "grad_norm": 0.49464702815935846, "learning_rate": 4.702866119915108e-06, "loss": 0.443, "step": 8329 }, { "epoch": 3.2574709804890096, "grad_norm": 0.48344035746473757, "learning_rate": 4.702792034124265e-06, "loss": 0.4422, "step": 8330 }, { "epoch": 3.257866139787602, "grad_norm": 0.49796852073676307, "learning_rate": 4.702717939682193e-06, "loss": 0.4518, "step": 8331 }, { "epoch": 3.258261299086194, "grad_norm": 0.49891388022773675, "learning_rate": 4.702643836589182e-06, "loss": 0.4536, "step": 8332 }, { "epoch": 3.2586564583847863, "grad_norm": 0.5017290426062102, "learning_rate": 4.702569724845523e-06, "loss": 0.4492, "step": 8333 }, { "epoch": 3.2590516176833786, "grad_norm": 0.5055247142680904, "learning_rate": 4.702495604451508e-06, "loss": 0.4646, "step": 8334 }, { "epoch": 3.259446776981971, "grad_norm": 0.48822789965751323, "learning_rate": 4.702421475407428e-06, "loss": 0.4509, "step": 8335 }, { "epoch": 3.259841936280563, "grad_norm": 0.4932198897185812, "learning_rate": 4.7023473377135735e-06, "loss": 0.4464, "step": 8336 }, { "epoch": 3.2602370955791553, "grad_norm": 0.5137369946580147, "learning_rate": 4.702273191370236e-06, "loss": 0.452, "step": 8337 }, { "epoch": 3.2606322548777475, "grad_norm": 0.5049765007477618, "learning_rate": 4.702199036377707e-06, "loss": 0.4344, "step": 8338 }, { "epoch": 3.26102741417634, "grad_norm": 0.48715675795622143, "learning_rate": 4.702124872736277e-06, "loss": 0.4688, "step": 8339 }, { "epoch": 3.261422573474932, "grad_norm": 0.5058798989467141, "learning_rate": 4.702050700446238e-06, "loss": 0.461, "step": 8340 }, { "epoch": 3.2618177327735243, "grad_norm": 0.4875539335588554, "learning_rate": 4.701976519507881e-06, "loss": 0.4517, "step": 8341 }, { "epoch": 3.2622128920721165, "grad_norm": 0.49540858670627236, "learning_rate": 4.7019023299214974e-06, "loss": 0.4436, "step": 8342 }, { "epoch": 3.2626080513707088, "grad_norm": 0.48452284870049883, "learning_rate": 4.701828131687378e-06, "loss": 0.4563, "step": 8343 }, { "epoch": 3.263003210669301, "grad_norm": 0.5064967286836504, "learning_rate": 4.701753924805815e-06, "loss": 0.4517, "step": 8344 }, { "epoch": 3.2633983699678932, "grad_norm": 0.5049107650163969, "learning_rate": 4.7016797092771e-06, "loss": 0.4641, "step": 8345 }, { "epoch": 3.2637935292664855, "grad_norm": 0.5286126343914365, "learning_rate": 4.701605485101524e-06, "loss": 0.4564, "step": 8346 }, { "epoch": 3.2641886885650777, "grad_norm": 0.5224526170379453, "learning_rate": 4.701531252279379e-06, "loss": 0.4418, "step": 8347 }, { "epoch": 3.26458384786367, "grad_norm": 0.5085290561882606, "learning_rate": 4.701457010810955e-06, "loss": 0.4561, "step": 8348 }, { "epoch": 3.264979007162262, "grad_norm": 0.4860433808053769, "learning_rate": 4.7013827606965446e-06, "loss": 0.4365, "step": 8349 }, { "epoch": 3.2653741664608544, "grad_norm": 0.5362280369050675, "learning_rate": 4.70130850193644e-06, "loss": 0.453, "step": 8350 }, { "epoch": 3.2657693257594467, "grad_norm": 0.5026775324033125, "learning_rate": 4.7012342345309316e-06, "loss": 0.4318, "step": 8351 }, { "epoch": 3.266164485058039, "grad_norm": 0.48284490669371327, "learning_rate": 4.701159958480312e-06, "loss": 0.4375, "step": 8352 }, { "epoch": 3.266559644356631, "grad_norm": 0.511556437758763, "learning_rate": 4.701085673784874e-06, "loss": 0.4607, "step": 8353 }, { "epoch": 3.2669548036552234, "grad_norm": 0.4829094276892418, "learning_rate": 4.701011380444907e-06, "loss": 0.4292, "step": 8354 }, { "epoch": 3.2673499629538156, "grad_norm": 0.51144228894575, "learning_rate": 4.700937078460704e-06, "loss": 0.4603, "step": 8355 }, { "epoch": 3.267745122252408, "grad_norm": 0.4906533832218431, "learning_rate": 4.700862767832557e-06, "loss": 0.4582, "step": 8356 }, { "epoch": 3.268140281551, "grad_norm": 0.4930805349262283, "learning_rate": 4.7007884485607565e-06, "loss": 0.458, "step": 8357 }, { "epoch": 3.2685354408495924, "grad_norm": 0.5009831870584155, "learning_rate": 4.700714120645596e-06, "loss": 0.4687, "step": 8358 }, { "epoch": 3.2689306001481846, "grad_norm": 0.49092555374069324, "learning_rate": 4.700639784087366e-06, "loss": 0.4359, "step": 8359 }, { "epoch": 3.269325759446777, "grad_norm": 0.500292655543431, "learning_rate": 4.700565438886361e-06, "loss": 0.4487, "step": 8360 }, { "epoch": 3.269720918745369, "grad_norm": 0.4855070468612074, "learning_rate": 4.70049108504287e-06, "loss": 0.4411, "step": 8361 }, { "epoch": 3.2701160780439613, "grad_norm": 0.5023057055550552, "learning_rate": 4.700416722557186e-06, "loss": 0.4455, "step": 8362 }, { "epoch": 3.2705112373425536, "grad_norm": 0.509131608944125, "learning_rate": 4.700342351429601e-06, "loss": 0.4516, "step": 8363 }, { "epoch": 3.270906396641146, "grad_norm": 0.49337058153224017, "learning_rate": 4.700267971660408e-06, "loss": 0.4675, "step": 8364 }, { "epoch": 3.271301555939738, "grad_norm": 0.4860684297948216, "learning_rate": 4.700193583249899e-06, "loss": 0.4467, "step": 8365 }, { "epoch": 3.2716967152383303, "grad_norm": 0.49228337368850594, "learning_rate": 4.700119186198365e-06, "loss": 0.4579, "step": 8366 }, { "epoch": 3.2720918745369225, "grad_norm": 0.4845358764762339, "learning_rate": 4.700044780506099e-06, "loss": 0.449, "step": 8367 }, { "epoch": 3.2724870338355148, "grad_norm": 0.5068451001379258, "learning_rate": 4.699970366173393e-06, "loss": 0.4348, "step": 8368 }, { "epoch": 3.272882193134107, "grad_norm": 0.4914022566989148, "learning_rate": 4.699895943200539e-06, "loss": 0.4503, "step": 8369 }, { "epoch": 3.2732773524326992, "grad_norm": 0.4992738042869505, "learning_rate": 4.69982151158783e-06, "loss": 0.4432, "step": 8370 }, { "epoch": 3.2736725117312915, "grad_norm": 0.4938604279465015, "learning_rate": 4.6997470713355574e-06, "loss": 0.4414, "step": 8371 }, { "epoch": 3.2740676710298837, "grad_norm": 0.4839800775793246, "learning_rate": 4.699672622444015e-06, "loss": 0.4349, "step": 8372 }, { "epoch": 3.274462830328476, "grad_norm": 0.5024998565000468, "learning_rate": 4.699598164913493e-06, "loss": 0.4402, "step": 8373 }, { "epoch": 3.274857989627068, "grad_norm": 0.49637224096799104, "learning_rate": 4.699523698744286e-06, "loss": 0.4483, "step": 8374 }, { "epoch": 3.2752531489256604, "grad_norm": 0.49260375994924005, "learning_rate": 4.699449223936686e-06, "loss": 0.4474, "step": 8375 }, { "epoch": 3.2756483082242527, "grad_norm": 0.6533471619862731, "learning_rate": 4.699374740490984e-06, "loss": 0.4655, "step": 8376 }, { "epoch": 3.276043467522845, "grad_norm": 0.5234139605987889, "learning_rate": 4.699300248407474e-06, "loss": 0.4672, "step": 8377 }, { "epoch": 3.276438626821437, "grad_norm": 0.4971511724210842, "learning_rate": 4.6992257476864475e-06, "loss": 0.4598, "step": 8378 }, { "epoch": 3.2768337861200294, "grad_norm": 0.49100800872845307, "learning_rate": 4.699151238328198e-06, "loss": 0.47, "step": 8379 }, { "epoch": 3.2772289454186216, "grad_norm": 0.4796505235609134, "learning_rate": 4.6990767203330185e-06, "loss": 0.4321, "step": 8380 }, { "epoch": 3.277624104717214, "grad_norm": 0.5035218618276991, "learning_rate": 4.6990021937012e-06, "loss": 0.4544, "step": 8381 }, { "epoch": 3.278019264015806, "grad_norm": 0.4912630675638253, "learning_rate": 4.6989276584330365e-06, "loss": 0.4311, "step": 8382 }, { "epoch": 3.278414423314399, "grad_norm": 0.5050629349004524, "learning_rate": 4.6988531145288204e-06, "loss": 0.4416, "step": 8383 }, { "epoch": 3.278809582612991, "grad_norm": 0.5106795613363712, "learning_rate": 4.698778561988845e-06, "loss": 0.4728, "step": 8384 }, { "epoch": 3.2792047419115833, "grad_norm": 0.4961344250576797, "learning_rate": 4.698704000813403e-06, "loss": 0.4555, "step": 8385 }, { "epoch": 3.2795999012101755, "grad_norm": 0.5105963304245603, "learning_rate": 4.698629431002786e-06, "loss": 0.4758, "step": 8386 }, { "epoch": 3.2799950605087678, "grad_norm": 0.49603685469527325, "learning_rate": 4.698554852557288e-06, "loss": 0.4394, "step": 8387 }, { "epoch": 3.28039021980736, "grad_norm": 0.51768754874356, "learning_rate": 4.698480265477201e-06, "loss": 0.4477, "step": 8388 }, { "epoch": 3.2807853791059522, "grad_norm": 0.4988292744521313, "learning_rate": 4.698405669762819e-06, "loss": 0.4575, "step": 8389 }, { "epoch": 3.2811805384045445, "grad_norm": 0.4906957305488342, "learning_rate": 4.6983310654144345e-06, "loss": 0.4463, "step": 8390 }, { "epoch": 3.2815756977031367, "grad_norm": 0.4974751379220848, "learning_rate": 4.6982564524323396e-06, "loss": 0.4454, "step": 8391 }, { "epoch": 3.281970857001729, "grad_norm": 0.4872843211030378, "learning_rate": 4.698181830816829e-06, "loss": 0.4311, "step": 8392 }, { "epoch": 3.282366016300321, "grad_norm": 0.49418859390881315, "learning_rate": 4.698107200568195e-06, "loss": 0.4415, "step": 8393 }, { "epoch": 3.2827611755989135, "grad_norm": 0.5037917521978581, "learning_rate": 4.698032561686731e-06, "loss": 0.4554, "step": 8394 }, { "epoch": 3.2831563348975057, "grad_norm": 0.643760942178025, "learning_rate": 4.697957914172728e-06, "loss": 0.444, "step": 8395 }, { "epoch": 3.283551494196098, "grad_norm": 0.49670328228290767, "learning_rate": 4.697883258026483e-06, "loss": 0.4514, "step": 8396 }, { "epoch": 3.28394665349469, "grad_norm": 0.49682146132488997, "learning_rate": 4.697808593248287e-06, "loss": 0.4398, "step": 8397 }, { "epoch": 3.2843418127932824, "grad_norm": 0.5166333354777709, "learning_rate": 4.697733919838433e-06, "loss": 0.4618, "step": 8398 }, { "epoch": 3.2847369720918747, "grad_norm": 0.4806098054963327, "learning_rate": 4.697659237797214e-06, "loss": 0.4501, "step": 8399 }, { "epoch": 3.285132131390467, "grad_norm": 0.49062496009639534, "learning_rate": 4.697584547124925e-06, "loss": 0.4476, "step": 8400 }, { "epoch": 3.285527290689059, "grad_norm": 0.5057710961550459, "learning_rate": 4.697509847821858e-06, "loss": 0.4487, "step": 8401 }, { "epoch": 3.2859224499876514, "grad_norm": 0.5096432840136818, "learning_rate": 4.697435139888307e-06, "loss": 0.4468, "step": 8402 }, { "epoch": 3.2863176092862436, "grad_norm": 0.5099785625996293, "learning_rate": 4.697360423324564e-06, "loss": 0.4736, "step": 8403 }, { "epoch": 3.286712768584836, "grad_norm": 0.4840586188305653, "learning_rate": 4.6972856981309245e-06, "loss": 0.4569, "step": 8404 }, { "epoch": 3.287107927883428, "grad_norm": 0.49475695398984937, "learning_rate": 4.697210964307681e-06, "loss": 0.465, "step": 8405 }, { "epoch": 3.2875030871820203, "grad_norm": 0.5199796400658511, "learning_rate": 4.697136221855127e-06, "loss": 0.4667, "step": 8406 }, { "epoch": 3.2878982464806126, "grad_norm": 0.5003237789978858, "learning_rate": 4.697061470773556e-06, "loss": 0.457, "step": 8407 }, { "epoch": 3.288293405779205, "grad_norm": 0.49819670220659273, "learning_rate": 4.696986711063261e-06, "loss": 0.4494, "step": 8408 }, { "epoch": 3.288688565077797, "grad_norm": 0.49271546941029926, "learning_rate": 4.696911942724538e-06, "loss": 0.458, "step": 8409 }, { "epoch": 3.2890837243763893, "grad_norm": 0.5002339935967224, "learning_rate": 4.6968371657576774e-06, "loss": 0.4463, "step": 8410 }, { "epoch": 3.2894788836749815, "grad_norm": 0.49334667002247623, "learning_rate": 4.696762380162975e-06, "loss": 0.4486, "step": 8411 }, { "epoch": 3.2898740429735738, "grad_norm": 0.496514797921124, "learning_rate": 4.696687585940724e-06, "loss": 0.4538, "step": 8412 }, { "epoch": 3.290269202272166, "grad_norm": 0.4964892015506934, "learning_rate": 4.696612783091218e-06, "loss": 0.4445, "step": 8413 }, { "epoch": 3.2906643615707583, "grad_norm": 0.5015291860594303, "learning_rate": 4.696537971614751e-06, "loss": 0.4537, "step": 8414 }, { "epoch": 3.2910595208693505, "grad_norm": 0.4896264954525798, "learning_rate": 4.696463151511617e-06, "loss": 0.4516, "step": 8415 }, { "epoch": 3.2914546801679427, "grad_norm": 0.5160967672675386, "learning_rate": 4.696388322782108e-06, "loss": 0.472, "step": 8416 }, { "epoch": 3.291849839466535, "grad_norm": 0.5176580403198294, "learning_rate": 4.69631348542652e-06, "loss": 0.4505, "step": 8417 }, { "epoch": 3.292244998765127, "grad_norm": 0.5070289013247464, "learning_rate": 4.696238639445147e-06, "loss": 0.4678, "step": 8418 }, { "epoch": 3.2926401580637195, "grad_norm": 0.5280810466683207, "learning_rate": 4.696163784838282e-06, "loss": 0.4643, "step": 8419 }, { "epoch": 3.2930353173623117, "grad_norm": 0.5182007906795619, "learning_rate": 4.696088921606219e-06, "loss": 0.4367, "step": 8420 }, { "epoch": 3.293430476660904, "grad_norm": 0.49458421124688884, "learning_rate": 4.696014049749252e-06, "loss": 0.464, "step": 8421 }, { "epoch": 3.293825635959496, "grad_norm": 0.5055857636265902, "learning_rate": 4.695939169267676e-06, "loss": 0.4492, "step": 8422 }, { "epoch": 3.2942207952580884, "grad_norm": 0.5004862392256088, "learning_rate": 4.695864280161784e-06, "loss": 0.4587, "step": 8423 }, { "epoch": 3.2946159545566807, "grad_norm": 0.484703488880412, "learning_rate": 4.695789382431871e-06, "loss": 0.4476, "step": 8424 }, { "epoch": 3.295011113855273, "grad_norm": 0.4945224351875782, "learning_rate": 4.6957144760782305e-06, "loss": 0.4487, "step": 8425 }, { "epoch": 3.295406273153865, "grad_norm": 0.47918483340578955, "learning_rate": 4.695639561101156e-06, "loss": 0.4509, "step": 8426 }, { "epoch": 3.2958014324524574, "grad_norm": 0.523558981494999, "learning_rate": 4.695564637500944e-06, "loss": 0.4584, "step": 8427 }, { "epoch": 3.2961965917510496, "grad_norm": 0.5135262004505718, "learning_rate": 4.695489705277886e-06, "loss": 0.4713, "step": 8428 }, { "epoch": 3.296591751049642, "grad_norm": 0.5032512376213754, "learning_rate": 4.695414764432278e-06, "loss": 0.4609, "step": 8429 }, { "epoch": 3.296986910348234, "grad_norm": 0.515575257988458, "learning_rate": 4.6953398149644135e-06, "loss": 0.4535, "step": 8430 }, { "epoch": 3.2973820696468263, "grad_norm": 0.4783220407113566, "learning_rate": 4.695264856874589e-06, "loss": 0.4284, "step": 8431 }, { "epoch": 3.2977772289454186, "grad_norm": 0.4996331261411462, "learning_rate": 4.695189890163095e-06, "loss": 0.4642, "step": 8432 }, { "epoch": 3.298172388244011, "grad_norm": 0.4859730761571242, "learning_rate": 4.6951149148302285e-06, "loss": 0.4564, "step": 8433 }, { "epoch": 3.298567547542603, "grad_norm": 0.49508609585829344, "learning_rate": 4.695039930876285e-06, "loss": 0.4478, "step": 8434 }, { "epoch": 3.2989627068411953, "grad_norm": 0.5073958753301407, "learning_rate": 4.694964938301556e-06, "loss": 0.4498, "step": 8435 }, { "epoch": 3.2993578661397875, "grad_norm": 0.5042008611601112, "learning_rate": 4.694889937106338e-06, "loss": 0.4743, "step": 8436 }, { "epoch": 3.29975302543838, "grad_norm": 0.4995949580337922, "learning_rate": 4.694814927290926e-06, "loss": 0.4444, "step": 8437 }, { "epoch": 3.300148184736972, "grad_norm": 0.5010429776804977, "learning_rate": 4.694739908855613e-06, "loss": 0.4519, "step": 8438 }, { "epoch": 3.3005433440355643, "grad_norm": 0.49717228341091885, "learning_rate": 4.6946648818006944e-06, "loss": 0.4661, "step": 8439 }, { "epoch": 3.3009385033341565, "grad_norm": 0.4941824510019, "learning_rate": 4.694589846126465e-06, "loss": 0.4616, "step": 8440 }, { "epoch": 3.3013336626327487, "grad_norm": 0.4962369271830349, "learning_rate": 4.69451480183322e-06, "loss": 0.4617, "step": 8441 }, { "epoch": 3.301728821931341, "grad_norm": 0.4973622352887221, "learning_rate": 4.694439748921253e-06, "loss": 0.458, "step": 8442 }, { "epoch": 3.3021239812299332, "grad_norm": 0.5023833682592578, "learning_rate": 4.694364687390858e-06, "loss": 0.4733, "step": 8443 }, { "epoch": 3.3025191405285255, "grad_norm": 0.48155367010251726, "learning_rate": 4.694289617242331e-06, "loss": 0.4431, "step": 8444 }, { "epoch": 3.3029142998271177, "grad_norm": 0.494415452731697, "learning_rate": 4.694214538475969e-06, "loss": 0.4283, "step": 8445 }, { "epoch": 3.30330945912571, "grad_norm": 0.47365239654389407, "learning_rate": 4.694139451092062e-06, "loss": 0.4505, "step": 8446 }, { "epoch": 3.303704618424302, "grad_norm": 0.485640275190784, "learning_rate": 4.6940643550909096e-06, "loss": 0.4496, "step": 8447 }, { "epoch": 3.3040997777228944, "grad_norm": 0.5680281189182768, "learning_rate": 4.693989250472804e-06, "loss": 0.472, "step": 8448 }, { "epoch": 3.3044949370214867, "grad_norm": 0.5020600876380309, "learning_rate": 4.693914137238042e-06, "loss": 0.4475, "step": 8449 }, { "epoch": 3.304890096320079, "grad_norm": 0.4919611952382774, "learning_rate": 4.693839015386916e-06, "loss": 0.4579, "step": 8450 }, { "epoch": 3.305285255618671, "grad_norm": 0.48835887965110875, "learning_rate": 4.6937638849197225e-06, "loss": 0.455, "step": 8451 }, { "epoch": 3.3056804149172634, "grad_norm": 0.5031462098533465, "learning_rate": 4.6936887458367565e-06, "loss": 0.4576, "step": 8452 }, { "epoch": 3.3060755742158556, "grad_norm": 0.6061653188739415, "learning_rate": 4.693613598138314e-06, "loss": 0.4332, "step": 8453 }, { "epoch": 3.306470733514448, "grad_norm": 0.4888084682391102, "learning_rate": 4.693538441824689e-06, "loss": 0.4495, "step": 8454 }, { "epoch": 3.30686589281304, "grad_norm": 0.4905941903714758, "learning_rate": 4.693463276896177e-06, "loss": 0.4529, "step": 8455 }, { "epoch": 3.3072610521116323, "grad_norm": 0.49490350717422127, "learning_rate": 4.693388103353073e-06, "loss": 0.4439, "step": 8456 }, { "epoch": 3.3076562114102246, "grad_norm": 0.5077115025169423, "learning_rate": 4.693312921195673e-06, "loss": 0.4602, "step": 8457 }, { "epoch": 3.308051370708817, "grad_norm": 0.49228501239192973, "learning_rate": 4.693237730424272e-06, "loss": 0.4479, "step": 8458 }, { "epoch": 3.308446530007409, "grad_norm": 0.4929223392479759, "learning_rate": 4.693162531039163e-06, "loss": 0.4597, "step": 8459 }, { "epoch": 3.3088416893060013, "grad_norm": 0.48557109126722053, "learning_rate": 4.693087323040646e-06, "loss": 0.4667, "step": 8460 }, { "epoch": 3.3092368486045936, "grad_norm": 0.4829968156083153, "learning_rate": 4.693012106429012e-06, "loss": 0.446, "step": 8461 }, { "epoch": 3.309632007903186, "grad_norm": 0.48460924487669355, "learning_rate": 4.692936881204558e-06, "loss": 0.4668, "step": 8462 }, { "epoch": 3.310027167201778, "grad_norm": 0.4841591054021867, "learning_rate": 4.69286164736758e-06, "loss": 0.4478, "step": 8463 }, { "epoch": 3.3104223265003703, "grad_norm": 0.48269716253485223, "learning_rate": 4.692786404918374e-06, "loss": 0.437, "step": 8464 }, { "epoch": 3.310817485798963, "grad_norm": 0.47804342056190563, "learning_rate": 4.692711153857233e-06, "loss": 0.4483, "step": 8465 }, { "epoch": 3.311212645097555, "grad_norm": 0.52764555981506, "learning_rate": 4.6926358941844556e-06, "loss": 0.4563, "step": 8466 }, { "epoch": 3.3116078043961474, "grad_norm": 0.5246940150795155, "learning_rate": 4.692560625900335e-06, "loss": 0.4497, "step": 8467 }, { "epoch": 3.3120029636947397, "grad_norm": 0.489990665703525, "learning_rate": 4.6924853490051666e-06, "loss": 0.4486, "step": 8468 }, { "epoch": 3.312398122993332, "grad_norm": 0.4932042814676327, "learning_rate": 4.692410063499249e-06, "loss": 0.4372, "step": 8469 }, { "epoch": 3.312793282291924, "grad_norm": 0.5058430347314362, "learning_rate": 4.692334769382874e-06, "loss": 0.4471, "step": 8470 }, { "epoch": 3.3131884415905164, "grad_norm": 0.4913325565934741, "learning_rate": 4.6922594666563405e-06, "loss": 0.4635, "step": 8471 }, { "epoch": 3.3135836008891086, "grad_norm": 0.4926565499584854, "learning_rate": 4.692184155319943e-06, "loss": 0.4682, "step": 8472 }, { "epoch": 3.313978760187701, "grad_norm": 0.5075289488806716, "learning_rate": 4.692108835373977e-06, "loss": 0.4619, "step": 8473 }, { "epoch": 3.314373919486293, "grad_norm": 0.4971824312123781, "learning_rate": 4.692033506818739e-06, "loss": 0.4394, "step": 8474 }, { "epoch": 3.3147690787848854, "grad_norm": 0.4789459129843913, "learning_rate": 4.691958169654524e-06, "loss": 0.4312, "step": 8475 }, { "epoch": 3.3151642380834776, "grad_norm": 0.4948838888029013, "learning_rate": 4.691882823881629e-06, "loss": 0.439, "step": 8476 }, { "epoch": 3.31555939738207, "grad_norm": 0.4995649269101974, "learning_rate": 4.691807469500349e-06, "loss": 0.459, "step": 8477 }, { "epoch": 3.315954556680662, "grad_norm": 0.48840216224269173, "learning_rate": 4.69173210651098e-06, "loss": 0.4614, "step": 8478 }, { "epoch": 3.3163497159792543, "grad_norm": 0.49025639250296205, "learning_rate": 4.691656734913818e-06, "loss": 0.4378, "step": 8479 }, { "epoch": 3.3167448752778466, "grad_norm": 0.4776483977009846, "learning_rate": 4.69158135470916e-06, "loss": 0.429, "step": 8480 }, { "epoch": 3.317140034576439, "grad_norm": 0.4836933504478722, "learning_rate": 4.6915059658973e-06, "loss": 0.4487, "step": 8481 }, { "epoch": 3.317535193875031, "grad_norm": 0.47926982677697055, "learning_rate": 4.691430568478536e-06, "loss": 0.4369, "step": 8482 }, { "epoch": 3.3179303531736233, "grad_norm": 0.4824835398113746, "learning_rate": 4.691355162453164e-06, "loss": 0.4546, "step": 8483 }, { "epoch": 3.3183255124722155, "grad_norm": 0.4920472483420201, "learning_rate": 4.691279747821479e-06, "loss": 0.443, "step": 8484 }, { "epoch": 3.3187206717708078, "grad_norm": 0.5110190442880285, "learning_rate": 4.691204324583777e-06, "loss": 0.4742, "step": 8485 }, { "epoch": 3.3191158310694, "grad_norm": 0.4917474251438137, "learning_rate": 4.691128892740356e-06, "loss": 0.4451, "step": 8486 }, { "epoch": 3.3195109903679922, "grad_norm": 0.5022323736398868, "learning_rate": 4.691053452291511e-06, "loss": 0.4384, "step": 8487 }, { "epoch": 3.3199061496665845, "grad_norm": 0.5093861229795713, "learning_rate": 4.690978003237538e-06, "loss": 0.4592, "step": 8488 }, { "epoch": 3.3203013089651767, "grad_norm": 0.49345040595298023, "learning_rate": 4.690902545578735e-06, "loss": 0.4566, "step": 8489 }, { "epoch": 3.320696468263769, "grad_norm": 0.4910573607099517, "learning_rate": 4.690827079315397e-06, "loss": 0.4521, "step": 8490 }, { "epoch": 3.321091627562361, "grad_norm": 0.4847495876246576, "learning_rate": 4.690751604447819e-06, "loss": 0.4428, "step": 8491 }, { "epoch": 3.3214867868609534, "grad_norm": 0.48758326536965474, "learning_rate": 4.6906761209763e-06, "loss": 0.4521, "step": 8492 }, { "epoch": 3.3218819461595457, "grad_norm": 0.4916431762270958, "learning_rate": 4.690600628901136e-06, "loss": 0.4488, "step": 8493 }, { "epoch": 3.322277105458138, "grad_norm": 0.4858608753350482, "learning_rate": 4.690525128222622e-06, "loss": 0.4405, "step": 8494 }, { "epoch": 3.32267226475673, "grad_norm": 0.4863633181170981, "learning_rate": 4.690449618941056e-06, "loss": 0.4491, "step": 8495 }, { "epoch": 3.3230674240553224, "grad_norm": 0.49316937629362895, "learning_rate": 4.690374101056734e-06, "loss": 0.4523, "step": 8496 }, { "epoch": 3.3234625833539146, "grad_norm": 0.5007036354318656, "learning_rate": 4.690298574569952e-06, "loss": 0.4493, "step": 8497 }, { "epoch": 3.323857742652507, "grad_norm": 0.4945456330336425, "learning_rate": 4.690223039481008e-06, "loss": 0.4582, "step": 8498 }, { "epoch": 3.324252901951099, "grad_norm": 0.48559472918192703, "learning_rate": 4.690147495790197e-06, "loss": 0.454, "step": 8499 }, { "epoch": 3.3246480612496914, "grad_norm": 0.5121803446284094, "learning_rate": 4.690071943497818e-06, "loss": 0.4726, "step": 8500 }, { "epoch": 3.3250432205482836, "grad_norm": 0.4910672144158454, "learning_rate": 4.689996382604165e-06, "loss": 0.4425, "step": 8501 }, { "epoch": 3.325438379846876, "grad_norm": 0.5094226150602034, "learning_rate": 4.689920813109537e-06, "loss": 0.4625, "step": 8502 }, { "epoch": 3.325833539145468, "grad_norm": 0.48636282211631227, "learning_rate": 4.6898452350142296e-06, "loss": 0.4548, "step": 8503 }, { "epoch": 3.3262286984440603, "grad_norm": 0.4995052919549064, "learning_rate": 4.68976964831854e-06, "loss": 0.4523, "step": 8504 }, { "epoch": 3.3266238577426526, "grad_norm": 0.501508067836005, "learning_rate": 4.6896940530227645e-06, "loss": 0.4596, "step": 8505 }, { "epoch": 3.327019017041245, "grad_norm": 0.4929069745339944, "learning_rate": 4.6896184491272e-06, "loss": 0.4766, "step": 8506 }, { "epoch": 3.327414176339837, "grad_norm": 0.48439758395238547, "learning_rate": 4.689542836632144e-06, "loss": 0.4558, "step": 8507 }, { "epoch": 3.3278093356384293, "grad_norm": 0.5043297803420848, "learning_rate": 4.689467215537894e-06, "loss": 0.4545, "step": 8508 }, { "epoch": 3.3282044949370215, "grad_norm": 0.4918720068999591, "learning_rate": 4.689391585844745e-06, "loss": 0.4474, "step": 8509 }, { "epoch": 3.3285996542356138, "grad_norm": 0.49767114366249415, "learning_rate": 4.689315947552997e-06, "loss": 0.44, "step": 8510 }, { "epoch": 3.328994813534206, "grad_norm": 0.47267654346989246, "learning_rate": 4.689240300662944e-06, "loss": 0.444, "step": 8511 }, { "epoch": 3.3293899728327983, "grad_norm": 0.502912384208219, "learning_rate": 4.6891646451748855e-06, "loss": 0.4432, "step": 8512 }, { "epoch": 3.3297851321313905, "grad_norm": 0.49847552956035984, "learning_rate": 4.6890889810891175e-06, "loss": 0.4619, "step": 8513 }, { "epoch": 3.3301802914299827, "grad_norm": 0.4986927619398265, "learning_rate": 4.689013308405936e-06, "loss": 0.4815, "step": 8514 }, { "epoch": 3.330575450728575, "grad_norm": 0.481380549992846, "learning_rate": 4.688937627125641e-06, "loss": 0.4617, "step": 8515 }, { "epoch": 3.330970610027167, "grad_norm": 0.4999972238019487, "learning_rate": 4.6888619372485275e-06, "loss": 0.4521, "step": 8516 }, { "epoch": 3.3313657693257595, "grad_norm": 0.49828427414950494, "learning_rate": 4.688786238774893e-06, "loss": 0.4558, "step": 8517 }, { "epoch": 3.3317609286243517, "grad_norm": 0.49747513646012703, "learning_rate": 4.688710531705036e-06, "loss": 0.4397, "step": 8518 }, { "epoch": 3.332156087922944, "grad_norm": 0.4845769352628467, "learning_rate": 4.688634816039253e-06, "loss": 0.4477, "step": 8519 }, { "epoch": 3.332551247221536, "grad_norm": 0.4955335961994695, "learning_rate": 4.688559091777841e-06, "loss": 0.4429, "step": 8520 }, { "epoch": 3.3329464065201284, "grad_norm": 0.5002101662039317, "learning_rate": 4.6884833589210984e-06, "loss": 0.4557, "step": 8521 }, { "epoch": 3.3333415658187207, "grad_norm": 0.5012403566636711, "learning_rate": 4.688407617469321e-06, "loss": 0.4539, "step": 8522 }, { "epoch": 3.333736725117313, "grad_norm": 0.4846544545345259, "learning_rate": 4.688331867422809e-06, "loss": 0.4452, "step": 8523 }, { "epoch": 3.334131884415905, "grad_norm": 0.4966381233591069, "learning_rate": 4.688256108781858e-06, "loss": 0.4487, "step": 8524 }, { "epoch": 3.3345270437144974, "grad_norm": 0.5829217411242708, "learning_rate": 4.688180341546765e-06, "loss": 0.4482, "step": 8525 }, { "epoch": 3.3349222030130896, "grad_norm": 0.511353842249708, "learning_rate": 4.688104565717828e-06, "loss": 0.4382, "step": 8526 }, { "epoch": 3.335317362311682, "grad_norm": 0.5083341149647729, "learning_rate": 4.688028781295346e-06, "loss": 0.4576, "step": 8527 }, { "epoch": 3.335712521610274, "grad_norm": 0.537344546705706, "learning_rate": 4.687952988279615e-06, "loss": 0.4469, "step": 8528 }, { "epoch": 3.3361076809088663, "grad_norm": 0.5068930887011414, "learning_rate": 4.687877186670934e-06, "loss": 0.458, "step": 8529 }, { "epoch": 3.3365028402074586, "grad_norm": 0.5136759214514139, "learning_rate": 4.687801376469599e-06, "loss": 0.4687, "step": 8530 }, { "epoch": 3.336897999506051, "grad_norm": 0.48837862440654095, "learning_rate": 4.687725557675909e-06, "loss": 0.4405, "step": 8531 }, { "epoch": 3.337293158804643, "grad_norm": 0.48565670852398274, "learning_rate": 4.687649730290162e-06, "loss": 0.4434, "step": 8532 }, { "epoch": 3.3376883181032353, "grad_norm": 0.5031092299127129, "learning_rate": 4.6875738943126544e-06, "loss": 0.4517, "step": 8533 }, { "epoch": 3.3380834774018275, "grad_norm": 0.5215776103880958, "learning_rate": 4.687498049743685e-06, "loss": 0.4554, "step": 8534 }, { "epoch": 3.3384786367004198, "grad_norm": 0.5102826341112808, "learning_rate": 4.687422196583552e-06, "loss": 0.4708, "step": 8535 }, { "epoch": 3.338873795999012, "grad_norm": 0.4978853219738474, "learning_rate": 4.6873463348325535e-06, "loss": 0.4542, "step": 8536 }, { "epoch": 3.3392689552976043, "grad_norm": 0.5025990366613098, "learning_rate": 4.687270464490986e-06, "loss": 0.4417, "step": 8537 }, { "epoch": 3.3396641145961965, "grad_norm": 0.49855784405646736, "learning_rate": 4.687194585559148e-06, "loss": 0.4601, "step": 8538 }, { "epoch": 3.3400592738947887, "grad_norm": 0.4932797881481111, "learning_rate": 4.687118698037338e-06, "loss": 0.4471, "step": 8539 }, { "epoch": 3.340454433193381, "grad_norm": 0.4854332267118237, "learning_rate": 4.687042801925854e-06, "loss": 0.4452, "step": 8540 }, { "epoch": 3.340849592491973, "grad_norm": 0.5037097432876378, "learning_rate": 4.686966897224995e-06, "loss": 0.4677, "step": 8541 }, { "epoch": 3.3412447517905655, "grad_norm": 0.4856578335596684, "learning_rate": 4.686890983935057e-06, "loss": 0.4449, "step": 8542 }, { "epoch": 3.3416399110891577, "grad_norm": 0.507337201279644, "learning_rate": 4.686815062056338e-06, "loss": 0.4636, "step": 8543 }, { "epoch": 3.34203507038775, "grad_norm": 0.5033310923165123, "learning_rate": 4.686739131589139e-06, "loss": 0.4504, "step": 8544 }, { "epoch": 3.342430229686342, "grad_norm": 0.5337152714631989, "learning_rate": 4.686663192533756e-06, "loss": 0.4927, "step": 8545 }, { "epoch": 3.3428253889849344, "grad_norm": 0.47904040874221143, "learning_rate": 4.686587244890488e-06, "loss": 0.4503, "step": 8546 }, { "epoch": 3.3432205482835267, "grad_norm": 0.49230347712013833, "learning_rate": 4.686511288659633e-06, "loss": 0.4568, "step": 8547 }, { "epoch": 3.343615707582119, "grad_norm": 0.5166799488026463, "learning_rate": 4.686435323841489e-06, "loss": 0.4562, "step": 8548 }, { "epoch": 3.344010866880711, "grad_norm": 0.4809789988181581, "learning_rate": 4.686359350436355e-06, "loss": 0.4509, "step": 8549 }, { "epoch": 3.3444060261793034, "grad_norm": 0.4984285848482146, "learning_rate": 4.686283368444529e-06, "loss": 0.4519, "step": 8550 }, { "epoch": 3.3448011854778956, "grad_norm": 0.4914707828734001, "learning_rate": 4.68620737786631e-06, "loss": 0.4478, "step": 8551 }, { "epoch": 3.345196344776488, "grad_norm": 0.5038394475829153, "learning_rate": 4.6861313787019955e-06, "loss": 0.4666, "step": 8552 }, { "epoch": 3.34559150407508, "grad_norm": 0.48139717719915776, "learning_rate": 4.686055370951884e-06, "loss": 0.4516, "step": 8553 }, { "epoch": 3.3459866633736723, "grad_norm": 0.48724474236998466, "learning_rate": 4.685979354616275e-06, "loss": 0.4405, "step": 8554 }, { "epoch": 3.3463818226722646, "grad_norm": 0.4840451154072024, "learning_rate": 4.685903329695467e-06, "loss": 0.4385, "step": 8555 }, { "epoch": 3.346776981970857, "grad_norm": 0.4881270901311099, "learning_rate": 4.685827296189757e-06, "loss": 0.4506, "step": 8556 }, { "epoch": 3.347172141269449, "grad_norm": 0.4989552343260758, "learning_rate": 4.6857512540994456e-06, "loss": 0.4698, "step": 8557 }, { "epoch": 3.3475673005680413, "grad_norm": 0.4808629699114711, "learning_rate": 4.68567520342483e-06, "loss": 0.4413, "step": 8558 }, { "epoch": 3.3479624598666335, "grad_norm": 0.5190496818983991, "learning_rate": 4.68559914416621e-06, "loss": 0.4611, "step": 8559 }, { "epoch": 3.348357619165226, "grad_norm": 0.5037425270647897, "learning_rate": 4.685523076323882e-06, "loss": 0.4509, "step": 8560 }, { "epoch": 3.348752778463818, "grad_norm": 0.510585938637351, "learning_rate": 4.685446999898148e-06, "loss": 0.4438, "step": 8561 }, { "epoch": 3.3491479377624103, "grad_norm": 0.4974626944458801, "learning_rate": 4.685370914889305e-06, "loss": 0.4606, "step": 8562 }, { "epoch": 3.3495430970610025, "grad_norm": 0.7314950180759313, "learning_rate": 4.685294821297652e-06, "loss": 0.4673, "step": 8563 }, { "epoch": 3.3499382563595947, "grad_norm": 0.49161103652361915, "learning_rate": 4.685218719123488e-06, "loss": 0.4424, "step": 8564 }, { "epoch": 3.350333415658187, "grad_norm": 0.5024992958323747, "learning_rate": 4.685142608367112e-06, "loss": 0.4678, "step": 8565 }, { "epoch": 3.3507285749567792, "grad_norm": 0.4938192969589111, "learning_rate": 4.685066489028823e-06, "loss": 0.4492, "step": 8566 }, { "epoch": 3.3511237342553715, "grad_norm": 0.4803879916882299, "learning_rate": 4.684990361108919e-06, "loss": 0.4561, "step": 8567 }, { "epoch": 3.3515188935539637, "grad_norm": 0.4949401035088218, "learning_rate": 4.6849142246077e-06, "loss": 0.4442, "step": 8568 }, { "epoch": 3.351914052852556, "grad_norm": 0.5976486404806604, "learning_rate": 4.684838079525465e-06, "loss": 0.4277, "step": 8569 }, { "epoch": 3.352309212151148, "grad_norm": 0.49601388755837394, "learning_rate": 4.684761925862512e-06, "loss": 0.4531, "step": 8570 }, { "epoch": 3.3527043714497404, "grad_norm": 0.5152413906273675, "learning_rate": 4.684685763619141e-06, "loss": 0.4492, "step": 8571 }, { "epoch": 3.353099530748333, "grad_norm": 0.49341639261673015, "learning_rate": 4.684609592795651e-06, "loss": 0.4489, "step": 8572 }, { "epoch": 3.3534946900469254, "grad_norm": 0.48592662223802807, "learning_rate": 4.684533413392341e-06, "loss": 0.455, "step": 8573 }, { "epoch": 3.3538898493455176, "grad_norm": 0.49332844351682664, "learning_rate": 4.684457225409511e-06, "loss": 0.4657, "step": 8574 }, { "epoch": 3.35428500864411, "grad_norm": 0.5037336410016675, "learning_rate": 4.684381028847459e-06, "loss": 0.4659, "step": 8575 }, { "epoch": 3.354680167942702, "grad_norm": 0.506139866036012, "learning_rate": 4.684304823706484e-06, "loss": 0.4483, "step": 8576 }, { "epoch": 3.3550753272412943, "grad_norm": 0.4942492969355824, "learning_rate": 4.6842286099868864e-06, "loss": 0.4454, "step": 8577 }, { "epoch": 3.3554704865398866, "grad_norm": 0.49335672939762426, "learning_rate": 4.684152387688966e-06, "loss": 0.4433, "step": 8578 }, { "epoch": 3.355865645838479, "grad_norm": 0.4969284399321435, "learning_rate": 4.6840761568130204e-06, "loss": 0.4584, "step": 8579 }, { "epoch": 3.356260805137071, "grad_norm": 0.4873906665965171, "learning_rate": 4.68399991735935e-06, "loss": 0.4597, "step": 8580 }, { "epoch": 3.3566559644356633, "grad_norm": 0.4886120652074471, "learning_rate": 4.6839236693282544e-06, "loss": 0.4442, "step": 8581 }, { "epoch": 3.3570511237342555, "grad_norm": 0.4899352867723243, "learning_rate": 4.683847412720033e-06, "loss": 0.4521, "step": 8582 }, { "epoch": 3.3574462830328478, "grad_norm": 0.4933462350667174, "learning_rate": 4.683771147534985e-06, "loss": 0.4438, "step": 8583 }, { "epoch": 3.35784144233144, "grad_norm": 0.4898866455801216, "learning_rate": 4.68369487377341e-06, "loss": 0.4564, "step": 8584 }, { "epoch": 3.3582366016300322, "grad_norm": 0.5332125037232016, "learning_rate": 4.683618591435607e-06, "loss": 0.4515, "step": 8585 }, { "epoch": 3.3586317609286245, "grad_norm": 0.5359679858136057, "learning_rate": 4.683542300521876e-06, "loss": 0.4486, "step": 8586 }, { "epoch": 3.3590269202272167, "grad_norm": 0.4767570634929505, "learning_rate": 4.6834660010325175e-06, "loss": 0.4387, "step": 8587 }, { "epoch": 3.359422079525809, "grad_norm": 0.5056533475750209, "learning_rate": 4.68338969296783e-06, "loss": 0.4585, "step": 8588 }, { "epoch": 3.359817238824401, "grad_norm": 0.48897355907247364, "learning_rate": 4.683313376328113e-06, "loss": 0.4395, "step": 8589 }, { "epoch": 3.3602123981229934, "grad_norm": 0.5203028354845993, "learning_rate": 4.683237051113669e-06, "loss": 0.4614, "step": 8590 }, { "epoch": 3.3606075574215857, "grad_norm": 0.4925183013708426, "learning_rate": 4.683160717324794e-06, "loss": 0.4436, "step": 8591 }, { "epoch": 3.361002716720178, "grad_norm": 0.4909350641925199, "learning_rate": 4.683084374961789e-06, "loss": 0.4459, "step": 8592 }, { "epoch": 3.36139787601877, "grad_norm": 0.4990261941077317, "learning_rate": 4.683008024024955e-06, "loss": 0.4662, "step": 8593 }, { "epoch": 3.3617930353173624, "grad_norm": 0.5042484386336988, "learning_rate": 4.6829316645145905e-06, "loss": 0.4454, "step": 8594 }, { "epoch": 3.3621881946159546, "grad_norm": 0.5110820201232344, "learning_rate": 4.682855296430997e-06, "loss": 0.461, "step": 8595 }, { "epoch": 3.362583353914547, "grad_norm": 0.5119373520344455, "learning_rate": 4.6827789197744725e-06, "loss": 0.4351, "step": 8596 }, { "epoch": 3.362978513213139, "grad_norm": 0.4864074295817185, "learning_rate": 4.682702534545318e-06, "loss": 0.4648, "step": 8597 }, { "epoch": 3.3633736725117314, "grad_norm": 0.5170136763217118, "learning_rate": 4.682626140743833e-06, "loss": 0.4485, "step": 8598 }, { "epoch": 3.3637688318103236, "grad_norm": 0.5124926082111486, "learning_rate": 4.682549738370319e-06, "loss": 0.4701, "step": 8599 }, { "epoch": 3.364163991108916, "grad_norm": 0.4920001975170289, "learning_rate": 4.682473327425074e-06, "loss": 0.4648, "step": 8600 }, { "epoch": 3.364559150407508, "grad_norm": 0.4937153316476773, "learning_rate": 4.682396907908399e-06, "loss": 0.442, "step": 8601 }, { "epoch": 3.3649543097061003, "grad_norm": 0.48028050782341225, "learning_rate": 4.682320479820595e-06, "loss": 0.4459, "step": 8602 }, { "epoch": 3.3653494690046926, "grad_norm": 0.5216497378987135, "learning_rate": 4.68224404316196e-06, "loss": 0.4581, "step": 8603 }, { "epoch": 3.365744628303285, "grad_norm": 0.5014704905655514, "learning_rate": 4.682167597932797e-06, "loss": 0.4563, "step": 8604 }, { "epoch": 3.366139787601877, "grad_norm": 0.478995486285583, "learning_rate": 4.682091144133404e-06, "loss": 0.4382, "step": 8605 }, { "epoch": 3.3665349469004693, "grad_norm": 0.5182832142253619, "learning_rate": 4.682014681764082e-06, "loss": 0.459, "step": 8606 }, { "epoch": 3.3669301061990615, "grad_norm": 0.4961453547341973, "learning_rate": 4.6819382108251316e-06, "loss": 0.4501, "step": 8607 }, { "epoch": 3.3673252654976538, "grad_norm": 0.5111669516032571, "learning_rate": 4.681861731316852e-06, "loss": 0.4616, "step": 8608 }, { "epoch": 3.367720424796246, "grad_norm": 0.49667736409059116, "learning_rate": 4.681785243239546e-06, "loss": 0.4476, "step": 8609 }, { "epoch": 3.3681155840948382, "grad_norm": 0.509991731084816, "learning_rate": 4.681708746593511e-06, "loss": 0.4551, "step": 8610 }, { "epoch": 3.3685107433934305, "grad_norm": 0.4861532008939771, "learning_rate": 4.6816322413790495e-06, "loss": 0.4551, "step": 8611 }, { "epoch": 3.3689059026920227, "grad_norm": 0.47927743937701206, "learning_rate": 4.6815557275964605e-06, "loss": 0.4407, "step": 8612 }, { "epoch": 3.369301061990615, "grad_norm": 0.48329305038335735, "learning_rate": 4.681479205246047e-06, "loss": 0.4317, "step": 8613 }, { "epoch": 3.369696221289207, "grad_norm": 0.5557400243821855, "learning_rate": 4.681402674328106e-06, "loss": 0.4298, "step": 8614 }, { "epoch": 3.3700913805877994, "grad_norm": 0.516015662888361, "learning_rate": 4.681326134842941e-06, "loss": 0.4399, "step": 8615 }, { "epoch": 3.3704865398863917, "grad_norm": 0.51146416218406, "learning_rate": 4.681249586790851e-06, "loss": 0.4549, "step": 8616 }, { "epoch": 3.370881699184984, "grad_norm": 0.5148904737293851, "learning_rate": 4.681173030172138e-06, "loss": 0.4503, "step": 8617 }, { "epoch": 3.371276858483576, "grad_norm": 0.47920716436293814, "learning_rate": 4.681096464987101e-06, "loss": 0.4567, "step": 8618 }, { "epoch": 3.3716720177821684, "grad_norm": 0.48667328112391944, "learning_rate": 4.681019891236042e-06, "loss": 0.4422, "step": 8619 }, { "epoch": 3.3720671770807606, "grad_norm": 0.48135580887321766, "learning_rate": 4.680943308919261e-06, "loss": 0.4675, "step": 8620 }, { "epoch": 3.372462336379353, "grad_norm": 0.5032541309680445, "learning_rate": 4.680866718037058e-06, "loss": 0.4648, "step": 8621 }, { "epoch": 3.372857495677945, "grad_norm": 0.4811650608559039, "learning_rate": 4.680790118589737e-06, "loss": 0.4447, "step": 8622 }, { "epoch": 3.3732526549765374, "grad_norm": 0.5466985608523096, "learning_rate": 4.680713510577594e-06, "loss": 0.457, "step": 8623 }, { "epoch": 3.3736478142751296, "grad_norm": 0.5592565607971153, "learning_rate": 4.680636894000935e-06, "loss": 0.4449, "step": 8624 }, { "epoch": 3.374042973573722, "grad_norm": 0.504991945313021, "learning_rate": 4.680560268860057e-06, "loss": 0.4542, "step": 8625 }, { "epoch": 3.374438132872314, "grad_norm": 0.48890150098264806, "learning_rate": 4.680483635155263e-06, "loss": 0.4502, "step": 8626 }, { "epoch": 3.3748332921709063, "grad_norm": 0.4943593439450679, "learning_rate": 4.680406992886854e-06, "loss": 0.4404, "step": 8627 }, { "epoch": 3.3752284514694986, "grad_norm": 0.5332119219072962, "learning_rate": 4.680330342055129e-06, "loss": 0.4288, "step": 8628 }, { "epoch": 3.375623610768091, "grad_norm": 0.4841185179406501, "learning_rate": 4.68025368266039e-06, "loss": 0.4506, "step": 8629 }, { "epoch": 3.376018770066683, "grad_norm": 0.523621408569861, "learning_rate": 4.68017701470294e-06, "loss": 0.4586, "step": 8630 }, { "epoch": 3.3764139293652753, "grad_norm": 0.46451406844891363, "learning_rate": 4.680100338183078e-06, "loss": 0.4332, "step": 8631 }, { "epoch": 3.3768090886638675, "grad_norm": 0.4905181028363597, "learning_rate": 4.6800236531011055e-06, "loss": 0.4411, "step": 8632 }, { "epoch": 3.3772042479624598, "grad_norm": 0.502652462267723, "learning_rate": 4.679946959457325e-06, "loss": 0.4693, "step": 8633 }, { "epoch": 3.377599407261052, "grad_norm": 0.4911574856877145, "learning_rate": 4.6798702572520345e-06, "loss": 0.44, "step": 8634 }, { "epoch": 3.3779945665596443, "grad_norm": 0.5052945482135779, "learning_rate": 4.6797935464855385e-06, "loss": 0.4649, "step": 8635 }, { "epoch": 3.3783897258582365, "grad_norm": 0.48569037287035166, "learning_rate": 4.679716827158137e-06, "loss": 0.4587, "step": 8636 }, { "epoch": 3.3787848851568287, "grad_norm": 0.5041362195084776, "learning_rate": 4.6796400992701314e-06, "loss": 0.4767, "step": 8637 }, { "epoch": 3.379180044455421, "grad_norm": 0.4813960654671574, "learning_rate": 4.679563362821823e-06, "loss": 0.4568, "step": 8638 }, { "epoch": 3.379575203754013, "grad_norm": 0.5088131032990694, "learning_rate": 4.679486617813513e-06, "loss": 0.4435, "step": 8639 }, { "epoch": 3.3799703630526055, "grad_norm": 0.4861775857012652, "learning_rate": 4.679409864245503e-06, "loss": 0.4632, "step": 8640 }, { "epoch": 3.3803655223511977, "grad_norm": 0.490586606299979, "learning_rate": 4.679333102118095e-06, "loss": 0.4594, "step": 8641 }, { "epoch": 3.38076068164979, "grad_norm": 0.4932899496063389, "learning_rate": 4.67925633143159e-06, "loss": 0.4725, "step": 8642 }, { "epoch": 3.381155840948382, "grad_norm": 0.5069803709886499, "learning_rate": 4.679179552186289e-06, "loss": 0.4608, "step": 8643 }, { "epoch": 3.3815510002469744, "grad_norm": 0.4788990174095867, "learning_rate": 4.679102764382494e-06, "loss": 0.4577, "step": 8644 }, { "epoch": 3.3819461595455667, "grad_norm": 0.49268285139092627, "learning_rate": 4.6790259680205064e-06, "loss": 0.463, "step": 8645 }, { "epoch": 3.382341318844159, "grad_norm": 0.48644575261707157, "learning_rate": 4.678949163100629e-06, "loss": 0.4608, "step": 8646 }, { "epoch": 3.382736478142751, "grad_norm": 0.4884652879626933, "learning_rate": 4.678872349623161e-06, "loss": 0.4498, "step": 8647 }, { "epoch": 3.3831316374413434, "grad_norm": 0.4981392757751588, "learning_rate": 4.678795527588407e-06, "loss": 0.4554, "step": 8648 }, { "epoch": 3.3835267967399356, "grad_norm": 0.4825160629921625, "learning_rate": 4.678718696996666e-06, "loss": 0.447, "step": 8649 }, { "epoch": 3.383921956038528, "grad_norm": 0.496410984109344, "learning_rate": 4.678641857848241e-06, "loss": 0.4894, "step": 8650 }, { "epoch": 3.38431711533712, "grad_norm": 0.48757622681326085, "learning_rate": 4.6785650101434335e-06, "loss": 0.4523, "step": 8651 }, { "epoch": 3.3847122746357123, "grad_norm": 0.49958339915552713, "learning_rate": 4.678488153882546e-06, "loss": 0.4837, "step": 8652 }, { "epoch": 3.3851074339343046, "grad_norm": 0.48280083745483665, "learning_rate": 4.67841128906588e-06, "loss": 0.4733, "step": 8653 }, { "epoch": 3.385502593232897, "grad_norm": 0.4827487271721312, "learning_rate": 4.6783344156937375e-06, "loss": 0.4551, "step": 8654 }, { "epoch": 3.3858977525314895, "grad_norm": 0.5135331354845594, "learning_rate": 4.67825753376642e-06, "loss": 0.4505, "step": 8655 }, { "epoch": 3.3862929118300817, "grad_norm": 0.4950284195537903, "learning_rate": 4.678180643284229e-06, "loss": 0.4456, "step": 8656 }, { "epoch": 3.386688071128674, "grad_norm": 0.49134952539015186, "learning_rate": 4.678103744247468e-06, "loss": 0.4536, "step": 8657 }, { "epoch": 3.3870832304272662, "grad_norm": 0.5177519290341787, "learning_rate": 4.678026836656437e-06, "loss": 0.442, "step": 8658 }, { "epoch": 3.3874783897258585, "grad_norm": 0.5228271177583046, "learning_rate": 4.6779499205114406e-06, "loss": 0.4477, "step": 8659 }, { "epoch": 3.3878735490244507, "grad_norm": 0.48410785050252503, "learning_rate": 4.677872995812778e-06, "loss": 0.4512, "step": 8660 }, { "epoch": 3.388268708323043, "grad_norm": 0.4710781351956366, "learning_rate": 4.677796062560753e-06, "loss": 0.4506, "step": 8661 }, { "epoch": 3.388663867621635, "grad_norm": 0.48111306966615963, "learning_rate": 4.677719120755669e-06, "loss": 0.4598, "step": 8662 }, { "epoch": 3.3890590269202274, "grad_norm": 0.4720947283760529, "learning_rate": 4.6776421703978245e-06, "loss": 0.4468, "step": 8663 }, { "epoch": 3.3894541862188197, "grad_norm": 0.49393506201536114, "learning_rate": 4.677565211487526e-06, "loss": 0.4379, "step": 8664 }, { "epoch": 3.389849345517412, "grad_norm": 0.48345686255173453, "learning_rate": 4.677488244025072e-06, "loss": 0.458, "step": 8665 }, { "epoch": 3.390244504816004, "grad_norm": 0.490727505465697, "learning_rate": 4.677411268010768e-06, "loss": 0.4772, "step": 8666 }, { "epoch": 3.3906396641145964, "grad_norm": 0.5034772743200914, "learning_rate": 4.677334283444913e-06, "loss": 0.4652, "step": 8667 }, { "epoch": 3.3910348234131886, "grad_norm": 0.49435834675571205, "learning_rate": 4.677257290327812e-06, "loss": 0.4475, "step": 8668 }, { "epoch": 3.391429982711781, "grad_norm": 0.5301178874307636, "learning_rate": 4.677180288659766e-06, "loss": 0.4683, "step": 8669 }, { "epoch": 3.391825142010373, "grad_norm": 0.5010243323234773, "learning_rate": 4.677103278441079e-06, "loss": 0.4642, "step": 8670 }, { "epoch": 3.3922203013089653, "grad_norm": 0.4885105136781966, "learning_rate": 4.67702625967205e-06, "loss": 0.4411, "step": 8671 }, { "epoch": 3.3926154606075576, "grad_norm": 0.5011611192314379, "learning_rate": 4.6769492323529865e-06, "loss": 0.4517, "step": 8672 }, { "epoch": 3.39301061990615, "grad_norm": 0.4939700998530884, "learning_rate": 4.6768721964841865e-06, "loss": 0.4498, "step": 8673 }, { "epoch": 3.393405779204742, "grad_norm": 0.4954624935938974, "learning_rate": 4.676795152065955e-06, "loss": 0.4407, "step": 8674 }, { "epoch": 3.3938009385033343, "grad_norm": 0.5119224790406571, "learning_rate": 4.676718099098594e-06, "loss": 0.4485, "step": 8675 }, { "epoch": 3.3941960978019265, "grad_norm": 0.49705423434443835, "learning_rate": 4.676641037582407e-06, "loss": 0.4382, "step": 8676 }, { "epoch": 3.394591257100519, "grad_norm": 0.4752730371851217, "learning_rate": 4.676563967517694e-06, "loss": 0.4412, "step": 8677 }, { "epoch": 3.394986416399111, "grad_norm": 0.49012430064946205, "learning_rate": 4.67648688890476e-06, "loss": 0.4519, "step": 8678 }, { "epoch": 3.3953815756977033, "grad_norm": 0.5050443098707427, "learning_rate": 4.676409801743908e-06, "loss": 0.4619, "step": 8679 }, { "epoch": 3.3957767349962955, "grad_norm": 0.5049807653407213, "learning_rate": 4.676332706035439e-06, "loss": 0.4584, "step": 8680 }, { "epoch": 3.3961718942948877, "grad_norm": 0.4838150340724149, "learning_rate": 4.676255601779656e-06, "loss": 0.4453, "step": 8681 }, { "epoch": 3.39656705359348, "grad_norm": 0.4860542133541303, "learning_rate": 4.676178488976864e-06, "loss": 0.4621, "step": 8682 }, { "epoch": 3.3969622128920722, "grad_norm": 0.4982361920417968, "learning_rate": 4.676101367627364e-06, "loss": 0.4705, "step": 8683 }, { "epoch": 3.3973573721906645, "grad_norm": 0.4923037972317949, "learning_rate": 4.676024237731459e-06, "loss": 0.4545, "step": 8684 }, { "epoch": 3.3977525314892567, "grad_norm": 0.5050487317688797, "learning_rate": 4.675947099289452e-06, "loss": 0.4528, "step": 8685 }, { "epoch": 3.398147690787849, "grad_norm": 0.506471973724114, "learning_rate": 4.675869952301646e-06, "loss": 0.4753, "step": 8686 }, { "epoch": 3.398542850086441, "grad_norm": 0.4709433859416677, "learning_rate": 4.675792796768344e-06, "loss": 0.445, "step": 8687 }, { "epoch": 3.3989380093850334, "grad_norm": 0.49103621457028523, "learning_rate": 4.67571563268985e-06, "loss": 0.4651, "step": 8688 }, { "epoch": 3.3993331686836257, "grad_norm": 0.490208526893036, "learning_rate": 4.675638460066465e-06, "loss": 0.4492, "step": 8689 }, { "epoch": 3.399728327982218, "grad_norm": 0.48711510354854665, "learning_rate": 4.675561278898494e-06, "loss": 0.4417, "step": 8690 }, { "epoch": 3.40012348728081, "grad_norm": 0.48830265701534586, "learning_rate": 4.675484089186239e-06, "loss": 0.449, "step": 8691 }, { "epoch": 3.4005186465794024, "grad_norm": 0.4779197088460178, "learning_rate": 4.6754068909300044e-06, "loss": 0.4609, "step": 8692 }, { "epoch": 3.4009138058779946, "grad_norm": 0.5000217624862712, "learning_rate": 4.675329684130091e-06, "loss": 0.4776, "step": 8693 }, { "epoch": 3.401308965176587, "grad_norm": 0.515046165717711, "learning_rate": 4.675252468786805e-06, "loss": 0.4721, "step": 8694 }, { "epoch": 3.401704124475179, "grad_norm": 0.4887957168364599, "learning_rate": 4.675175244900447e-06, "loss": 0.4457, "step": 8695 }, { "epoch": 3.4020992837737714, "grad_norm": 0.48483411104988006, "learning_rate": 4.675098012471322e-06, "loss": 0.459, "step": 8696 }, { "epoch": 3.4024944430723636, "grad_norm": 0.49370445185103784, "learning_rate": 4.675020771499733e-06, "loss": 0.4517, "step": 8697 }, { "epoch": 3.402889602370956, "grad_norm": 0.5232425366645843, "learning_rate": 4.6749435219859825e-06, "loss": 0.4522, "step": 8698 }, { "epoch": 3.403284761669548, "grad_norm": 0.47812677963839323, "learning_rate": 4.674866263930375e-06, "loss": 0.4373, "step": 8699 }, { "epoch": 3.4036799209681403, "grad_norm": 0.50114037166455, "learning_rate": 4.674788997333214e-06, "loss": 0.4761, "step": 8700 }, { "epoch": 3.4040750802667326, "grad_norm": 0.4990732251105035, "learning_rate": 4.674711722194802e-06, "loss": 0.4305, "step": 8701 }, { "epoch": 3.404470239565325, "grad_norm": 0.4919053285526375, "learning_rate": 4.674634438515443e-06, "loss": 0.4596, "step": 8702 }, { "epoch": 3.404865398863917, "grad_norm": 0.500445117775505, "learning_rate": 4.6745571462954395e-06, "loss": 0.4375, "step": 8703 }, { "epoch": 3.4052605581625093, "grad_norm": 0.48686796875270305, "learning_rate": 4.674479845535097e-06, "loss": 0.4523, "step": 8704 }, { "epoch": 3.4056557174611015, "grad_norm": 0.5030201027639913, "learning_rate": 4.6744025362347175e-06, "loss": 0.4715, "step": 8705 }, { "epoch": 3.4060508767596938, "grad_norm": 0.4858539869831347, "learning_rate": 4.674325218394606e-06, "loss": 0.4542, "step": 8706 }, { "epoch": 3.406446036058286, "grad_norm": 0.48744591028154344, "learning_rate": 4.674247892015065e-06, "loss": 0.4632, "step": 8707 }, { "epoch": 3.4068411953568782, "grad_norm": 0.5305591582260777, "learning_rate": 4.674170557096398e-06, "loss": 0.4471, "step": 8708 }, { "epoch": 3.4072363546554705, "grad_norm": 0.4896593869094789, "learning_rate": 4.674093213638909e-06, "loss": 0.4546, "step": 8709 }, { "epoch": 3.4076315139540627, "grad_norm": 0.5193419476995951, "learning_rate": 4.674015861642903e-06, "loss": 0.4572, "step": 8710 }, { "epoch": 3.408026673252655, "grad_norm": 0.5029397565244852, "learning_rate": 4.673938501108684e-06, "loss": 0.4718, "step": 8711 }, { "epoch": 3.408421832551247, "grad_norm": 0.4966168365599225, "learning_rate": 4.673861132036552e-06, "loss": 0.4393, "step": 8712 }, { "epoch": 3.4088169918498394, "grad_norm": 0.48279757458338685, "learning_rate": 4.673783754426815e-06, "loss": 0.4432, "step": 8713 }, { "epoch": 3.4092121511484317, "grad_norm": 0.47848792729340384, "learning_rate": 4.673706368279775e-06, "loss": 0.4654, "step": 8714 }, { "epoch": 3.409607310447024, "grad_norm": 0.48502060720503026, "learning_rate": 4.6736289735957365e-06, "loss": 0.4351, "step": 8715 }, { "epoch": 3.410002469745616, "grad_norm": 0.517902551374955, "learning_rate": 4.673551570375003e-06, "loss": 0.4608, "step": 8716 }, { "epoch": 3.4103976290442084, "grad_norm": 0.4959576763379469, "learning_rate": 4.673474158617879e-06, "loss": 0.4575, "step": 8717 }, { "epoch": 3.4107927883428006, "grad_norm": 0.487828895869595, "learning_rate": 4.673396738324668e-06, "loss": 0.4381, "step": 8718 }, { "epoch": 3.411187947641393, "grad_norm": 0.4921131542323153, "learning_rate": 4.6733193094956755e-06, "loss": 0.4618, "step": 8719 }, { "epoch": 3.411583106939985, "grad_norm": 0.489989732392488, "learning_rate": 4.6732418721312036e-06, "loss": 0.4712, "step": 8720 }, { "epoch": 3.4119782662385774, "grad_norm": 0.48021030964681694, "learning_rate": 4.6731644262315575e-06, "loss": 0.4398, "step": 8721 }, { "epoch": 3.4123734255371696, "grad_norm": 0.5196748841661029, "learning_rate": 4.673086971797041e-06, "loss": 0.4569, "step": 8722 }, { "epoch": 3.412768584835762, "grad_norm": 0.6391029638584853, "learning_rate": 4.673009508827958e-06, "loss": 0.4584, "step": 8723 }, { "epoch": 3.413163744134354, "grad_norm": 0.49223316026383734, "learning_rate": 4.672932037324614e-06, "loss": 0.4523, "step": 8724 }, { "epoch": 3.4135589034329463, "grad_norm": 0.4865710928676197, "learning_rate": 4.672854557287312e-06, "loss": 0.4411, "step": 8725 }, { "epoch": 3.4139540627315386, "grad_norm": 0.5065983068219876, "learning_rate": 4.672777068716357e-06, "loss": 0.4726, "step": 8726 }, { "epoch": 3.414349222030131, "grad_norm": 0.49208422147691183, "learning_rate": 4.672699571612053e-06, "loss": 0.4772, "step": 8727 }, { "epoch": 3.414744381328723, "grad_norm": 0.4883146901724943, "learning_rate": 4.6726220659747035e-06, "loss": 0.4645, "step": 8728 }, { "epoch": 3.4151395406273153, "grad_norm": 0.4873411772334921, "learning_rate": 4.672544551804615e-06, "loss": 0.453, "step": 8729 }, { "epoch": 3.4155346999259075, "grad_norm": 0.4812888051291896, "learning_rate": 4.67246702910209e-06, "loss": 0.4401, "step": 8730 }, { "epoch": 3.4159298592244998, "grad_norm": 0.48596379658909483, "learning_rate": 4.6723894978674344e-06, "loss": 0.4595, "step": 8731 }, { "epoch": 3.416325018523092, "grad_norm": 0.48806164094782617, "learning_rate": 4.672311958100952e-06, "loss": 0.4559, "step": 8732 }, { "epoch": 3.4167201778216842, "grad_norm": 0.5278943342209049, "learning_rate": 4.672234409802946e-06, "loss": 0.4598, "step": 8733 }, { "epoch": 3.4171153371202765, "grad_norm": 0.48766632378299773, "learning_rate": 4.672156852973724e-06, "loss": 0.447, "step": 8734 }, { "epoch": 3.4175104964188687, "grad_norm": 0.4998558934415389, "learning_rate": 4.672079287613588e-06, "loss": 0.466, "step": 8735 }, { "epoch": 3.417905655717461, "grad_norm": 0.5161927705054742, "learning_rate": 4.672001713722844e-06, "loss": 0.4512, "step": 8736 }, { "epoch": 3.418300815016053, "grad_norm": 0.4993835199850496, "learning_rate": 4.671924131301795e-06, "loss": 0.4667, "step": 8737 }, { "epoch": 3.4186959743146454, "grad_norm": 0.5111345200683314, "learning_rate": 4.671846540350748e-06, "loss": 0.4669, "step": 8738 }, { "epoch": 3.4190911336132377, "grad_norm": 0.4968151170656032, "learning_rate": 4.6717689408700065e-06, "loss": 0.4415, "step": 8739 }, { "epoch": 3.41948629291183, "grad_norm": 0.48927357809806327, "learning_rate": 4.671691332859875e-06, "loss": 0.4454, "step": 8740 }, { "epoch": 3.419881452210422, "grad_norm": 0.4986943442895406, "learning_rate": 4.671613716320658e-06, "loss": 0.4578, "step": 8741 }, { "epoch": 3.4202766115090144, "grad_norm": 0.507640735251368, "learning_rate": 4.671536091252662e-06, "loss": 0.4708, "step": 8742 }, { "epoch": 3.4206717708076066, "grad_norm": 0.4864374605398443, "learning_rate": 4.671458457656191e-06, "loss": 0.4459, "step": 8743 }, { "epoch": 3.421066930106199, "grad_norm": 0.48587924372525426, "learning_rate": 4.671380815531549e-06, "loss": 0.4556, "step": 8744 }, { "epoch": 3.421462089404791, "grad_norm": 0.49368596535285864, "learning_rate": 4.671303164879043e-06, "loss": 0.4518, "step": 8745 }, { "epoch": 3.4218572487033834, "grad_norm": 0.49585373553596823, "learning_rate": 4.671225505698975e-06, "loss": 0.4557, "step": 8746 }, { "epoch": 3.4222524080019756, "grad_norm": 0.48924156726844276, "learning_rate": 4.671147837991653e-06, "loss": 0.4598, "step": 8747 }, { "epoch": 3.422647567300568, "grad_norm": 0.5066355289955341, "learning_rate": 4.67107016175738e-06, "loss": 0.4619, "step": 8748 }, { "epoch": 3.42304272659916, "grad_norm": 0.4998574490378027, "learning_rate": 4.670992476996462e-06, "loss": 0.4628, "step": 8749 }, { "epoch": 3.4234378858977523, "grad_norm": 0.4926776349012955, "learning_rate": 4.670914783709203e-06, "loss": 0.4533, "step": 8750 }, { "epoch": 3.4238330451963446, "grad_norm": 0.48942538427023524, "learning_rate": 4.6708370818959105e-06, "loss": 0.4497, "step": 8751 }, { "epoch": 3.424228204494937, "grad_norm": 0.48790555324765683, "learning_rate": 4.670759371556886e-06, "loss": 0.4497, "step": 8752 }, { "epoch": 3.424623363793529, "grad_norm": 0.5083233533489905, "learning_rate": 4.670681652692439e-06, "loss": 0.4488, "step": 8753 }, { "epoch": 3.4250185230921213, "grad_norm": 0.5216951615222701, "learning_rate": 4.670603925302871e-06, "loss": 0.4747, "step": 8754 }, { "epoch": 3.4254136823907135, "grad_norm": 0.49070908525677087, "learning_rate": 4.6705261893884904e-06, "loss": 0.4622, "step": 8755 }, { "epoch": 3.4258088416893058, "grad_norm": 0.4914126193159798, "learning_rate": 4.6704484449496e-06, "loss": 0.4628, "step": 8756 }, { "epoch": 3.426204000987898, "grad_norm": 0.49544497930427484, "learning_rate": 4.670370691986507e-06, "loss": 0.4443, "step": 8757 }, { "epoch": 3.4265991602864903, "grad_norm": 0.509745151929745, "learning_rate": 4.670292930499515e-06, "loss": 0.4639, "step": 8758 }, { "epoch": 3.4269943195850825, "grad_norm": 0.4992599417209327, "learning_rate": 4.67021516048893e-06, "loss": 0.4588, "step": 8759 }, { "epoch": 3.4273894788836747, "grad_norm": 0.48349844003574116, "learning_rate": 4.670137381955058e-06, "loss": 0.4485, "step": 8760 }, { "epoch": 3.4277846381822674, "grad_norm": 0.492075989647956, "learning_rate": 4.670059594898204e-06, "loss": 0.4503, "step": 8761 }, { "epoch": 3.4281797974808597, "grad_norm": 0.48934117903372404, "learning_rate": 4.669981799318673e-06, "loss": 0.4554, "step": 8762 }, { "epoch": 3.428574956779452, "grad_norm": 0.4878416543021339, "learning_rate": 4.669903995216772e-06, "loss": 0.4563, "step": 8763 }, { "epoch": 3.428970116078044, "grad_norm": 0.4967785153968765, "learning_rate": 4.669826182592806e-06, "loss": 0.4613, "step": 8764 }, { "epoch": 3.4293652753766364, "grad_norm": 0.4892337802144703, "learning_rate": 4.669748361447081e-06, "loss": 0.4604, "step": 8765 }, { "epoch": 3.4297604346752286, "grad_norm": 0.5013718521706811, "learning_rate": 4.6696705317799e-06, "loss": 0.4558, "step": 8766 }, { "epoch": 3.430155593973821, "grad_norm": 0.5039804223697372, "learning_rate": 4.669592693591571e-06, "loss": 0.4654, "step": 8767 }, { "epoch": 3.430550753272413, "grad_norm": 0.5028272637505543, "learning_rate": 4.669514846882401e-06, "loss": 0.4487, "step": 8768 }, { "epoch": 3.4309459125710053, "grad_norm": 0.47464547478018637, "learning_rate": 4.669436991652693e-06, "loss": 0.4574, "step": 8769 }, { "epoch": 3.4313410718695976, "grad_norm": 0.4998838948907234, "learning_rate": 4.6693591279027535e-06, "loss": 0.461, "step": 8770 }, { "epoch": 3.43173623116819, "grad_norm": 0.4955573106154294, "learning_rate": 4.669281255632889e-06, "loss": 0.4541, "step": 8771 }, { "epoch": 3.432131390466782, "grad_norm": 0.5161307923252046, "learning_rate": 4.669203374843405e-06, "loss": 0.4616, "step": 8772 }, { "epoch": 3.4325265497653743, "grad_norm": 0.4873616543396202, "learning_rate": 4.669125485534608e-06, "loss": 0.4531, "step": 8773 }, { "epoch": 3.4329217090639665, "grad_norm": 0.4914250914065892, "learning_rate": 4.669047587706803e-06, "loss": 0.4371, "step": 8774 }, { "epoch": 3.433316868362559, "grad_norm": 0.49013758880173275, "learning_rate": 4.668969681360295e-06, "loss": 0.455, "step": 8775 }, { "epoch": 3.433712027661151, "grad_norm": 0.4935136679299746, "learning_rate": 4.668891766495393e-06, "loss": 0.4581, "step": 8776 }, { "epoch": 3.4341071869597433, "grad_norm": 0.5068462598993336, "learning_rate": 4.6688138431124e-06, "loss": 0.4592, "step": 8777 }, { "epoch": 3.4345023462583355, "grad_norm": 0.5140101571522524, "learning_rate": 4.668735911211623e-06, "loss": 0.44, "step": 8778 }, { "epoch": 3.4348975055569277, "grad_norm": 0.48453766176716734, "learning_rate": 4.668657970793369e-06, "loss": 0.4403, "step": 8779 }, { "epoch": 3.43529266485552, "grad_norm": 0.5039231232251424, "learning_rate": 4.668580021857943e-06, "loss": 0.4624, "step": 8780 }, { "epoch": 3.4356878241541122, "grad_norm": 0.5007826918099934, "learning_rate": 4.668502064405651e-06, "loss": 0.4575, "step": 8781 }, { "epoch": 3.4360829834527045, "grad_norm": 0.4847249820915745, "learning_rate": 4.6684240984368005e-06, "loss": 0.4504, "step": 8782 }, { "epoch": 3.4364781427512967, "grad_norm": 0.5305724135544032, "learning_rate": 4.668346123951696e-06, "loss": 0.4301, "step": 8783 }, { "epoch": 3.436873302049889, "grad_norm": 0.4873670692094404, "learning_rate": 4.668268140950646e-06, "loss": 0.4645, "step": 8784 }, { "epoch": 3.437268461348481, "grad_norm": 0.4961011367709567, "learning_rate": 4.668190149433955e-06, "loss": 0.4574, "step": 8785 }, { "epoch": 3.4376636206470734, "grad_norm": 0.519896601726298, "learning_rate": 4.668112149401928e-06, "loss": 0.4673, "step": 8786 }, { "epoch": 3.4380587799456657, "grad_norm": 0.5092646275519088, "learning_rate": 4.6680341408548746e-06, "loss": 0.4497, "step": 8787 }, { "epoch": 3.438453939244258, "grad_norm": 0.5057955855549762, "learning_rate": 4.667956123793099e-06, "loss": 0.4783, "step": 8788 }, { "epoch": 3.43884909854285, "grad_norm": 0.4985817701072453, "learning_rate": 4.667878098216909e-06, "loss": 0.4517, "step": 8789 }, { "epoch": 3.4392442578414424, "grad_norm": 0.5134738759251121, "learning_rate": 4.66780006412661e-06, "loss": 0.4479, "step": 8790 }, { "epoch": 3.4396394171400346, "grad_norm": 0.5255474891390775, "learning_rate": 4.667722021522508e-06, "loss": 0.4562, "step": 8791 }, { "epoch": 3.440034576438627, "grad_norm": 0.49695920779912806, "learning_rate": 4.667643970404911e-06, "loss": 0.4648, "step": 8792 }, { "epoch": 3.440429735737219, "grad_norm": 0.49907335777305206, "learning_rate": 4.667565910774124e-06, "loss": 0.4706, "step": 8793 }, { "epoch": 3.4408248950358113, "grad_norm": 0.4884559789494324, "learning_rate": 4.667487842630455e-06, "loss": 0.4514, "step": 8794 }, { "epoch": 3.4412200543344036, "grad_norm": 0.5000777793061358, "learning_rate": 4.667409765974209e-06, "loss": 0.4539, "step": 8795 }, { "epoch": 3.441615213632996, "grad_norm": 0.5069843087753504, "learning_rate": 4.667331680805694e-06, "loss": 0.4659, "step": 8796 }, { "epoch": 3.442010372931588, "grad_norm": 0.5197003132246248, "learning_rate": 4.6672535871252165e-06, "loss": 0.4652, "step": 8797 }, { "epoch": 3.4424055322301803, "grad_norm": 0.4871319544582793, "learning_rate": 4.6671754849330834e-06, "loss": 0.4448, "step": 8798 }, { "epoch": 3.4428006915287725, "grad_norm": 0.5028109247529198, "learning_rate": 4.6670973742296e-06, "loss": 0.4618, "step": 8799 }, { "epoch": 3.443195850827365, "grad_norm": 0.49091531311391173, "learning_rate": 4.667019255015075e-06, "loss": 0.4409, "step": 8800 }, { "epoch": 3.443591010125957, "grad_norm": 0.5051259627632506, "learning_rate": 4.666941127289813e-06, "loss": 0.4567, "step": 8801 }, { "epoch": 3.4439861694245493, "grad_norm": 0.49791345108367346, "learning_rate": 4.666862991054123e-06, "loss": 0.4553, "step": 8802 }, { "epoch": 3.4443813287231415, "grad_norm": 0.4943684112194807, "learning_rate": 4.666784846308311e-06, "loss": 0.4501, "step": 8803 }, { "epoch": 3.4447764880217338, "grad_norm": 0.598269397017305, "learning_rate": 4.666706693052684e-06, "loss": 0.4722, "step": 8804 }, { "epoch": 3.445171647320326, "grad_norm": 0.5033122711335563, "learning_rate": 4.666628531287548e-06, "loss": 0.4559, "step": 8805 }, { "epoch": 3.4455668066189182, "grad_norm": 0.47928044079152565, "learning_rate": 4.666550361013211e-06, "loss": 0.4556, "step": 8806 }, { "epoch": 3.4459619659175105, "grad_norm": 0.49275067830564856, "learning_rate": 4.66647218222998e-06, "loss": 0.4271, "step": 8807 }, { "epoch": 3.4463571252161027, "grad_norm": 0.49320681766088637, "learning_rate": 4.666393994938162e-06, "loss": 0.4565, "step": 8808 }, { "epoch": 3.446752284514695, "grad_norm": 0.48982527817865373, "learning_rate": 4.666315799138064e-06, "loss": 0.459, "step": 8809 }, { "epoch": 3.447147443813287, "grad_norm": 0.48836849709394153, "learning_rate": 4.6662375948299924e-06, "loss": 0.4543, "step": 8810 }, { "epoch": 3.4475426031118794, "grad_norm": 0.49635265949726676, "learning_rate": 4.666159382014255e-06, "loss": 0.4618, "step": 8811 }, { "epoch": 3.4479377624104717, "grad_norm": 0.4953454582630569, "learning_rate": 4.66608116069116e-06, "loss": 0.4572, "step": 8812 }, { "epoch": 3.448332921709064, "grad_norm": 0.5006722224939579, "learning_rate": 4.6660029308610125e-06, "loss": 0.4508, "step": 8813 }, { "epoch": 3.448728081007656, "grad_norm": 0.5043235685637225, "learning_rate": 4.665924692524121e-06, "loss": 0.462, "step": 8814 }, { "epoch": 3.4491232403062484, "grad_norm": 0.5053267442560526, "learning_rate": 4.665846445680792e-06, "loss": 0.4496, "step": 8815 }, { "epoch": 3.4495183996048406, "grad_norm": 0.4917987457397994, "learning_rate": 4.665768190331334e-06, "loss": 0.4662, "step": 8816 }, { "epoch": 3.449913558903433, "grad_norm": 0.48570702363194657, "learning_rate": 4.665689926476054e-06, "loss": 0.4525, "step": 8817 }, { "epoch": 3.450308718202025, "grad_norm": 0.49691878545305157, "learning_rate": 4.665611654115258e-06, "loss": 0.4573, "step": 8818 }, { "epoch": 3.4507038775006174, "grad_norm": 0.4895661716841793, "learning_rate": 4.665533373249255e-06, "loss": 0.4411, "step": 8819 }, { "epoch": 3.4510990367992096, "grad_norm": 0.49186441261086594, "learning_rate": 4.665455083878352e-06, "loss": 0.4692, "step": 8820 }, { "epoch": 3.451494196097802, "grad_norm": 0.5099516410323954, "learning_rate": 4.665376786002856e-06, "loss": 0.4656, "step": 8821 }, { "epoch": 3.451889355396394, "grad_norm": 0.5169744344300137, "learning_rate": 4.665298479623075e-06, "loss": 0.4556, "step": 8822 }, { "epoch": 3.4522845146949863, "grad_norm": 0.5171843190465332, "learning_rate": 4.665220164739316e-06, "loss": 0.4769, "step": 8823 }, { "epoch": 3.4526796739935786, "grad_norm": 0.4974279869698287, "learning_rate": 4.665141841351888e-06, "loss": 0.439, "step": 8824 }, { "epoch": 3.453074833292171, "grad_norm": 0.50434560452558, "learning_rate": 4.665063509461098e-06, "loss": 0.4594, "step": 8825 }, { "epoch": 3.453469992590763, "grad_norm": 0.4955408379140396, "learning_rate": 4.664985169067251e-06, "loss": 0.4758, "step": 8826 }, { "epoch": 3.4538651518893553, "grad_norm": 0.48902071003257797, "learning_rate": 4.664906820170658e-06, "loss": 0.4516, "step": 8827 }, { "epoch": 3.4542603111879475, "grad_norm": 0.4937855562200361, "learning_rate": 4.664828462771627e-06, "loss": 0.4646, "step": 8828 }, { "epoch": 3.4546554704865398, "grad_norm": 0.5082998875173607, "learning_rate": 4.664750096870463e-06, "loss": 0.4603, "step": 8829 }, { "epoch": 3.455050629785132, "grad_norm": 0.47237718060013906, "learning_rate": 4.664671722467475e-06, "loss": 0.4576, "step": 8830 }, { "epoch": 3.4554457890837242, "grad_norm": 0.5038153640837024, "learning_rate": 4.664593339562971e-06, "loss": 0.4622, "step": 8831 }, { "epoch": 3.4558409483823165, "grad_norm": 0.49668508531522104, "learning_rate": 4.66451494815726e-06, "loss": 0.4477, "step": 8832 }, { "epoch": 3.4562361076809087, "grad_norm": 0.5133159759954414, "learning_rate": 4.664436548250646e-06, "loss": 0.4596, "step": 8833 }, { "epoch": 3.456631266979501, "grad_norm": 0.49905932346964094, "learning_rate": 4.664358139843442e-06, "loss": 0.4759, "step": 8834 }, { "epoch": 3.457026426278093, "grad_norm": 0.4849036414485639, "learning_rate": 4.664279722935953e-06, "loss": 0.4609, "step": 8835 }, { "epoch": 3.4574215855766854, "grad_norm": 0.5058385856975989, "learning_rate": 4.664201297528487e-06, "loss": 0.4706, "step": 8836 }, { "epoch": 3.4578167448752777, "grad_norm": 0.4967750894306117, "learning_rate": 4.6641228636213524e-06, "loss": 0.4501, "step": 8837 }, { "epoch": 3.45821190417387, "grad_norm": 0.5136748603140383, "learning_rate": 4.664044421214857e-06, "loss": 0.4634, "step": 8838 }, { "epoch": 3.458607063472462, "grad_norm": 0.4914768586557601, "learning_rate": 4.663965970309311e-06, "loss": 0.4609, "step": 8839 }, { "epoch": 3.4590022227710544, "grad_norm": 0.5032307100674864, "learning_rate": 4.6638875109050184e-06, "loss": 0.454, "step": 8840 }, { "epoch": 3.4593973820696466, "grad_norm": 0.5373774375004284, "learning_rate": 4.663809043002291e-06, "loss": 0.4558, "step": 8841 }, { "epoch": 3.459792541368239, "grad_norm": 0.5050016062307993, "learning_rate": 4.663730566601436e-06, "loss": 0.4626, "step": 8842 }, { "epoch": 3.460187700666831, "grad_norm": 0.4891364435706425, "learning_rate": 4.6636520817027596e-06, "loss": 0.4506, "step": 8843 }, { "epoch": 3.460582859965424, "grad_norm": 0.48998437910287085, "learning_rate": 4.6635735883065734e-06, "loss": 0.4529, "step": 8844 }, { "epoch": 3.460978019264016, "grad_norm": 0.5234452315475704, "learning_rate": 4.663495086413183e-06, "loss": 0.4699, "step": 8845 }, { "epoch": 3.4613731785626083, "grad_norm": 0.49371396515639815, "learning_rate": 4.663416576022898e-06, "loss": 0.4706, "step": 8846 }, { "epoch": 3.4617683378612005, "grad_norm": 0.49709209591769404, "learning_rate": 4.663338057136025e-06, "loss": 0.476, "step": 8847 }, { "epoch": 3.4621634971597928, "grad_norm": 0.49578446171291213, "learning_rate": 4.663259529752876e-06, "loss": 0.4444, "step": 8848 }, { "epoch": 3.462558656458385, "grad_norm": 0.49126172062385615, "learning_rate": 4.663180993873756e-06, "loss": 0.4489, "step": 8849 }, { "epoch": 3.4629538157569772, "grad_norm": 0.49180903216506305, "learning_rate": 4.663102449498974e-06, "loss": 0.4546, "step": 8850 }, { "epoch": 3.4633489750555695, "grad_norm": 0.5005694477594985, "learning_rate": 4.66302389662884e-06, "loss": 0.4512, "step": 8851 }, { "epoch": 3.4637441343541617, "grad_norm": 0.49774390912434563, "learning_rate": 4.6629453352636615e-06, "loss": 0.4651, "step": 8852 }, { "epoch": 3.464139293652754, "grad_norm": 0.5149141552536891, "learning_rate": 4.662866765403747e-06, "loss": 0.4473, "step": 8853 }, { "epoch": 3.464534452951346, "grad_norm": 0.5072099362598289, "learning_rate": 4.662788187049405e-06, "loss": 0.4772, "step": 8854 }, { "epoch": 3.4649296122499385, "grad_norm": 0.49560828556589115, "learning_rate": 4.6627096002009445e-06, "loss": 0.4537, "step": 8855 }, { "epoch": 3.4653247715485307, "grad_norm": 0.4920647341464689, "learning_rate": 4.662631004858674e-06, "loss": 0.4422, "step": 8856 }, { "epoch": 3.465719930847123, "grad_norm": 0.4992878751198371, "learning_rate": 4.6625524010229026e-06, "loss": 0.4608, "step": 8857 }, { "epoch": 3.466115090145715, "grad_norm": 0.49054926255487874, "learning_rate": 4.662473788693938e-06, "loss": 0.4539, "step": 8858 }, { "epoch": 3.4665102494443074, "grad_norm": 0.511990187730294, "learning_rate": 4.6623951678720894e-06, "loss": 0.4829, "step": 8859 }, { "epoch": 3.4669054087428997, "grad_norm": 0.5042603888878492, "learning_rate": 4.662316538557666e-06, "loss": 0.4613, "step": 8860 }, { "epoch": 3.467300568041492, "grad_norm": 0.4969317296679051, "learning_rate": 4.662237900750976e-06, "loss": 0.445, "step": 8861 }, { "epoch": 3.467695727340084, "grad_norm": 0.503339914538068, "learning_rate": 4.662159254452329e-06, "loss": 0.4607, "step": 8862 }, { "epoch": 3.4680908866386764, "grad_norm": 0.47526527789470935, "learning_rate": 4.662080599662032e-06, "loss": 0.4332, "step": 8863 }, { "epoch": 3.4684860459372686, "grad_norm": 0.48910025218721065, "learning_rate": 4.662001936380397e-06, "loss": 0.4532, "step": 8864 }, { "epoch": 3.468881205235861, "grad_norm": 0.5023800565124126, "learning_rate": 4.661923264607731e-06, "loss": 0.4635, "step": 8865 }, { "epoch": 3.469276364534453, "grad_norm": 0.49650951915124897, "learning_rate": 4.661844584344342e-06, "loss": 0.4726, "step": 8866 }, { "epoch": 3.4696715238330453, "grad_norm": 0.4968947894789248, "learning_rate": 4.661765895590541e-06, "loss": 0.4736, "step": 8867 }, { "epoch": 3.4700666831316376, "grad_norm": 0.5009727916810776, "learning_rate": 4.661687198346636e-06, "loss": 0.459, "step": 8868 }, { "epoch": 3.47046184243023, "grad_norm": 0.48976101872113026, "learning_rate": 4.661608492612937e-06, "loss": 0.4467, "step": 8869 }, { "epoch": 3.470857001728822, "grad_norm": 0.5016589813587168, "learning_rate": 4.661529778389752e-06, "loss": 0.4542, "step": 8870 }, { "epoch": 3.4712521610274143, "grad_norm": 0.4861886122067568, "learning_rate": 4.66145105567739e-06, "loss": 0.461, "step": 8871 }, { "epoch": 3.4716473203260065, "grad_norm": 0.641821102486397, "learning_rate": 4.661372324476161e-06, "loss": 0.4541, "step": 8872 }, { "epoch": 3.4720424796245988, "grad_norm": 0.4940555527439461, "learning_rate": 4.6612935847863746e-06, "loss": 0.4645, "step": 8873 }, { "epoch": 3.472437638923191, "grad_norm": 0.5017974488912654, "learning_rate": 4.661214836608339e-06, "loss": 0.4536, "step": 8874 }, { "epoch": 3.4728327982217833, "grad_norm": 0.4969241194719381, "learning_rate": 4.661136079942364e-06, "loss": 0.4661, "step": 8875 }, { "epoch": 3.4732279575203755, "grad_norm": 0.497649831354656, "learning_rate": 4.661057314788758e-06, "loss": 0.4413, "step": 8876 }, { "epoch": 3.4736231168189677, "grad_norm": 0.485793011113323, "learning_rate": 4.6609785411478326e-06, "loss": 0.4653, "step": 8877 }, { "epoch": 3.47401827611756, "grad_norm": 0.49424185636318685, "learning_rate": 4.6608997590198945e-06, "loss": 0.4475, "step": 8878 }, { "epoch": 3.474413435416152, "grad_norm": 0.48959550866721235, "learning_rate": 4.6608209684052555e-06, "loss": 0.455, "step": 8879 }, { "epoch": 3.4748085947147445, "grad_norm": 0.4878411516995577, "learning_rate": 4.660742169304223e-06, "loss": 0.4612, "step": 8880 }, { "epoch": 3.4752037540133367, "grad_norm": 0.5094157531998392, "learning_rate": 4.660663361717107e-06, "loss": 0.4718, "step": 8881 }, { "epoch": 3.475598913311929, "grad_norm": 0.4989284958044305, "learning_rate": 4.660584545644218e-06, "loss": 0.4602, "step": 8882 }, { "epoch": 3.475994072610521, "grad_norm": 0.48462148426022633, "learning_rate": 4.660505721085865e-06, "loss": 0.4506, "step": 8883 }, { "epoch": 3.4763892319091134, "grad_norm": 0.4881796143310651, "learning_rate": 4.660426888042356e-06, "loss": 0.4601, "step": 8884 }, { "epoch": 3.4767843912077057, "grad_norm": 0.49485495431393517, "learning_rate": 4.660348046514004e-06, "loss": 0.4462, "step": 8885 }, { "epoch": 3.477179550506298, "grad_norm": 0.48394677880720927, "learning_rate": 4.660269196501116e-06, "loss": 0.4463, "step": 8886 }, { "epoch": 3.47757470980489, "grad_norm": 0.4855215947244085, "learning_rate": 4.660190338004002e-06, "loss": 0.4465, "step": 8887 }, { "epoch": 3.4779698691034824, "grad_norm": 0.5086993405331509, "learning_rate": 4.660111471022973e-06, "loss": 0.456, "step": 8888 }, { "epoch": 3.4783650284020746, "grad_norm": 0.49724371282452545, "learning_rate": 4.660032595558337e-06, "loss": 0.4611, "step": 8889 }, { "epoch": 3.478760187700667, "grad_norm": 0.5029381576180928, "learning_rate": 4.659953711610405e-06, "loss": 0.4807, "step": 8890 }, { "epoch": 3.479155346999259, "grad_norm": 0.48536678216800494, "learning_rate": 4.659874819179486e-06, "loss": 0.4673, "step": 8891 }, { "epoch": 3.4795505062978513, "grad_norm": 0.48336927893573745, "learning_rate": 4.65979591826589e-06, "loss": 0.4539, "step": 8892 }, { "epoch": 3.4799456655964436, "grad_norm": 0.4937274142048356, "learning_rate": 4.659717008869928e-06, "loss": 0.4763, "step": 8893 }, { "epoch": 3.480340824895036, "grad_norm": 0.4928360042158491, "learning_rate": 4.659638090991909e-06, "loss": 0.4613, "step": 8894 }, { "epoch": 3.480735984193628, "grad_norm": 0.530269437282756, "learning_rate": 4.659559164632142e-06, "loss": 0.4487, "step": 8895 }, { "epoch": 3.4811311434922203, "grad_norm": 0.4910271446685397, "learning_rate": 4.659480229790938e-06, "loss": 0.4719, "step": 8896 }, { "epoch": 3.4815263027908125, "grad_norm": 0.4777584233118303, "learning_rate": 4.659401286468609e-06, "loss": 0.4439, "step": 8897 }, { "epoch": 3.481921462089405, "grad_norm": 0.48331532023965773, "learning_rate": 4.659322334665461e-06, "loss": 0.454, "step": 8898 }, { "epoch": 3.482316621387997, "grad_norm": 0.49680547638456035, "learning_rate": 4.659243374381806e-06, "loss": 0.4589, "step": 8899 }, { "epoch": 3.4827117806865893, "grad_norm": 0.4759886694048339, "learning_rate": 4.659164405617955e-06, "loss": 0.453, "step": 8900 }, { "epoch": 3.4831069399851815, "grad_norm": 0.48347058134167675, "learning_rate": 4.6590854283742175e-06, "loss": 0.4649, "step": 8901 }, { "epoch": 3.4835020992837737, "grad_norm": 0.5021839134977939, "learning_rate": 4.659006442650903e-06, "loss": 0.4534, "step": 8902 }, { "epoch": 3.483897258582366, "grad_norm": 0.48132025196008, "learning_rate": 4.658927448448323e-06, "loss": 0.4478, "step": 8903 }, { "epoch": 3.4842924178809582, "grad_norm": 0.49135070095173283, "learning_rate": 4.658848445766786e-06, "loss": 0.4506, "step": 8904 }, { "epoch": 3.4846875771795505, "grad_norm": 0.48125094395751417, "learning_rate": 4.6587694346066035e-06, "loss": 0.4377, "step": 8905 }, { "epoch": 3.4850827364781427, "grad_norm": 0.49750742233825657, "learning_rate": 4.658690414968086e-06, "loss": 0.4655, "step": 8906 }, { "epoch": 3.485477895776735, "grad_norm": 0.48458376051136504, "learning_rate": 4.658611386851543e-06, "loss": 0.4559, "step": 8907 }, { "epoch": 3.485873055075327, "grad_norm": 0.5115369273188421, "learning_rate": 4.658532350257285e-06, "loss": 0.4611, "step": 8908 }, { "epoch": 3.4862682143739194, "grad_norm": 0.48861679265318614, "learning_rate": 4.658453305185623e-06, "loss": 0.4476, "step": 8909 }, { "epoch": 3.4866633736725117, "grad_norm": 0.4877426200944746, "learning_rate": 4.658374251636867e-06, "loss": 0.4544, "step": 8910 }, { "epoch": 3.487058532971104, "grad_norm": 0.4828165510082438, "learning_rate": 4.658295189611327e-06, "loss": 0.4535, "step": 8911 }, { "epoch": 3.487453692269696, "grad_norm": 0.6245926944397081, "learning_rate": 4.658216119109314e-06, "loss": 0.4151, "step": 8912 }, { "epoch": 3.4878488515682884, "grad_norm": 0.4874104433869976, "learning_rate": 4.65813704013114e-06, "loss": 0.4456, "step": 8913 }, { "epoch": 3.4882440108668806, "grad_norm": 0.5145352743350671, "learning_rate": 4.658057952677113e-06, "loss": 0.4768, "step": 8914 }, { "epoch": 3.488639170165473, "grad_norm": 0.49016021595770465, "learning_rate": 4.657978856747546e-06, "loss": 0.4567, "step": 8915 }, { "epoch": 3.489034329464065, "grad_norm": 0.48831218665648374, "learning_rate": 4.6578997523427475e-06, "loss": 0.4356, "step": 8916 }, { "epoch": 3.4894294887626573, "grad_norm": 0.5010113609737595, "learning_rate": 4.657820639463029e-06, "loss": 0.4799, "step": 8917 }, { "epoch": 3.4898246480612496, "grad_norm": 0.4907345989010224, "learning_rate": 4.657741518108702e-06, "loss": 0.4628, "step": 8918 }, { "epoch": 3.490219807359842, "grad_norm": 0.49499089035592075, "learning_rate": 4.657662388280076e-06, "loss": 0.4615, "step": 8919 }, { "epoch": 3.490614966658434, "grad_norm": 0.5066619543747422, "learning_rate": 4.657583249977463e-06, "loss": 0.4592, "step": 8920 }, { "epoch": 3.4910101259570263, "grad_norm": 0.4958302058204061, "learning_rate": 4.657504103201173e-06, "loss": 0.4485, "step": 8921 }, { "epoch": 3.4914052852556186, "grad_norm": 0.4944424623211386, "learning_rate": 4.657424947951517e-06, "loss": 0.4522, "step": 8922 }, { "epoch": 3.491800444554211, "grad_norm": 0.6267993198451537, "learning_rate": 4.657345784228805e-06, "loss": 0.462, "step": 8923 }, { "epoch": 3.492195603852803, "grad_norm": 0.548874751841137, "learning_rate": 4.657266612033348e-06, "loss": 0.4649, "step": 8924 }, { "epoch": 3.4925907631513953, "grad_norm": 0.5118844974346741, "learning_rate": 4.65718743136546e-06, "loss": 0.4482, "step": 8925 }, { "epoch": 3.4929859224499875, "grad_norm": 0.5033343474006436, "learning_rate": 4.657108242225449e-06, "loss": 0.4731, "step": 8926 }, { "epoch": 3.4933810817485798, "grad_norm": 0.4861803347304094, "learning_rate": 4.657029044613627e-06, "loss": 0.4503, "step": 8927 }, { "epoch": 3.493776241047172, "grad_norm": 0.4791250625600111, "learning_rate": 4.656949838530304e-06, "loss": 0.4428, "step": 8928 }, { "epoch": 3.4941714003457642, "grad_norm": 0.4863617818870344, "learning_rate": 4.656870623975791e-06, "loss": 0.4659, "step": 8929 }, { "epoch": 3.4945665596443565, "grad_norm": 0.5127343716951415, "learning_rate": 4.656791400950401e-06, "loss": 0.461, "step": 8930 }, { "epoch": 3.4949617189429487, "grad_norm": 0.49777034576549223, "learning_rate": 4.656712169454444e-06, "loss": 0.4585, "step": 8931 }, { "epoch": 3.495356878241541, "grad_norm": 0.48877150097142436, "learning_rate": 4.656632929488231e-06, "loss": 0.4547, "step": 8932 }, { "epoch": 3.495752037540133, "grad_norm": 0.4927474974697246, "learning_rate": 4.656553681052074e-06, "loss": 0.4581, "step": 8933 }, { "epoch": 3.4961471968387254, "grad_norm": 0.4849494940227447, "learning_rate": 4.656474424146283e-06, "loss": 0.4459, "step": 8934 }, { "epoch": 3.4965423561373177, "grad_norm": 0.48094079718336596, "learning_rate": 4.6563951587711706e-06, "loss": 0.4617, "step": 8935 }, { "epoch": 3.49693751543591, "grad_norm": 0.5234897546830422, "learning_rate": 4.6563158849270465e-06, "loss": 0.4462, "step": 8936 }, { "epoch": 3.497332674734502, "grad_norm": 0.49375252338582026, "learning_rate": 4.656236602614223e-06, "loss": 0.4497, "step": 8937 }, { "epoch": 3.4977278340330944, "grad_norm": 0.5267848201795143, "learning_rate": 4.656157311833013e-06, "loss": 0.442, "step": 8938 }, { "epoch": 3.4981229933316866, "grad_norm": 0.5060587932376777, "learning_rate": 4.656078012583725e-06, "loss": 0.4711, "step": 8939 }, { "epoch": 3.498518152630279, "grad_norm": 0.5025260956706785, "learning_rate": 4.655998704866672e-06, "loss": 0.4468, "step": 8940 }, { "epoch": 3.498913311928871, "grad_norm": 0.687091654136015, "learning_rate": 4.655919388682166e-06, "loss": 0.4844, "step": 8941 }, { "epoch": 3.4993084712274634, "grad_norm": 0.48026680469874616, "learning_rate": 4.655840064030517e-06, "loss": 0.4412, "step": 8942 }, { "epoch": 3.4997036305260556, "grad_norm": 0.4942720809704896, "learning_rate": 4.655760730912038e-06, "loss": 0.4645, "step": 8943 }, { "epoch": 3.500098789824648, "grad_norm": 0.509126848920385, "learning_rate": 4.655681389327039e-06, "loss": 0.4497, "step": 8944 }, { "epoch": 3.50049394912324, "grad_norm": 0.7110801116137855, "learning_rate": 4.655602039275833e-06, "loss": 0.4598, "step": 8945 }, { "epoch": 3.5008891084218323, "grad_norm": 0.5019582976854259, "learning_rate": 4.6555226807587304e-06, "loss": 0.4657, "step": 8946 }, { "epoch": 3.5012842677204246, "grad_norm": 0.47619258987485347, "learning_rate": 4.655443313776045e-06, "loss": 0.4483, "step": 8947 }, { "epoch": 3.501679427019017, "grad_norm": 0.5029273839023065, "learning_rate": 4.655363938328086e-06, "loss": 0.4669, "step": 8948 }, { "epoch": 3.502074586317609, "grad_norm": 0.5028619187317396, "learning_rate": 4.655284554415167e-06, "loss": 0.4582, "step": 8949 }, { "epoch": 3.5024697456162013, "grad_norm": 0.49264262307149015, "learning_rate": 4.655205162037598e-06, "loss": 0.4606, "step": 8950 }, { "epoch": 3.5028649049147935, "grad_norm": 0.4708730042896239, "learning_rate": 4.655125761195694e-06, "loss": 0.4526, "step": 8951 }, { "epoch": 3.5032600642133858, "grad_norm": 0.4861263707563721, "learning_rate": 4.655046351889763e-06, "loss": 0.4414, "step": 8952 }, { "epoch": 3.503655223511978, "grad_norm": 0.4893316048388238, "learning_rate": 4.654966934120118e-06, "loss": 0.453, "step": 8953 }, { "epoch": 3.5040503828105702, "grad_norm": 0.5139962221803164, "learning_rate": 4.654887507887073e-06, "loss": 0.465, "step": 8954 }, { "epoch": 3.5044455421091625, "grad_norm": 0.47724658926285185, "learning_rate": 4.654808073190937e-06, "loss": 0.4418, "step": 8955 }, { "epoch": 3.504840701407755, "grad_norm": 0.48987603430328075, "learning_rate": 4.654728630032024e-06, "loss": 0.4561, "step": 8956 }, { "epoch": 3.5052358607063474, "grad_norm": 0.49005477886288673, "learning_rate": 4.654649178410645e-06, "loss": 0.4554, "step": 8957 }, { "epoch": 3.5056310200049396, "grad_norm": 0.49167524318701145, "learning_rate": 4.654569718327113e-06, "loss": 0.4432, "step": 8958 }, { "epoch": 3.506026179303532, "grad_norm": 0.48832196263631317, "learning_rate": 4.654490249781739e-06, "loss": 0.4567, "step": 8959 }, { "epoch": 3.506421338602124, "grad_norm": 0.5195513981086755, "learning_rate": 4.654410772774836e-06, "loss": 0.4682, "step": 8960 }, { "epoch": 3.5068164979007164, "grad_norm": 0.4883940235257672, "learning_rate": 4.654331287306715e-06, "loss": 0.4648, "step": 8961 }, { "epoch": 3.5072116571993086, "grad_norm": 0.4939222640936546, "learning_rate": 4.65425179337769e-06, "loss": 0.4555, "step": 8962 }, { "epoch": 3.507606816497901, "grad_norm": 0.507042086996444, "learning_rate": 4.654172290988071e-06, "loss": 0.4688, "step": 8963 }, { "epoch": 3.508001975796493, "grad_norm": 0.4851747453407202, "learning_rate": 4.654092780138172e-06, "loss": 0.4667, "step": 8964 }, { "epoch": 3.5083971350950853, "grad_norm": 0.49132032301674466, "learning_rate": 4.654013260828304e-06, "loss": 0.4558, "step": 8965 }, { "epoch": 3.5087922943936776, "grad_norm": 0.5020222923780524, "learning_rate": 4.653933733058781e-06, "loss": 0.4471, "step": 8966 }, { "epoch": 3.50918745369227, "grad_norm": 0.4894974516850361, "learning_rate": 4.653854196829913e-06, "loss": 0.442, "step": 8967 }, { "epoch": 3.509582612990862, "grad_norm": 0.4868608182493534, "learning_rate": 4.653774652142014e-06, "loss": 0.4481, "step": 8968 }, { "epoch": 3.5099777722894543, "grad_norm": 0.7314629885353082, "learning_rate": 4.6536950989953965e-06, "loss": 0.4573, "step": 8969 }, { "epoch": 3.5103729315880465, "grad_norm": 0.48728983834155626, "learning_rate": 4.653615537390371e-06, "loss": 0.4548, "step": 8970 }, { "epoch": 3.5107680908866388, "grad_norm": 0.49949151444479584, "learning_rate": 4.653535967327253e-06, "loss": 0.463, "step": 8971 }, { "epoch": 3.511163250185231, "grad_norm": 0.4859042126598325, "learning_rate": 4.6534563888063535e-06, "loss": 0.4636, "step": 8972 }, { "epoch": 3.5115584094838233, "grad_norm": 0.49909226950739194, "learning_rate": 4.653376801827983e-06, "loss": 0.4647, "step": 8973 }, { "epoch": 3.5119535687824155, "grad_norm": 0.4841418346190016, "learning_rate": 4.653297206392458e-06, "loss": 0.4487, "step": 8974 }, { "epoch": 3.5123487280810077, "grad_norm": 0.5017750095021966, "learning_rate": 4.653217602500089e-06, "loss": 0.4772, "step": 8975 }, { "epoch": 3.5127438873796, "grad_norm": 0.5054355471499746, "learning_rate": 4.653137990151188e-06, "loss": 0.442, "step": 8976 }, { "epoch": 3.513139046678192, "grad_norm": 0.5124016068780876, "learning_rate": 4.6530583693460685e-06, "loss": 0.4693, "step": 8977 }, { "epoch": 3.5135342059767845, "grad_norm": 0.4884638972810405, "learning_rate": 4.6529787400850435e-06, "loss": 0.4549, "step": 8978 }, { "epoch": 3.5139293652753767, "grad_norm": 0.484980903508112, "learning_rate": 4.652899102368425e-06, "loss": 0.4508, "step": 8979 }, { "epoch": 3.514324524573969, "grad_norm": 0.4750814543537716, "learning_rate": 4.652819456196527e-06, "loss": 0.4455, "step": 8980 }, { "epoch": 3.514719683872561, "grad_norm": 0.4787417445872702, "learning_rate": 4.65273980156966e-06, "loss": 0.4497, "step": 8981 }, { "epoch": 3.5151148431711534, "grad_norm": 0.49080099109165837, "learning_rate": 4.6526601384881396e-06, "loss": 0.4487, "step": 8982 }, { "epoch": 3.5155100024697457, "grad_norm": 0.47651127208635596, "learning_rate": 4.652580466952277e-06, "loss": 0.4488, "step": 8983 }, { "epoch": 3.515905161768338, "grad_norm": 0.47966488268224355, "learning_rate": 4.6525007869623845e-06, "loss": 0.4513, "step": 8984 }, { "epoch": 3.51630032106693, "grad_norm": 0.4878014502335229, "learning_rate": 4.652421098518777e-06, "loss": 0.4429, "step": 8985 }, { "epoch": 3.5166954803655224, "grad_norm": 0.48298319426845326, "learning_rate": 4.652341401621766e-06, "loss": 0.448, "step": 8986 }, { "epoch": 3.5170906396641146, "grad_norm": 0.47671515243568685, "learning_rate": 4.652261696271665e-06, "loss": 0.4482, "step": 8987 }, { "epoch": 3.517485798962707, "grad_norm": 0.5012073706299464, "learning_rate": 4.652181982468787e-06, "loss": 0.4517, "step": 8988 }, { "epoch": 3.517880958261299, "grad_norm": 0.48952067375088115, "learning_rate": 4.652102260213445e-06, "loss": 0.4697, "step": 8989 }, { "epoch": 3.5182761175598913, "grad_norm": 0.5014312771832417, "learning_rate": 4.652022529505953e-06, "loss": 0.4617, "step": 8990 }, { "epoch": 3.5186712768584836, "grad_norm": 0.48760811032686774, "learning_rate": 4.651942790346622e-06, "loss": 0.4775, "step": 8991 }, { "epoch": 3.519066436157076, "grad_norm": 0.4804722298357196, "learning_rate": 4.6518630427357666e-06, "loss": 0.4565, "step": 8992 }, { "epoch": 3.519461595455668, "grad_norm": 0.49171418019719676, "learning_rate": 4.6517832866737e-06, "loss": 0.4605, "step": 8993 }, { "epoch": 3.5198567547542603, "grad_norm": 0.49918333071216625, "learning_rate": 4.651703522160736e-06, "loss": 0.463, "step": 8994 }, { "epoch": 3.5202519140528525, "grad_norm": 0.49170780602129877, "learning_rate": 4.651623749197187e-06, "loss": 0.4683, "step": 8995 }, { "epoch": 3.520647073351445, "grad_norm": 0.5033774543953301, "learning_rate": 4.651543967783366e-06, "loss": 0.4509, "step": 8996 }, { "epoch": 3.521042232650037, "grad_norm": 0.6184118730606496, "learning_rate": 4.651464177919587e-06, "loss": 0.4398, "step": 8997 }, { "epoch": 3.5214373919486293, "grad_norm": 0.5583292211707592, "learning_rate": 4.6513843796061624e-06, "loss": 0.441, "step": 8998 }, { "epoch": 3.5218325512472215, "grad_norm": 0.5111210467079835, "learning_rate": 4.651304572843407e-06, "loss": 0.4463, "step": 8999 }, { "epoch": 3.5222277105458137, "grad_norm": 0.4925902941362768, "learning_rate": 4.651224757631634e-06, "loss": 0.4565, "step": 9000 }, { "epoch": 3.522622869844406, "grad_norm": 0.5053536793843054, "learning_rate": 4.651144933971156e-06, "loss": 0.454, "step": 9001 }, { "epoch": 3.523018029142998, "grad_norm": 0.4939136118476549, "learning_rate": 4.651065101862287e-06, "loss": 0.4509, "step": 9002 }, { "epoch": 3.5234131884415905, "grad_norm": 0.5038896891132291, "learning_rate": 4.65098526130534e-06, "loss": 0.438, "step": 9003 }, { "epoch": 3.5238083477401827, "grad_norm": 0.5009202891855576, "learning_rate": 4.65090541230063e-06, "loss": 0.4411, "step": 9004 }, { "epoch": 3.524203507038775, "grad_norm": 0.498130501386656, "learning_rate": 4.650825554848468e-06, "loss": 0.4614, "step": 9005 }, { "epoch": 3.524598666337367, "grad_norm": 0.5107785107225102, "learning_rate": 4.650745688949171e-06, "loss": 0.4641, "step": 9006 }, { "epoch": 3.5249938256359594, "grad_norm": 0.4973684335934365, "learning_rate": 4.65066581460305e-06, "loss": 0.4511, "step": 9007 }, { "epoch": 3.5253889849345517, "grad_norm": 0.5120248873540496, "learning_rate": 4.650585931810421e-06, "loss": 0.4767, "step": 9008 }, { "epoch": 3.525784144233144, "grad_norm": 0.5032821292378913, "learning_rate": 4.6505060405715944e-06, "loss": 0.4502, "step": 9009 }, { "epoch": 3.526179303531736, "grad_norm": 0.4956958147919807, "learning_rate": 4.6504261408868875e-06, "loss": 0.4561, "step": 9010 }, { "epoch": 3.5265744628303284, "grad_norm": 0.4957187392523741, "learning_rate": 4.6503462327566115e-06, "loss": 0.4681, "step": 9011 }, { "epoch": 3.5269696221289206, "grad_norm": 0.4952159771925259, "learning_rate": 4.650266316181082e-06, "loss": 0.4461, "step": 9012 }, { "epoch": 3.527364781427513, "grad_norm": 0.5672869678695379, "learning_rate": 4.650186391160611e-06, "loss": 0.4461, "step": 9013 }, { "epoch": 3.527759940726105, "grad_norm": 0.5080704469053954, "learning_rate": 4.650106457695515e-06, "loss": 0.4708, "step": 9014 }, { "epoch": 3.5281551000246973, "grad_norm": 0.4936767560240317, "learning_rate": 4.650026515786106e-06, "loss": 0.4585, "step": 9015 }, { "epoch": 3.5285502593232896, "grad_norm": 0.5008042637689482, "learning_rate": 4.649946565432698e-06, "loss": 0.4692, "step": 9016 }, { "epoch": 3.528945418621882, "grad_norm": 0.4962751637351177, "learning_rate": 4.649866606635605e-06, "loss": 0.4354, "step": 9017 }, { "epoch": 3.529340577920474, "grad_norm": 0.5008934519010351, "learning_rate": 4.649786639395142e-06, "loss": 0.4699, "step": 9018 }, { "epoch": 3.5297357372190663, "grad_norm": 0.48596615093437223, "learning_rate": 4.649706663711623e-06, "loss": 0.461, "step": 9019 }, { "epoch": 3.5301308965176585, "grad_norm": 0.49089635795809994, "learning_rate": 4.649626679585361e-06, "loss": 0.4614, "step": 9020 }, { "epoch": 3.530526055816251, "grad_norm": 0.4946185325035558, "learning_rate": 4.64954668701667e-06, "loss": 0.4487, "step": 9021 }, { "epoch": 3.530921215114843, "grad_norm": 0.5126278761898772, "learning_rate": 4.6494666860058655e-06, "loss": 0.4568, "step": 9022 }, { "epoch": 3.5313163744134353, "grad_norm": 0.5285391391199131, "learning_rate": 4.649386676553262e-06, "loss": 0.4557, "step": 9023 }, { "epoch": 3.5317115337120275, "grad_norm": 0.5038626087100095, "learning_rate": 4.649306658659172e-06, "loss": 0.4707, "step": 9024 }, { "epoch": 3.5321066930106197, "grad_norm": 0.4859360226761564, "learning_rate": 4.649226632323911e-06, "loss": 0.4837, "step": 9025 }, { "epoch": 3.532501852309212, "grad_norm": 0.4966668712438498, "learning_rate": 4.649146597547792e-06, "loss": 0.4648, "step": 9026 }, { "epoch": 3.5328970116078047, "grad_norm": 0.48166044464094737, "learning_rate": 4.649066554331131e-06, "loss": 0.4455, "step": 9027 }, { "epoch": 3.533292170906397, "grad_norm": 0.49847140402641615, "learning_rate": 4.648986502674241e-06, "loss": 0.4745, "step": 9028 }, { "epoch": 3.533687330204989, "grad_norm": 0.48375147299765764, "learning_rate": 4.6489064425774375e-06, "loss": 0.448, "step": 9029 }, { "epoch": 3.5340824895035814, "grad_norm": 0.501937133054762, "learning_rate": 4.648826374041034e-06, "loss": 0.4857, "step": 9030 }, { "epoch": 3.5344776488021736, "grad_norm": 0.5027543628434751, "learning_rate": 4.648746297065345e-06, "loss": 0.4561, "step": 9031 }, { "epoch": 3.534872808100766, "grad_norm": 0.48644400230014095, "learning_rate": 4.648666211650686e-06, "loss": 0.4511, "step": 9032 }, { "epoch": 3.535267967399358, "grad_norm": 0.492175793746889, "learning_rate": 4.648586117797371e-06, "loss": 0.4451, "step": 9033 }, { "epoch": 3.5356631266979504, "grad_norm": 0.4854671322850227, "learning_rate": 4.648506015505714e-06, "loss": 0.4627, "step": 9034 }, { "epoch": 3.5360582859965426, "grad_norm": 0.4972183962198117, "learning_rate": 4.64842590477603e-06, "loss": 0.4699, "step": 9035 }, { "epoch": 3.536453445295135, "grad_norm": 0.5200452269905167, "learning_rate": 4.648345785608633e-06, "loss": 0.4788, "step": 9036 }, { "epoch": 3.536848604593727, "grad_norm": 0.530977112773409, "learning_rate": 4.64826565800384e-06, "loss": 0.4598, "step": 9037 }, { "epoch": 3.5372437638923193, "grad_norm": 0.48919921086325696, "learning_rate": 4.648185521961963e-06, "loss": 0.4605, "step": 9038 }, { "epoch": 3.5376389231909116, "grad_norm": 0.48321014692774794, "learning_rate": 4.648105377483318e-06, "loss": 0.4562, "step": 9039 }, { "epoch": 3.538034082489504, "grad_norm": 0.5096165386730921, "learning_rate": 4.648025224568219e-06, "loss": 0.4709, "step": 9040 }, { "epoch": 3.538429241788096, "grad_norm": 0.4992741223806897, "learning_rate": 4.647945063216981e-06, "loss": 0.4531, "step": 9041 }, { "epoch": 3.5388244010866883, "grad_norm": 0.5075195911086815, "learning_rate": 4.64786489342992e-06, "loss": 0.4482, "step": 9042 }, { "epoch": 3.5392195603852805, "grad_norm": 0.4817099394481762, "learning_rate": 4.64778471520735e-06, "loss": 0.4566, "step": 9043 }, { "epoch": 3.5396147196838728, "grad_norm": 0.4936601585082596, "learning_rate": 4.6477045285495845e-06, "loss": 0.4618, "step": 9044 }, { "epoch": 3.540009878982465, "grad_norm": 0.4892077110552015, "learning_rate": 4.647624333456941e-06, "loss": 0.4588, "step": 9045 }, { "epoch": 3.5404050382810572, "grad_norm": 0.4969970473771617, "learning_rate": 4.647544129929733e-06, "loss": 0.4653, "step": 9046 }, { "epoch": 3.5408001975796495, "grad_norm": 0.4819114190057846, "learning_rate": 4.647463917968275e-06, "loss": 0.4526, "step": 9047 }, { "epoch": 3.5411953568782417, "grad_norm": 0.48764776197722826, "learning_rate": 4.647383697572883e-06, "loss": 0.4338, "step": 9048 }, { "epoch": 3.541590516176834, "grad_norm": 0.4818100110717255, "learning_rate": 4.647303468743873e-06, "loss": 0.4755, "step": 9049 }, { "epoch": 3.541985675475426, "grad_norm": 0.5005030935665212, "learning_rate": 4.647223231481557e-06, "loss": 0.4614, "step": 9050 }, { "epoch": 3.5423808347740184, "grad_norm": 0.5150064517997461, "learning_rate": 4.647142985786254e-06, "loss": 0.4648, "step": 9051 }, { "epoch": 3.5427759940726107, "grad_norm": 0.48527068890730546, "learning_rate": 4.647062731658276e-06, "loss": 0.4558, "step": 9052 }, { "epoch": 3.543171153371203, "grad_norm": 0.5093185573266966, "learning_rate": 4.6469824690979394e-06, "loss": 0.4848, "step": 9053 }, { "epoch": 3.543566312669795, "grad_norm": 0.5371264924132514, "learning_rate": 4.64690219810556e-06, "loss": 0.4721, "step": 9054 }, { "epoch": 3.5439614719683874, "grad_norm": 0.49196835315592014, "learning_rate": 4.646821918681451e-06, "loss": 0.4584, "step": 9055 }, { "epoch": 3.5443566312669796, "grad_norm": 0.5095176634298171, "learning_rate": 4.64674163082593e-06, "loss": 0.454, "step": 9056 }, { "epoch": 3.544751790565572, "grad_norm": 0.48759629545526767, "learning_rate": 4.646661334539312e-06, "loss": 0.4679, "step": 9057 }, { "epoch": 3.545146949864164, "grad_norm": 0.4870613246032581, "learning_rate": 4.646581029821912e-06, "loss": 0.4512, "step": 9058 }, { "epoch": 3.5455421091627564, "grad_norm": 0.4823673270563382, "learning_rate": 4.646500716674044e-06, "loss": 0.4649, "step": 9059 }, { "epoch": 3.5459372684613486, "grad_norm": 0.4977917296918362, "learning_rate": 4.646420395096025e-06, "loss": 0.4615, "step": 9060 }, { "epoch": 3.546332427759941, "grad_norm": 0.4981541055070267, "learning_rate": 4.646340065088169e-06, "loss": 0.4505, "step": 9061 }, { "epoch": 3.546727587058533, "grad_norm": 0.4768432018824217, "learning_rate": 4.646259726650795e-06, "loss": 0.4401, "step": 9062 }, { "epoch": 3.5471227463571253, "grad_norm": 0.49995907837696146, "learning_rate": 4.646179379784215e-06, "loss": 0.4566, "step": 9063 }, { "epoch": 3.5475179056557176, "grad_norm": 0.529211500161168, "learning_rate": 4.646099024488745e-06, "loss": 0.4578, "step": 9064 }, { "epoch": 3.54791306495431, "grad_norm": 0.49321795179014727, "learning_rate": 4.646018660764702e-06, "loss": 0.4441, "step": 9065 }, { "epoch": 3.548308224252902, "grad_norm": 0.49786103565113565, "learning_rate": 4.6459382886123996e-06, "loss": 0.4731, "step": 9066 }, { "epoch": 3.5487033835514943, "grad_norm": 0.48219336482722647, "learning_rate": 4.645857908032156e-06, "loss": 0.4381, "step": 9067 }, { "epoch": 3.5490985428500865, "grad_norm": 0.4923319509953273, "learning_rate": 4.645777519024285e-06, "loss": 0.4582, "step": 9068 }, { "epoch": 3.5494937021486788, "grad_norm": 0.4865553175937021, "learning_rate": 4.645697121589103e-06, "loss": 0.4423, "step": 9069 }, { "epoch": 3.549888861447271, "grad_norm": 0.48095897113056707, "learning_rate": 4.645616715726926e-06, "loss": 0.448, "step": 9070 }, { "epoch": 3.5502840207458632, "grad_norm": 0.5001311843314722, "learning_rate": 4.645536301438069e-06, "loss": 0.4466, "step": 9071 }, { "epoch": 3.5506791800444555, "grad_norm": 0.477997428256851, "learning_rate": 4.645455878722848e-06, "loss": 0.4532, "step": 9072 }, { "epoch": 3.5510743393430477, "grad_norm": 0.49193722985280725, "learning_rate": 4.64537544758158e-06, "loss": 0.4599, "step": 9073 }, { "epoch": 3.55146949864164, "grad_norm": 0.4920447859862548, "learning_rate": 4.645295008014579e-06, "loss": 0.4532, "step": 9074 }, { "epoch": 3.551864657940232, "grad_norm": 0.49558747516046264, "learning_rate": 4.645214560022162e-06, "loss": 0.4736, "step": 9075 }, { "epoch": 3.5522598172388244, "grad_norm": 0.4762337243641712, "learning_rate": 4.6451341036046455e-06, "loss": 0.4517, "step": 9076 }, { "epoch": 3.5526549765374167, "grad_norm": 0.5013245757181765, "learning_rate": 4.6450536387623444e-06, "loss": 0.4648, "step": 9077 }, { "epoch": 3.553050135836009, "grad_norm": 0.5403068478658516, "learning_rate": 4.644973165495576e-06, "loss": 0.4449, "step": 9078 }, { "epoch": 3.553445295134601, "grad_norm": 0.4931560024394474, "learning_rate": 4.644892683804653e-06, "loss": 0.4646, "step": 9079 }, { "epoch": 3.5538404544331934, "grad_norm": 0.5078764038379829, "learning_rate": 4.644812193689897e-06, "loss": 0.4648, "step": 9080 }, { "epoch": 3.5542356137317856, "grad_norm": 0.506957616825252, "learning_rate": 4.64473169515162e-06, "loss": 0.4636, "step": 9081 }, { "epoch": 3.554630773030378, "grad_norm": 0.507432762723005, "learning_rate": 4.644651188190139e-06, "loss": 0.4732, "step": 9082 }, { "epoch": 3.55502593232897, "grad_norm": 0.48687923691371193, "learning_rate": 4.6445706728057705e-06, "loss": 0.4512, "step": 9083 }, { "epoch": 3.5554210916275624, "grad_norm": 0.6304721111076129, "learning_rate": 4.64449014899883e-06, "loss": 0.4679, "step": 9084 }, { "epoch": 3.5558162509261546, "grad_norm": 0.4888618812137909, "learning_rate": 4.644409616769635e-06, "loss": 0.458, "step": 9085 }, { "epoch": 3.556211410224747, "grad_norm": 0.4859315158657899, "learning_rate": 4.644329076118502e-06, "loss": 0.4544, "step": 9086 }, { "epoch": 3.556606569523339, "grad_norm": 0.4927974306644837, "learning_rate": 4.6442485270457464e-06, "loss": 0.4546, "step": 9087 }, { "epoch": 3.5570017288219313, "grad_norm": 0.46978089261360473, "learning_rate": 4.644167969551683e-06, "loss": 0.4435, "step": 9088 }, { "epoch": 3.5573968881205236, "grad_norm": 0.5002112740039599, "learning_rate": 4.644087403636631e-06, "loss": 0.4575, "step": 9089 }, { "epoch": 3.557792047419116, "grad_norm": 0.4816381481041411, "learning_rate": 4.644006829300906e-06, "loss": 0.4441, "step": 9090 }, { "epoch": 3.558187206717708, "grad_norm": 0.5106665981705985, "learning_rate": 4.643926246544823e-06, "loss": 0.4611, "step": 9091 }, { "epoch": 3.5585823660163003, "grad_norm": 0.49510570450790387, "learning_rate": 4.6438456553687e-06, "loss": 0.4687, "step": 9092 }, { "epoch": 3.5589775253148925, "grad_norm": 0.507508378297751, "learning_rate": 4.6437650557728535e-06, "loss": 0.4652, "step": 9093 }, { "epoch": 3.5593726846134848, "grad_norm": 0.49897649507220837, "learning_rate": 4.643684447757599e-06, "loss": 0.45, "step": 9094 }, { "epoch": 3.559767843912077, "grad_norm": 0.5042685832105795, "learning_rate": 4.643603831323255e-06, "loss": 0.447, "step": 9095 }, { "epoch": 3.5601630032106693, "grad_norm": 0.5209604652999842, "learning_rate": 4.643523206470135e-06, "loss": 0.4588, "step": 9096 }, { "epoch": 3.5605581625092615, "grad_norm": 0.49949487365809175, "learning_rate": 4.6434425731985585e-06, "loss": 0.4741, "step": 9097 }, { "epoch": 3.5609533218078537, "grad_norm": 0.5157946856320857, "learning_rate": 4.643361931508841e-06, "loss": 0.4709, "step": 9098 }, { "epoch": 3.561348481106446, "grad_norm": 0.5076379876922849, "learning_rate": 4.6432812814013e-06, "loss": 0.4518, "step": 9099 }, { "epoch": 3.561743640405038, "grad_norm": 0.48357121942351644, "learning_rate": 4.64320062287625e-06, "loss": 0.4689, "step": 9100 }, { "epoch": 3.5621387997036305, "grad_norm": 0.4851016675612043, "learning_rate": 4.64311995593401e-06, "loss": 0.4623, "step": 9101 }, { "epoch": 3.5625339590022227, "grad_norm": 0.5032142946703332, "learning_rate": 4.643039280574897e-06, "loss": 0.4569, "step": 9102 }, { "epoch": 3.562929118300815, "grad_norm": 0.4981056358231815, "learning_rate": 4.642958596799226e-06, "loss": 0.4652, "step": 9103 }, { "epoch": 3.563324277599407, "grad_norm": 0.5045031493173247, "learning_rate": 4.642877904607316e-06, "loss": 0.4757, "step": 9104 }, { "epoch": 3.5637194368979994, "grad_norm": 0.49929741491236107, "learning_rate": 4.642797203999482e-06, "loss": 0.4553, "step": 9105 }, { "epoch": 3.5641145961965917, "grad_norm": 0.48493063212795195, "learning_rate": 4.6427164949760415e-06, "loss": 0.4519, "step": 9106 }, { "epoch": 3.564509755495184, "grad_norm": 0.4825224454556553, "learning_rate": 4.642635777537312e-06, "loss": 0.4657, "step": 9107 }, { "epoch": 3.564904914793776, "grad_norm": 0.4923598417193523, "learning_rate": 4.6425550516836106e-06, "loss": 0.4535, "step": 9108 }, { "epoch": 3.5653000740923684, "grad_norm": 0.5002848790092057, "learning_rate": 4.6424743174152544e-06, "loss": 0.4617, "step": 9109 }, { "epoch": 3.5656952333909606, "grad_norm": 0.5246927195871764, "learning_rate": 4.642393574732559e-06, "loss": 0.4581, "step": 9110 }, { "epoch": 3.566090392689553, "grad_norm": 0.5054517893159672, "learning_rate": 4.642312823635843e-06, "loss": 0.4571, "step": 9111 }, { "epoch": 3.566485551988145, "grad_norm": 0.483406457593407, "learning_rate": 4.642232064125424e-06, "loss": 0.4415, "step": 9112 }, { "epoch": 3.5668807112867373, "grad_norm": 0.49885049133184317, "learning_rate": 4.642151296201617e-06, "loss": 0.45, "step": 9113 }, { "epoch": 3.5672758705853296, "grad_norm": 0.6059844092001336, "learning_rate": 4.6420705198647405e-06, "loss": 0.4584, "step": 9114 }, { "epoch": 3.567671029883922, "grad_norm": 0.49181406902845737, "learning_rate": 4.641989735115112e-06, "loss": 0.4593, "step": 9115 }, { "epoch": 3.568066189182514, "grad_norm": 0.4942661823361688, "learning_rate": 4.64190894195305e-06, "loss": 0.4683, "step": 9116 }, { "epoch": 3.5684613484811063, "grad_norm": 0.4954476984028424, "learning_rate": 4.641828140378868e-06, "loss": 0.4657, "step": 9117 }, { "epoch": 3.5688565077796985, "grad_norm": 0.4937799659377194, "learning_rate": 4.641747330392886e-06, "loss": 0.4503, "step": 9118 }, { "epoch": 3.569251667078291, "grad_norm": 0.4902163898739017, "learning_rate": 4.641666511995422e-06, "loss": 0.463, "step": 9119 }, { "epoch": 3.569646826376883, "grad_norm": 0.5043714006810839, "learning_rate": 4.641585685186792e-06, "loss": 0.4624, "step": 9120 }, { "epoch": 3.5700419856754753, "grad_norm": 0.5004840014877152, "learning_rate": 4.641504849967315e-06, "loss": 0.4879, "step": 9121 }, { "epoch": 3.5704371449740675, "grad_norm": 0.5077783702630878, "learning_rate": 4.6414240063373065e-06, "loss": 0.4456, "step": 9122 }, { "epoch": 3.5708323042726597, "grad_norm": 0.4824382259117086, "learning_rate": 4.6413431542970845e-06, "loss": 0.4638, "step": 9123 }, { "epoch": 3.571227463571252, "grad_norm": 0.4871993531897932, "learning_rate": 4.641262293846966e-06, "loss": 0.4599, "step": 9124 }, { "epoch": 3.571622622869844, "grad_norm": 0.49457463356784, "learning_rate": 4.641181424987271e-06, "loss": 0.4611, "step": 9125 }, { "epoch": 3.5720177821684365, "grad_norm": 0.48893854230152123, "learning_rate": 4.641100547718314e-06, "loss": 0.4459, "step": 9126 }, { "epoch": 3.5724129414670287, "grad_norm": 0.4814786016167628, "learning_rate": 4.641019662040417e-06, "loss": 0.4518, "step": 9127 }, { "epoch": 3.572808100765621, "grad_norm": 0.4990303217153392, "learning_rate": 4.6409387679538925e-06, "loss": 0.4668, "step": 9128 }, { "epoch": 3.573203260064213, "grad_norm": 0.5001213765361451, "learning_rate": 4.640857865459061e-06, "loss": 0.4632, "step": 9129 }, { "epoch": 3.5735984193628054, "grad_norm": 0.47937361186876326, "learning_rate": 4.6407769545562395e-06, "loss": 0.4403, "step": 9130 }, { "epoch": 3.5739935786613977, "grad_norm": 0.4886845046562659, "learning_rate": 4.6406960352457476e-06, "loss": 0.4671, "step": 9131 }, { "epoch": 3.57438873795999, "grad_norm": 0.6230155087433286, "learning_rate": 4.6406151075279e-06, "loss": 0.4663, "step": 9132 }, { "epoch": 3.574783897258582, "grad_norm": 0.49211316752499784, "learning_rate": 4.640534171403017e-06, "loss": 0.4662, "step": 9133 }, { "epoch": 3.5751790565571744, "grad_norm": 0.47931129273412515, "learning_rate": 4.640453226871415e-06, "loss": 0.462, "step": 9134 }, { "epoch": 3.5755742158557666, "grad_norm": 0.4801125074561214, "learning_rate": 4.640372273933412e-06, "loss": 0.4514, "step": 9135 }, { "epoch": 3.575969375154359, "grad_norm": 0.48855690347192954, "learning_rate": 4.6402913125893275e-06, "loss": 0.4655, "step": 9136 }, { "epoch": 3.576364534452951, "grad_norm": 0.4902447553553269, "learning_rate": 4.640210342839479e-06, "loss": 0.4594, "step": 9137 }, { "epoch": 3.5767596937515433, "grad_norm": 0.48368454552996915, "learning_rate": 4.640129364684182e-06, "loss": 0.4566, "step": 9138 }, { "epoch": 3.5771548530501356, "grad_norm": 0.514151993001096, "learning_rate": 4.640048378123757e-06, "loss": 0.4547, "step": 9139 }, { "epoch": 3.577550012348728, "grad_norm": 0.4776789489243305, "learning_rate": 4.639967383158523e-06, "loss": 0.4598, "step": 9140 }, { "epoch": 3.57794517164732, "grad_norm": 0.48525569302537913, "learning_rate": 4.639886379788794e-06, "loss": 0.4418, "step": 9141 }, { "epoch": 3.5783403309459123, "grad_norm": 0.478721665258136, "learning_rate": 4.6398053680148926e-06, "loss": 0.4651, "step": 9142 }, { "epoch": 3.5787354902445045, "grad_norm": 0.47444493198435933, "learning_rate": 4.639724347837135e-06, "loss": 0.4424, "step": 9143 }, { "epoch": 3.579130649543097, "grad_norm": 0.5070133096361706, "learning_rate": 4.639643319255838e-06, "loss": 0.466, "step": 9144 }, { "epoch": 3.5795258088416895, "grad_norm": 0.4866231994143781, "learning_rate": 4.639562282271323e-06, "loss": 0.4792, "step": 9145 }, { "epoch": 3.5799209681402817, "grad_norm": 0.513869457185633, "learning_rate": 4.6394812368839055e-06, "loss": 0.4557, "step": 9146 }, { "epoch": 3.580316127438874, "grad_norm": 0.48227115983762886, "learning_rate": 4.639400183093905e-06, "loss": 0.4516, "step": 9147 }, { "epoch": 3.580711286737466, "grad_norm": 0.5869304828960821, "learning_rate": 4.63931912090164e-06, "loss": 0.4585, "step": 9148 }, { "epoch": 3.5811064460360584, "grad_norm": 0.4882734722747367, "learning_rate": 4.639238050307428e-06, "loss": 0.4565, "step": 9149 }, { "epoch": 3.5815016053346507, "grad_norm": 0.5044115242028842, "learning_rate": 4.639156971311589e-06, "loss": 0.4693, "step": 9150 }, { "epoch": 3.581896764633243, "grad_norm": 0.5019659383311873, "learning_rate": 4.63907588391444e-06, "loss": 0.4629, "step": 9151 }, { "epoch": 3.582291923931835, "grad_norm": 0.5141519049977628, "learning_rate": 4.638994788116299e-06, "loss": 0.4677, "step": 9152 }, { "epoch": 3.5826870832304274, "grad_norm": 0.4785979998489708, "learning_rate": 4.638913683917486e-06, "loss": 0.4595, "step": 9153 }, { "epoch": 3.5830822425290196, "grad_norm": 0.4922496588594111, "learning_rate": 4.638832571318319e-06, "loss": 0.4422, "step": 9154 }, { "epoch": 3.583477401827612, "grad_norm": 0.49580896733041097, "learning_rate": 4.6387514503191165e-06, "loss": 0.448, "step": 9155 }, { "epoch": 3.583872561126204, "grad_norm": 0.48090076896396333, "learning_rate": 4.638670320920196e-06, "loss": 0.4618, "step": 9156 }, { "epoch": 3.5842677204247964, "grad_norm": 0.4790421513834484, "learning_rate": 4.638589183121879e-06, "loss": 0.467, "step": 9157 }, { "epoch": 3.5846628797233886, "grad_norm": 0.4878225988687124, "learning_rate": 4.63850803692448e-06, "loss": 0.4849, "step": 9158 }, { "epoch": 3.585058039021981, "grad_norm": 0.4870640632594355, "learning_rate": 4.638426882328322e-06, "loss": 0.4645, "step": 9159 }, { "epoch": 3.585453198320573, "grad_norm": 0.5050638549536798, "learning_rate": 4.638345719333721e-06, "loss": 0.4572, "step": 9160 }, { "epoch": 3.5858483576191653, "grad_norm": 0.5069748386116536, "learning_rate": 4.638264547940996e-06, "loss": 0.4553, "step": 9161 }, { "epoch": 3.5862435169177576, "grad_norm": 0.5578612016671592, "learning_rate": 4.6381833681504675e-06, "loss": 0.4625, "step": 9162 }, { "epoch": 3.58663867621635, "grad_norm": 0.4656285059162261, "learning_rate": 4.638102179962452e-06, "loss": 0.4616, "step": 9163 }, { "epoch": 3.587033835514942, "grad_norm": 0.4795082800303555, "learning_rate": 4.63802098337727e-06, "loss": 0.4658, "step": 9164 }, { "epoch": 3.5874289948135343, "grad_norm": 0.4989309789649853, "learning_rate": 4.637939778395239e-06, "loss": 0.4533, "step": 9165 }, { "epoch": 3.5878241541121265, "grad_norm": 0.4790162592288028, "learning_rate": 4.637858565016679e-06, "loss": 0.4608, "step": 9166 }, { "epoch": 3.5882193134107188, "grad_norm": 0.4881752317773561, "learning_rate": 4.6377773432419105e-06, "loss": 0.4633, "step": 9167 }, { "epoch": 3.588614472709311, "grad_norm": 0.4892512885912421, "learning_rate": 4.637696113071249e-06, "loss": 0.4697, "step": 9168 }, { "epoch": 3.5890096320079032, "grad_norm": 0.4723594520462734, "learning_rate": 4.637614874505016e-06, "loss": 0.4436, "step": 9169 }, { "epoch": 3.5894047913064955, "grad_norm": 0.4893238305591922, "learning_rate": 4.637533627543529e-06, "loss": 0.4557, "step": 9170 }, { "epoch": 3.5897999506050877, "grad_norm": 0.48322974662494034, "learning_rate": 4.637452372187109e-06, "loss": 0.4547, "step": 9171 }, { "epoch": 3.59019510990368, "grad_norm": 0.5042287973047576, "learning_rate": 4.6373711084360725e-06, "loss": 0.4595, "step": 9172 }, { "epoch": 3.590590269202272, "grad_norm": 0.48463044178358355, "learning_rate": 4.637289836290741e-06, "loss": 0.4553, "step": 9173 }, { "epoch": 3.5909854285008644, "grad_norm": 0.48284154312198374, "learning_rate": 4.6372085557514335e-06, "loss": 0.4398, "step": 9174 }, { "epoch": 3.5913805877994567, "grad_norm": 0.48052961472036454, "learning_rate": 4.637127266818467e-06, "loss": 0.4667, "step": 9175 }, { "epoch": 3.591775747098049, "grad_norm": 0.5272462072802832, "learning_rate": 4.637045969492164e-06, "loss": 0.4617, "step": 9176 }, { "epoch": 3.592170906396641, "grad_norm": 0.47896935915209116, "learning_rate": 4.636964663772841e-06, "loss": 0.4543, "step": 9177 }, { "epoch": 3.5925660656952334, "grad_norm": 0.5590814683075651, "learning_rate": 4.636883349660819e-06, "loss": 0.4606, "step": 9178 }, { "epoch": 3.5929612249938256, "grad_norm": 0.5042016745245929, "learning_rate": 4.6368020271564166e-06, "loss": 0.4789, "step": 9179 }, { "epoch": 3.593356384292418, "grad_norm": 0.47545093889683404, "learning_rate": 4.636720696259954e-06, "loss": 0.4471, "step": 9180 }, { "epoch": 3.59375154359101, "grad_norm": 0.4886174481015884, "learning_rate": 4.636639356971749e-06, "loss": 0.4501, "step": 9181 }, { "epoch": 3.5941467028896024, "grad_norm": 0.4909615597451317, "learning_rate": 4.6365580092921224e-06, "loss": 0.4536, "step": 9182 }, { "epoch": 3.5945418621881946, "grad_norm": 0.5011647702759715, "learning_rate": 4.6364766532213936e-06, "loss": 0.4594, "step": 9183 }, { "epoch": 3.594937021486787, "grad_norm": 0.4891039483973173, "learning_rate": 4.636395288759881e-06, "loss": 0.4428, "step": 9184 }, { "epoch": 3.595332180785379, "grad_norm": 0.49238304772818725, "learning_rate": 4.6363139159079056e-06, "loss": 0.4598, "step": 9185 }, { "epoch": 3.5957273400839713, "grad_norm": 0.499060773425193, "learning_rate": 4.636232534665787e-06, "loss": 0.4816, "step": 9186 }, { "epoch": 3.5961224993825636, "grad_norm": 0.4876846908708361, "learning_rate": 4.636151145033844e-06, "loss": 0.4695, "step": 9187 }, { "epoch": 3.596517658681156, "grad_norm": 0.5010421129141813, "learning_rate": 4.636069747012395e-06, "loss": 0.4674, "step": 9188 }, { "epoch": 3.596912817979748, "grad_norm": 0.4742596611960192, "learning_rate": 4.6359883406017625e-06, "loss": 0.4597, "step": 9189 }, { "epoch": 3.5973079772783403, "grad_norm": 0.5045304178288424, "learning_rate": 4.635906925802264e-06, "loss": 0.4566, "step": 9190 }, { "epoch": 3.5977031365769325, "grad_norm": 0.49285646311806497, "learning_rate": 4.635825502614221e-06, "loss": 0.4506, "step": 9191 }, { "epoch": 3.5980982958755248, "grad_norm": 1.3189611198454247, "learning_rate": 4.635744071037952e-06, "loss": 0.4826, "step": 9192 }, { "epoch": 3.598493455174117, "grad_norm": 0.4978191275680937, "learning_rate": 4.6356626310737774e-06, "loss": 0.4644, "step": 9193 }, { "epoch": 3.5988886144727092, "grad_norm": 0.48939510446702533, "learning_rate": 4.635581182722017e-06, "loss": 0.4695, "step": 9194 }, { "epoch": 3.5992837737713015, "grad_norm": 0.5054530113910645, "learning_rate": 4.635499725982989e-06, "loss": 0.4633, "step": 9195 }, { "epoch": 3.5996789330698937, "grad_norm": 0.48017657962523336, "learning_rate": 4.6354182608570155e-06, "loss": 0.4469, "step": 9196 }, { "epoch": 3.600074092368486, "grad_norm": 0.5007634832293895, "learning_rate": 4.635336787344416e-06, "loss": 0.4594, "step": 9197 }, { "epoch": 3.600469251667078, "grad_norm": 0.5032668243356092, "learning_rate": 4.635255305445511e-06, "loss": 0.4653, "step": 9198 }, { "epoch": 3.6008644109656704, "grad_norm": 0.49875765737609773, "learning_rate": 4.635173815160619e-06, "loss": 0.4605, "step": 9199 }, { "epoch": 3.6012595702642627, "grad_norm": 0.5062930640900483, "learning_rate": 4.635092316490061e-06, "loss": 0.4514, "step": 9200 }, { "epoch": 3.601654729562855, "grad_norm": 0.4974728918315835, "learning_rate": 4.635010809434157e-06, "loss": 0.4514, "step": 9201 }, { "epoch": 3.602049888861447, "grad_norm": 0.49648655888984, "learning_rate": 4.634929293993226e-06, "loss": 0.4744, "step": 9202 }, { "epoch": 3.6024450481600394, "grad_norm": 0.4824566489860067, "learning_rate": 4.634847770167591e-06, "loss": 0.4635, "step": 9203 }, { "epoch": 3.6028402074586316, "grad_norm": 0.4979705347753646, "learning_rate": 4.6347662379575685e-06, "loss": 0.451, "step": 9204 }, { "epoch": 3.603235366757224, "grad_norm": 0.5062537574749927, "learning_rate": 4.634684697363482e-06, "loss": 0.4744, "step": 9205 }, { "epoch": 3.603630526055816, "grad_norm": 0.5738607455547821, "learning_rate": 4.634603148385649e-06, "loss": 0.4592, "step": 9206 }, { "epoch": 3.6040256853544084, "grad_norm": 1.189807804448986, "learning_rate": 4.6345215910243915e-06, "loss": 0.4479, "step": 9207 }, { "epoch": 3.6044208446530006, "grad_norm": 0.4879581966562651, "learning_rate": 4.634440025280029e-06, "loss": 0.4646, "step": 9208 }, { "epoch": 3.604816003951593, "grad_norm": 0.5497629591149095, "learning_rate": 4.634358451152883e-06, "loss": 0.4546, "step": 9209 }, { "epoch": 3.605211163250185, "grad_norm": 0.5194558268040678, "learning_rate": 4.634276868643273e-06, "loss": 0.4611, "step": 9210 }, { "epoch": 3.6056063225487773, "grad_norm": 0.4845956782876475, "learning_rate": 4.634195277751518e-06, "loss": 0.4445, "step": 9211 }, { "epoch": 3.6060014818473696, "grad_norm": 0.4940635660411414, "learning_rate": 4.634113678477942e-06, "loss": 0.4524, "step": 9212 }, { "epoch": 3.606396641145962, "grad_norm": 0.5070909576956948, "learning_rate": 4.634032070822862e-06, "loss": 0.4833, "step": 9213 }, { "epoch": 3.606791800444554, "grad_norm": 0.490670333205641, "learning_rate": 4.633950454786601e-06, "loss": 0.4624, "step": 9214 }, { "epoch": 3.6071869597431463, "grad_norm": 0.4835269632726879, "learning_rate": 4.633868830369477e-06, "loss": 0.4555, "step": 9215 }, { "epoch": 3.607582119041739, "grad_norm": 0.4992739285137002, "learning_rate": 4.633787197571813e-06, "loss": 0.4769, "step": 9216 }, { "epoch": 3.607977278340331, "grad_norm": 0.5402074831328878, "learning_rate": 4.633705556393928e-06, "loss": 0.4751, "step": 9217 }, { "epoch": 3.6083724376389235, "grad_norm": 0.48961767171363246, "learning_rate": 4.633623906836144e-06, "loss": 0.4565, "step": 9218 }, { "epoch": 3.6087675969375157, "grad_norm": 0.4838103684730782, "learning_rate": 4.63354224889878e-06, "loss": 0.4506, "step": 9219 }, { "epoch": 3.609162756236108, "grad_norm": 0.4925277287138113, "learning_rate": 4.633460582582157e-06, "loss": 0.4488, "step": 9220 }, { "epoch": 3.6095579155347, "grad_norm": 0.4825571381803209, "learning_rate": 4.633378907886597e-06, "loss": 0.4607, "step": 9221 }, { "epoch": 3.6099530748332924, "grad_norm": 0.48936877599995243, "learning_rate": 4.633297224812422e-06, "loss": 0.4578, "step": 9222 }, { "epoch": 3.6103482341318847, "grad_norm": 0.4905493219692705, "learning_rate": 4.633215533359949e-06, "loss": 0.4848, "step": 9223 }, { "epoch": 3.610743393430477, "grad_norm": 0.5041306176026158, "learning_rate": 4.633133833529501e-06, "loss": 0.4625, "step": 9224 }, { "epoch": 3.611138552729069, "grad_norm": 0.47807489803501046, "learning_rate": 4.633052125321399e-06, "loss": 0.4579, "step": 9225 }, { "epoch": 3.6115337120276614, "grad_norm": 0.4905504604932367, "learning_rate": 4.632970408735963e-06, "loss": 0.4677, "step": 9226 }, { "epoch": 3.6119288713262536, "grad_norm": 0.48751702506148886, "learning_rate": 4.632888683773515e-06, "loss": 0.4787, "step": 9227 }, { "epoch": 3.612324030624846, "grad_norm": 0.497021089178736, "learning_rate": 4.6328069504343745e-06, "loss": 0.4421, "step": 9228 }, { "epoch": 3.612719189923438, "grad_norm": 0.5243484285892979, "learning_rate": 4.632725208718864e-06, "loss": 0.451, "step": 9229 }, { "epoch": 3.6131143492220303, "grad_norm": 0.49379034366259994, "learning_rate": 4.6326434586273035e-06, "loss": 0.4446, "step": 9230 }, { "epoch": 3.6135095085206226, "grad_norm": 0.5142924559989173, "learning_rate": 4.632561700160015e-06, "loss": 0.4488, "step": 9231 }, { "epoch": 3.613904667819215, "grad_norm": 0.504763239789171, "learning_rate": 4.632479933317319e-06, "loss": 0.4603, "step": 9232 }, { "epoch": 3.614299827117807, "grad_norm": 0.503230561587256, "learning_rate": 4.632398158099537e-06, "loss": 0.4544, "step": 9233 }, { "epoch": 3.6146949864163993, "grad_norm": 0.490744168584867, "learning_rate": 4.63231637450699e-06, "loss": 0.4409, "step": 9234 }, { "epoch": 3.6150901457149915, "grad_norm": 0.5095413791531018, "learning_rate": 4.6322345825399985e-06, "loss": 0.4655, "step": 9235 }, { "epoch": 3.615485305013584, "grad_norm": 0.4937333674079909, "learning_rate": 4.6321527821988845e-06, "loss": 0.4571, "step": 9236 }, { "epoch": 3.615880464312176, "grad_norm": 0.4898084499608632, "learning_rate": 4.632070973483969e-06, "loss": 0.4839, "step": 9237 }, { "epoch": 3.6162756236107683, "grad_norm": 0.5007373601317516, "learning_rate": 4.631989156395574e-06, "loss": 0.4541, "step": 9238 }, { "epoch": 3.6166707829093605, "grad_norm": 0.48723321374477363, "learning_rate": 4.631907330934019e-06, "loss": 0.48, "step": 9239 }, { "epoch": 3.6170659422079527, "grad_norm": 0.48448261567750023, "learning_rate": 4.631825497099627e-06, "loss": 0.4543, "step": 9240 }, { "epoch": 3.617461101506545, "grad_norm": 0.5316625962607656, "learning_rate": 4.63174365489272e-06, "loss": 0.4764, "step": 9241 }, { "epoch": 3.6178562608051372, "grad_norm": 0.49953626646231936, "learning_rate": 4.6316618043136165e-06, "loss": 0.4518, "step": 9242 }, { "epoch": 3.6182514201037295, "grad_norm": 0.5003965600017454, "learning_rate": 4.631579945362641e-06, "loss": 0.4579, "step": 9243 }, { "epoch": 3.6186465794023217, "grad_norm": 0.48092823514138977, "learning_rate": 4.631498078040114e-06, "loss": 0.448, "step": 9244 }, { "epoch": 3.619041738700914, "grad_norm": 0.49710416001614555, "learning_rate": 4.631416202346357e-06, "loss": 0.4595, "step": 9245 }, { "epoch": 3.619436897999506, "grad_norm": 0.49446651343934783, "learning_rate": 4.631334318281691e-06, "loss": 0.4602, "step": 9246 }, { "epoch": 3.6198320572980984, "grad_norm": 0.5009277044883268, "learning_rate": 4.631252425846439e-06, "loss": 0.4408, "step": 9247 }, { "epoch": 3.6202272165966907, "grad_norm": 0.4948355571473325, "learning_rate": 4.63117052504092e-06, "loss": 0.4506, "step": 9248 }, { "epoch": 3.620622375895283, "grad_norm": 0.48896770764585235, "learning_rate": 4.631088615865458e-06, "loss": 0.455, "step": 9249 }, { "epoch": 3.621017535193875, "grad_norm": 0.48712330415560645, "learning_rate": 4.631006698320374e-06, "loss": 0.4579, "step": 9250 }, { "epoch": 3.6214126944924674, "grad_norm": 0.5013178593300339, "learning_rate": 4.630924772405989e-06, "loss": 0.4721, "step": 9251 }, { "epoch": 3.6218078537910596, "grad_norm": 0.691080349270438, "learning_rate": 4.630842838122627e-06, "loss": 0.4728, "step": 9252 }, { "epoch": 3.622203013089652, "grad_norm": 0.4974090024974102, "learning_rate": 4.630760895470607e-06, "loss": 0.4682, "step": 9253 }, { "epoch": 3.622598172388244, "grad_norm": 0.4738706863887212, "learning_rate": 4.630678944450253e-06, "loss": 0.43, "step": 9254 }, { "epoch": 3.6229933316868363, "grad_norm": 0.4996067080433458, "learning_rate": 4.630596985061886e-06, "loss": 0.4625, "step": 9255 }, { "epoch": 3.6233884909854286, "grad_norm": 0.49310669745464825, "learning_rate": 4.630515017305827e-06, "loss": 0.4723, "step": 9256 }, { "epoch": 3.623783650284021, "grad_norm": 0.5093600935457048, "learning_rate": 4.630433041182398e-06, "loss": 0.4711, "step": 9257 }, { "epoch": 3.624178809582613, "grad_norm": 0.48115236735295236, "learning_rate": 4.630351056691923e-06, "loss": 0.4537, "step": 9258 }, { "epoch": 3.6245739688812053, "grad_norm": 0.5011382943630356, "learning_rate": 4.630269063834723e-06, "loss": 0.458, "step": 9259 }, { "epoch": 3.6249691281797976, "grad_norm": 0.5079718914818618, "learning_rate": 4.630187062611119e-06, "loss": 0.4779, "step": 9260 }, { "epoch": 3.62536428747839, "grad_norm": 0.4980405547820634, "learning_rate": 4.630105053021433e-06, "loss": 0.4665, "step": 9261 }, { "epoch": 3.625759446776982, "grad_norm": 0.4893026369877525, "learning_rate": 4.6300230350659885e-06, "loss": 0.4648, "step": 9262 }, { "epoch": 3.6261546060755743, "grad_norm": 0.5555460867365863, "learning_rate": 4.629941008745108e-06, "loss": 0.4647, "step": 9263 }, { "epoch": 3.6265497653741665, "grad_norm": 0.5759949696380431, "learning_rate": 4.629858974059111e-06, "loss": 0.4673, "step": 9264 }, { "epoch": 3.6269449246727588, "grad_norm": 0.48052405561183786, "learning_rate": 4.629776931008322e-06, "loss": 0.458, "step": 9265 }, { "epoch": 3.627340083971351, "grad_norm": 0.5010331429705553, "learning_rate": 4.629694879593062e-06, "loss": 0.4754, "step": 9266 }, { "epoch": 3.6277352432699432, "grad_norm": 0.5007942666179961, "learning_rate": 4.6296128198136545e-06, "loss": 0.4696, "step": 9267 }, { "epoch": 3.6281304025685355, "grad_norm": 0.5016428896270185, "learning_rate": 4.62953075167042e-06, "loss": 0.4672, "step": 9268 }, { "epoch": 3.6285255618671277, "grad_norm": 0.49272882028841225, "learning_rate": 4.629448675163682e-06, "loss": 0.4639, "step": 9269 }, { "epoch": 3.62892072116572, "grad_norm": 0.48986056236330106, "learning_rate": 4.629366590293763e-06, "loss": 0.4695, "step": 9270 }, { "epoch": 3.629315880464312, "grad_norm": 0.4962133184327803, "learning_rate": 4.629284497060985e-06, "loss": 0.4502, "step": 9271 }, { "epoch": 3.6297110397629044, "grad_norm": 0.4898941204572704, "learning_rate": 4.629202395465672e-06, "loss": 0.4609, "step": 9272 }, { "epoch": 3.6301061990614967, "grad_norm": 0.49506032850463966, "learning_rate": 4.629120285508143e-06, "loss": 0.4482, "step": 9273 }, { "epoch": 3.630501358360089, "grad_norm": 0.5171758595413785, "learning_rate": 4.629038167188723e-06, "loss": 0.4501, "step": 9274 }, { "epoch": 3.630896517658681, "grad_norm": 0.4967738395377754, "learning_rate": 4.628956040507734e-06, "loss": 0.4649, "step": 9275 }, { "epoch": 3.6312916769572734, "grad_norm": 0.5063747871234539, "learning_rate": 4.628873905465498e-06, "loss": 0.4629, "step": 9276 }, { "epoch": 3.6316868362558656, "grad_norm": 0.47510127574337424, "learning_rate": 4.628791762062338e-06, "loss": 0.4449, "step": 9277 }, { "epoch": 3.632081995554458, "grad_norm": 0.49070050113003616, "learning_rate": 4.628709610298578e-06, "loss": 0.4599, "step": 9278 }, { "epoch": 3.63247715485305, "grad_norm": 0.513370408704164, "learning_rate": 4.628627450174537e-06, "loss": 0.463, "step": 9279 }, { "epoch": 3.6328723141516424, "grad_norm": 0.5091150770823012, "learning_rate": 4.628545281690541e-06, "loss": 0.4746, "step": 9280 }, { "epoch": 3.6332674734502346, "grad_norm": 0.5151701240747677, "learning_rate": 4.628463104846912e-06, "loss": 0.4705, "step": 9281 }, { "epoch": 3.633662632748827, "grad_norm": 0.49056073566435143, "learning_rate": 4.628380919643972e-06, "loss": 0.4638, "step": 9282 }, { "epoch": 3.634057792047419, "grad_norm": 0.5144421824248759, "learning_rate": 4.6282987260820445e-06, "loss": 0.4778, "step": 9283 }, { "epoch": 3.6344529513460113, "grad_norm": 0.49744475854641185, "learning_rate": 4.6282165241614515e-06, "loss": 0.458, "step": 9284 }, { "epoch": 3.6348481106446036, "grad_norm": 0.5035190140052788, "learning_rate": 4.628134313882518e-06, "loss": 0.4693, "step": 9285 }, { "epoch": 3.635243269943196, "grad_norm": 0.5615476058939156, "learning_rate": 4.6280520952455635e-06, "loss": 0.4932, "step": 9286 }, { "epoch": 3.635638429241788, "grad_norm": 0.49189317167932184, "learning_rate": 4.627969868250912e-06, "loss": 0.4635, "step": 9287 }, { "epoch": 3.6360335885403803, "grad_norm": 0.4912590147231549, "learning_rate": 4.6278876328988885e-06, "loss": 0.4639, "step": 9288 }, { "epoch": 3.6364287478389725, "grad_norm": 2.521359709853842, "learning_rate": 4.627805389189814e-06, "loss": 0.4611, "step": 9289 }, { "epoch": 3.6368239071375648, "grad_norm": 0.486064042323978, "learning_rate": 4.627723137124012e-06, "loss": 0.4412, "step": 9290 }, { "epoch": 3.637219066436157, "grad_norm": 0.5048179415790461, "learning_rate": 4.627640876701806e-06, "loss": 0.4571, "step": 9291 }, { "epoch": 3.6376142257347492, "grad_norm": 0.4959460801045448, "learning_rate": 4.627558607923517e-06, "loss": 0.4677, "step": 9292 }, { "epoch": 3.6380093850333415, "grad_norm": 0.48066916313414726, "learning_rate": 4.627476330789471e-06, "loss": 0.4447, "step": 9293 }, { "epoch": 3.6384045443319337, "grad_norm": 0.4973239424582944, "learning_rate": 4.62739404529999e-06, "loss": 0.4672, "step": 9294 }, { "epoch": 3.638799703630526, "grad_norm": 0.48199497014952, "learning_rate": 4.627311751455397e-06, "loss": 0.4517, "step": 9295 }, { "epoch": 3.639194862929118, "grad_norm": 0.49279718443129383, "learning_rate": 4.627229449256014e-06, "loss": 0.4693, "step": 9296 }, { "epoch": 3.6395900222277104, "grad_norm": 0.5010681919023393, "learning_rate": 4.627147138702166e-06, "loss": 0.4743, "step": 9297 }, { "epoch": 3.6399851815263027, "grad_norm": 0.47695376136618906, "learning_rate": 4.627064819794177e-06, "loss": 0.454, "step": 9298 }, { "epoch": 3.640380340824895, "grad_norm": 0.4888943085386212, "learning_rate": 4.626982492532368e-06, "loss": 0.4456, "step": 9299 }, { "epoch": 3.640775500123487, "grad_norm": 0.4771083873842363, "learning_rate": 4.626900156917064e-06, "loss": 0.468, "step": 9300 }, { "epoch": 3.6411706594220794, "grad_norm": 0.48136951242486825, "learning_rate": 4.626817812948586e-06, "loss": 0.4516, "step": 9301 }, { "epoch": 3.6415658187206716, "grad_norm": 0.4979788453537473, "learning_rate": 4.6267354606272605e-06, "loss": 0.4723, "step": 9302 }, { "epoch": 3.641960978019264, "grad_norm": 0.5102309614105011, "learning_rate": 4.62665309995341e-06, "loss": 0.4598, "step": 9303 }, { "epoch": 3.642356137317856, "grad_norm": 0.49649828248075617, "learning_rate": 4.6265707309273565e-06, "loss": 0.4686, "step": 9304 }, { "epoch": 3.6427512966164484, "grad_norm": 0.48125108712989917, "learning_rate": 4.626488353549425e-06, "loss": 0.4482, "step": 9305 }, { "epoch": 3.6431464559150406, "grad_norm": 0.5210918594356808, "learning_rate": 4.626405967819938e-06, "loss": 0.459, "step": 9306 }, { "epoch": 3.643541615213633, "grad_norm": 0.47329837989409934, "learning_rate": 4.626323573739219e-06, "loss": 0.4465, "step": 9307 }, { "epoch": 3.643936774512225, "grad_norm": 0.5042256332277069, "learning_rate": 4.626241171307593e-06, "loss": 0.4573, "step": 9308 }, { "epoch": 3.6443319338108173, "grad_norm": 1.0400744964458153, "learning_rate": 4.626158760525383e-06, "loss": 0.4835, "step": 9309 }, { "epoch": 3.6447270931094096, "grad_norm": 0.5001511613882816, "learning_rate": 4.6260763413929124e-06, "loss": 0.4533, "step": 9310 }, { "epoch": 3.645122252408002, "grad_norm": 0.4940591531345029, "learning_rate": 4.625993913910505e-06, "loss": 0.4646, "step": 9311 }, { "epoch": 3.645517411706594, "grad_norm": 0.4942019788158831, "learning_rate": 4.625911478078484e-06, "loss": 0.4551, "step": 9312 }, { "epoch": 3.6459125710051863, "grad_norm": 0.4884265738310717, "learning_rate": 4.6258290338971735e-06, "loss": 0.4517, "step": 9313 }, { "epoch": 3.6463077303037785, "grad_norm": 0.4908742478353482, "learning_rate": 4.625746581366898e-06, "loss": 0.4711, "step": 9314 }, { "epoch": 3.6467028896023708, "grad_norm": 0.5066441924253818, "learning_rate": 4.625664120487981e-06, "loss": 0.4672, "step": 9315 }, { "epoch": 3.647098048900963, "grad_norm": 0.4975632434645502, "learning_rate": 4.625581651260745e-06, "loss": 0.4463, "step": 9316 }, { "epoch": 3.6474932081995552, "grad_norm": 0.5054480921919917, "learning_rate": 4.625499173685516e-06, "loss": 0.4676, "step": 9317 }, { "epoch": 3.6478883674981475, "grad_norm": 0.4881775251448275, "learning_rate": 4.6254166877626175e-06, "loss": 0.4441, "step": 9318 }, { "epoch": 3.6482835267967397, "grad_norm": 0.48222427669341467, "learning_rate": 4.625334193492371e-06, "loss": 0.4665, "step": 9319 }, { "epoch": 3.648678686095332, "grad_norm": 0.4936417971367663, "learning_rate": 4.625251690875104e-06, "loss": 0.4888, "step": 9320 }, { "epoch": 3.649073845393924, "grad_norm": 0.49953652158686007, "learning_rate": 4.6251691799111376e-06, "loss": 0.4675, "step": 9321 }, { "epoch": 3.6494690046925164, "grad_norm": 0.49771002562217137, "learning_rate": 4.625086660600798e-06, "loss": 0.468, "step": 9322 }, { "epoch": 3.6498641639911087, "grad_norm": 0.49436016460202736, "learning_rate": 4.625004132944409e-06, "loss": 0.4492, "step": 9323 }, { "epoch": 3.650259323289701, "grad_norm": 0.46676714681432757, "learning_rate": 4.624921596942292e-06, "loss": 0.4241, "step": 9324 }, { "epoch": 3.650654482588293, "grad_norm": 0.49467615491708383, "learning_rate": 4.6248390525947755e-06, "loss": 0.4691, "step": 9325 }, { "epoch": 3.6510496418868854, "grad_norm": 0.4938231345250084, "learning_rate": 4.624756499902181e-06, "loss": 0.46, "step": 9326 }, { "epoch": 3.6514448011854777, "grad_norm": 0.4838739325764053, "learning_rate": 4.624673938864832e-06, "loss": 0.4513, "step": 9327 }, { "epoch": 3.65183996048407, "grad_norm": 0.49112971366065933, "learning_rate": 4.6245913694830545e-06, "loss": 0.4524, "step": 9328 }, { "epoch": 3.652235119782662, "grad_norm": 0.5011715788855052, "learning_rate": 4.624508791757173e-06, "loss": 0.4856, "step": 9329 }, { "epoch": 3.6526302790812544, "grad_norm": 0.4993605096743152, "learning_rate": 4.62442620568751e-06, "loss": 0.471, "step": 9330 }, { "epoch": 3.6530254383798466, "grad_norm": 0.48611034727334546, "learning_rate": 4.624343611274391e-06, "loss": 0.4552, "step": 9331 }, { "epoch": 3.653420597678439, "grad_norm": 0.5253566397582367, "learning_rate": 4.624261008518141e-06, "loss": 0.4566, "step": 9332 }, { "epoch": 3.653815756977031, "grad_norm": 0.5175702806785849, "learning_rate": 4.624178397419083e-06, "loss": 0.4533, "step": 9333 }, { "epoch": 3.6542109162756238, "grad_norm": 0.4994237852186553, "learning_rate": 4.624095777977543e-06, "loss": 0.447, "step": 9334 }, { "epoch": 3.654606075574216, "grad_norm": 0.4940996164170102, "learning_rate": 4.624013150193844e-06, "loss": 0.4544, "step": 9335 }, { "epoch": 3.6550012348728083, "grad_norm": 0.4931404991433969, "learning_rate": 4.623930514068311e-06, "loss": 0.4364, "step": 9336 }, { "epoch": 3.6553963941714005, "grad_norm": 0.5060095886918009, "learning_rate": 4.623847869601269e-06, "loss": 0.4622, "step": 9337 }, { "epoch": 3.6557915534699927, "grad_norm": 0.4864732607657538, "learning_rate": 4.623765216793042e-06, "loss": 0.4508, "step": 9338 }, { "epoch": 3.656186712768585, "grad_norm": 0.515417673885724, "learning_rate": 4.623682555643955e-06, "loss": 0.4746, "step": 9339 }, { "epoch": 3.656581872067177, "grad_norm": 0.5093440038900928, "learning_rate": 4.623599886154333e-06, "loss": 0.4586, "step": 9340 }, { "epoch": 3.6569770313657695, "grad_norm": 0.4884238293387281, "learning_rate": 4.623517208324499e-06, "loss": 0.4611, "step": 9341 }, { "epoch": 3.6573721906643617, "grad_norm": 0.4949252091120524, "learning_rate": 4.623434522154779e-06, "loss": 0.453, "step": 9342 }, { "epoch": 3.657767349962954, "grad_norm": 0.4996973803416822, "learning_rate": 4.623351827645498e-06, "loss": 0.4615, "step": 9343 }, { "epoch": 3.658162509261546, "grad_norm": 0.504144710893261, "learning_rate": 4.623269124796981e-06, "loss": 0.4673, "step": 9344 }, { "epoch": 3.6585576685601384, "grad_norm": 0.4969157578415948, "learning_rate": 4.623186413609552e-06, "loss": 0.4487, "step": 9345 }, { "epoch": 3.6589528278587307, "grad_norm": 0.4932886314806929, "learning_rate": 4.623103694083535e-06, "loss": 0.456, "step": 9346 }, { "epoch": 3.659347987157323, "grad_norm": 0.5148738766834107, "learning_rate": 4.623020966219257e-06, "loss": 0.4471, "step": 9347 }, { "epoch": 3.659743146455915, "grad_norm": 0.5830202209682454, "learning_rate": 4.622938230017041e-06, "loss": 0.4643, "step": 9348 }, { "epoch": 3.6601383057545074, "grad_norm": 0.48992602819472203, "learning_rate": 4.622855485477214e-06, "loss": 0.4474, "step": 9349 }, { "epoch": 3.6605334650530996, "grad_norm": 0.48284568742478173, "learning_rate": 4.622772732600098e-06, "loss": 0.4623, "step": 9350 }, { "epoch": 3.660928624351692, "grad_norm": 0.48801605211010135, "learning_rate": 4.622689971386021e-06, "loss": 0.4426, "step": 9351 }, { "epoch": 3.661323783650284, "grad_norm": 0.49203131886308554, "learning_rate": 4.6226072018353055e-06, "loss": 0.4389, "step": 9352 }, { "epoch": 3.6617189429488763, "grad_norm": 0.48655905238073444, "learning_rate": 4.622524423948279e-06, "loss": 0.4602, "step": 9353 }, { "epoch": 3.6621141022474686, "grad_norm": 0.4934300579478431, "learning_rate": 4.6224416377252645e-06, "loss": 0.4439, "step": 9354 }, { "epoch": 3.662509261546061, "grad_norm": 0.4858789552341792, "learning_rate": 4.622358843166589e-06, "loss": 0.4555, "step": 9355 }, { "epoch": 3.662904420844653, "grad_norm": 0.4899213923525825, "learning_rate": 4.622276040272576e-06, "loss": 0.4529, "step": 9356 }, { "epoch": 3.6632995801432453, "grad_norm": 0.494097810082973, "learning_rate": 4.622193229043552e-06, "loss": 0.4511, "step": 9357 }, { "epoch": 3.6636947394418375, "grad_norm": 0.4844491051320266, "learning_rate": 4.622110409479842e-06, "loss": 0.4587, "step": 9358 }, { "epoch": 3.66408989874043, "grad_norm": 0.4914228114049162, "learning_rate": 4.622027581581771e-06, "loss": 0.4487, "step": 9359 }, { "epoch": 3.664485058039022, "grad_norm": 0.49201477673440297, "learning_rate": 4.6219447453496626e-06, "loss": 0.4654, "step": 9360 }, { "epoch": 3.6648802173376143, "grad_norm": 0.5030420734743306, "learning_rate": 4.621861900783845e-06, "loss": 0.4556, "step": 9361 }, { "epoch": 3.6652753766362065, "grad_norm": 0.49226581282451, "learning_rate": 4.621779047884642e-06, "loss": 0.4662, "step": 9362 }, { "epoch": 3.6656705359347987, "grad_norm": 0.485604702744728, "learning_rate": 4.621696186652379e-06, "loss": 0.4651, "step": 9363 }, { "epoch": 3.666065695233391, "grad_norm": 0.4947225613132439, "learning_rate": 4.621613317087382e-06, "loss": 0.45, "step": 9364 }, { "epoch": 3.6664608545319832, "grad_norm": 0.49964170799077484, "learning_rate": 4.6215304391899765e-06, "loss": 0.4368, "step": 9365 }, { "epoch": 3.6668560138305755, "grad_norm": 0.4640684797492653, "learning_rate": 4.621447552960488e-06, "loss": 0.4426, "step": 9366 }, { "epoch": 3.6672511731291677, "grad_norm": 0.493726848514641, "learning_rate": 4.621364658399241e-06, "loss": 0.4724, "step": 9367 }, { "epoch": 3.66764633242776, "grad_norm": 0.48809324691574657, "learning_rate": 4.621281755506562e-06, "loss": 0.4362, "step": 9368 }, { "epoch": 3.668041491726352, "grad_norm": 0.4939624266923956, "learning_rate": 4.621198844282777e-06, "loss": 0.4726, "step": 9369 }, { "epoch": 3.6684366510249444, "grad_norm": 0.49641926622094995, "learning_rate": 4.62111592472821e-06, "loss": 0.4717, "step": 9370 }, { "epoch": 3.6688318103235367, "grad_norm": 0.5050735506985684, "learning_rate": 4.6210329968431876e-06, "loss": 0.4674, "step": 9371 }, { "epoch": 3.669226969622129, "grad_norm": 0.4909447418334573, "learning_rate": 4.620950060628037e-06, "loss": 0.451, "step": 9372 }, { "epoch": 3.669622128920721, "grad_norm": 0.48904389952363275, "learning_rate": 4.620867116083081e-06, "loss": 0.4497, "step": 9373 }, { "epoch": 3.6700172882193134, "grad_norm": 0.5054031870535084, "learning_rate": 4.620784163208647e-06, "loss": 0.4945, "step": 9374 }, { "epoch": 3.6704124475179056, "grad_norm": 0.5433428790690614, "learning_rate": 4.6207012020050614e-06, "loss": 0.4533, "step": 9375 }, { "epoch": 3.670807606816498, "grad_norm": 0.5002618872303594, "learning_rate": 4.620618232472649e-06, "loss": 0.4489, "step": 9376 }, { "epoch": 3.67120276611509, "grad_norm": 0.48690513054549206, "learning_rate": 4.620535254611735e-06, "loss": 0.4678, "step": 9377 }, { "epoch": 3.6715979254136824, "grad_norm": 0.48402593380651104, "learning_rate": 4.6204522684226475e-06, "loss": 0.4506, "step": 9378 }, { "epoch": 3.6719930847122746, "grad_norm": 0.5103203925733566, "learning_rate": 4.620369273905711e-06, "loss": 0.4528, "step": 9379 }, { "epoch": 3.672388244010867, "grad_norm": 0.48564066203388767, "learning_rate": 4.620286271061251e-06, "loss": 0.4622, "step": 9380 }, { "epoch": 3.672783403309459, "grad_norm": 0.47915158304914873, "learning_rate": 4.620203259889593e-06, "loss": 0.4528, "step": 9381 }, { "epoch": 3.6731785626080513, "grad_norm": 0.4852717018202506, "learning_rate": 4.620120240391065e-06, "loss": 0.4716, "step": 9382 }, { "epoch": 3.6735737219066436, "grad_norm": 0.496307917359212, "learning_rate": 4.620037212565992e-06, "loss": 0.4636, "step": 9383 }, { "epoch": 3.673968881205236, "grad_norm": 0.49948922206920776, "learning_rate": 4.6199541764147e-06, "loss": 0.45, "step": 9384 }, { "epoch": 3.674364040503828, "grad_norm": 0.4906585809016417, "learning_rate": 4.619871131937516e-06, "loss": 0.4709, "step": 9385 }, { "epoch": 3.6747591998024203, "grad_norm": 0.49580349029874954, "learning_rate": 4.619788079134766e-06, "loss": 0.4575, "step": 9386 }, { "epoch": 3.6751543591010125, "grad_norm": 0.4877988171055439, "learning_rate": 4.619705018006775e-06, "loss": 0.4606, "step": 9387 }, { "epoch": 3.6755495183996048, "grad_norm": 0.47635905394834055, "learning_rate": 4.619621948553869e-06, "loss": 0.4598, "step": 9388 }, { "epoch": 3.675944677698197, "grad_norm": 0.4848704937642032, "learning_rate": 4.619538870776375e-06, "loss": 0.4531, "step": 9389 }, { "epoch": 3.6763398369967892, "grad_norm": 0.5132723431379773, "learning_rate": 4.61945578467462e-06, "loss": 0.4813, "step": 9390 }, { "epoch": 3.6767349962953815, "grad_norm": 0.49446717879197793, "learning_rate": 4.61937269024893e-06, "loss": 0.4732, "step": 9391 }, { "epoch": 3.6771301555939737, "grad_norm": 0.48783487200107034, "learning_rate": 4.619289587499631e-06, "loss": 0.4642, "step": 9392 }, { "epoch": 3.677525314892566, "grad_norm": 0.5095115851096903, "learning_rate": 4.619206476427049e-06, "loss": 0.456, "step": 9393 }, { "epoch": 3.677920474191158, "grad_norm": 0.4947934593811385, "learning_rate": 4.619123357031511e-06, "loss": 0.4721, "step": 9394 }, { "epoch": 3.6783156334897504, "grad_norm": 0.5075806473304344, "learning_rate": 4.619040229313343e-06, "loss": 0.4786, "step": 9395 }, { "epoch": 3.6787107927883427, "grad_norm": 0.5801842763429373, "learning_rate": 4.618957093272872e-06, "loss": 0.4517, "step": 9396 }, { "epoch": 3.679105952086935, "grad_norm": 0.5005276061566315, "learning_rate": 4.618873948910425e-06, "loss": 0.454, "step": 9397 }, { "epoch": 3.679501111385527, "grad_norm": 0.5031083933218373, "learning_rate": 4.618790796226327e-06, "loss": 0.4526, "step": 9398 }, { "epoch": 3.6798962706841194, "grad_norm": 0.4919801264259388, "learning_rate": 4.618707635220905e-06, "loss": 0.4524, "step": 9399 }, { "epoch": 3.6802914299827116, "grad_norm": 0.4978593733121662, "learning_rate": 4.6186244658944865e-06, "loss": 0.4633, "step": 9400 }, { "epoch": 3.680686589281304, "grad_norm": 0.4998560933361007, "learning_rate": 4.618541288247397e-06, "loss": 0.4571, "step": 9401 }, { "epoch": 3.681081748579896, "grad_norm": 0.4916373727411663, "learning_rate": 4.618458102279964e-06, "loss": 0.4585, "step": 9402 }, { "epoch": 3.6814769078784884, "grad_norm": 0.4888930129846756, "learning_rate": 4.6183749079925145e-06, "loss": 0.4678, "step": 9403 }, { "epoch": 3.6818720671770806, "grad_norm": 0.4956175383165202, "learning_rate": 4.618291705385374e-06, "loss": 0.4691, "step": 9404 }, { "epoch": 3.6822672264756733, "grad_norm": 0.5119254964059565, "learning_rate": 4.61820849445887e-06, "loss": 0.4503, "step": 9405 }, { "epoch": 3.6826623857742655, "grad_norm": 0.49166332352889675, "learning_rate": 4.61812527521333e-06, "loss": 0.4644, "step": 9406 }, { "epoch": 3.6830575450728578, "grad_norm": 0.4813292349115858, "learning_rate": 4.61804204764908e-06, "loss": 0.448, "step": 9407 }, { "epoch": 3.68345270437145, "grad_norm": 0.49544114801840133, "learning_rate": 4.6179588117664465e-06, "loss": 0.4531, "step": 9408 }, { "epoch": 3.6838478636700422, "grad_norm": 0.5062682853242432, "learning_rate": 4.6178755675657565e-06, "loss": 0.4658, "step": 9409 }, { "epoch": 3.6842430229686345, "grad_norm": 0.4885565367565571, "learning_rate": 4.617792315047338e-06, "loss": 0.4576, "step": 9410 }, { "epoch": 3.6846381822672267, "grad_norm": 0.4927003220978129, "learning_rate": 4.6177090542115176e-06, "loss": 0.4545, "step": 9411 }, { "epoch": 3.685033341565819, "grad_norm": 0.5342756099499717, "learning_rate": 4.617625785058622e-06, "loss": 0.4444, "step": 9412 }, { "epoch": 3.685428500864411, "grad_norm": 0.5191362354914825, "learning_rate": 4.617542507588977e-06, "loss": 0.4632, "step": 9413 }, { "epoch": 3.6858236601630034, "grad_norm": 0.5005797890017317, "learning_rate": 4.6174592218029115e-06, "loss": 0.4565, "step": 9414 }, { "epoch": 3.6862188194615957, "grad_norm": 0.5115586690359692, "learning_rate": 4.617375927700752e-06, "loss": 0.4716, "step": 9415 }, { "epoch": 3.686613978760188, "grad_norm": 0.5168058976627257, "learning_rate": 4.617292625282826e-06, "loss": 0.4794, "step": 9416 }, { "epoch": 3.68700913805878, "grad_norm": 0.5038401934419339, "learning_rate": 4.617209314549459e-06, "loss": 0.4795, "step": 9417 }, { "epoch": 3.6874042973573724, "grad_norm": 0.49671493204697803, "learning_rate": 4.617125995500981e-06, "loss": 0.4742, "step": 9418 }, { "epoch": 3.6877994566559646, "grad_norm": 0.5245802827596923, "learning_rate": 4.617042668137717e-06, "loss": 0.4655, "step": 9419 }, { "epoch": 3.688194615954557, "grad_norm": 0.49027669847543004, "learning_rate": 4.616959332459995e-06, "loss": 0.4685, "step": 9420 }, { "epoch": 3.688589775253149, "grad_norm": 0.5318803917463871, "learning_rate": 4.616875988468142e-06, "loss": 0.4566, "step": 9421 }, { "epoch": 3.6889849345517414, "grad_norm": 0.47512179395486337, "learning_rate": 4.616792636162486e-06, "loss": 0.4508, "step": 9422 }, { "epoch": 3.6893800938503336, "grad_norm": 0.4995389465446858, "learning_rate": 4.616709275543353e-06, "loss": 0.4573, "step": 9423 }, { "epoch": 3.689775253148926, "grad_norm": 0.4952456922866283, "learning_rate": 4.616625906611072e-06, "loss": 0.4449, "step": 9424 }, { "epoch": 3.690170412447518, "grad_norm": 0.4681009974875698, "learning_rate": 4.61654252936597e-06, "loss": 0.4443, "step": 9425 }, { "epoch": 3.6905655717461103, "grad_norm": 0.49698293961782763, "learning_rate": 4.616459143808374e-06, "loss": 0.4504, "step": 9426 }, { "epoch": 3.6909607310447026, "grad_norm": 0.4799614987205371, "learning_rate": 4.616375749938612e-06, "loss": 0.4827, "step": 9427 }, { "epoch": 3.691355890343295, "grad_norm": 0.4935543789709199, "learning_rate": 4.61629234775701e-06, "loss": 0.454, "step": 9428 }, { "epoch": 3.691751049641887, "grad_norm": 0.48312786289813203, "learning_rate": 4.616208937263897e-06, "loss": 0.4618, "step": 9429 }, { "epoch": 3.6921462089404793, "grad_norm": 0.48779193972998197, "learning_rate": 4.616125518459601e-06, "loss": 0.4664, "step": 9430 }, { "epoch": 3.6925413682390715, "grad_norm": 0.5102169263992223, "learning_rate": 4.616042091344449e-06, "loss": 0.4528, "step": 9431 }, { "epoch": 3.6929365275376638, "grad_norm": 0.5028415523931336, "learning_rate": 4.615958655918768e-06, "loss": 0.4761, "step": 9432 }, { "epoch": 3.693331686836256, "grad_norm": 0.49065489093573394, "learning_rate": 4.615875212182887e-06, "loss": 0.4392, "step": 9433 }, { "epoch": 3.6937268461348483, "grad_norm": 0.49732637769698324, "learning_rate": 4.615791760137133e-06, "loss": 0.4633, "step": 9434 }, { "epoch": 3.6941220054334405, "grad_norm": 0.49238892823032243, "learning_rate": 4.615708299781833e-06, "loss": 0.4622, "step": 9435 }, { "epoch": 3.6945171647320327, "grad_norm": 0.49343740327676344, "learning_rate": 4.615624831117316e-06, "loss": 0.4568, "step": 9436 }, { "epoch": 3.694912324030625, "grad_norm": 0.4861385224097656, "learning_rate": 4.615541354143908e-06, "loss": 0.4503, "step": 9437 }, { "epoch": 3.695307483329217, "grad_norm": 0.497943873992621, "learning_rate": 4.61545786886194e-06, "loss": 0.4495, "step": 9438 }, { "epoch": 3.6957026426278095, "grad_norm": 0.48616932102253096, "learning_rate": 4.615374375271738e-06, "loss": 0.476, "step": 9439 }, { "epoch": 3.6960978019264017, "grad_norm": 0.49036991811170894, "learning_rate": 4.615290873373629e-06, "loss": 0.4574, "step": 9440 }, { "epoch": 3.696492961224994, "grad_norm": 0.4874642622968561, "learning_rate": 4.615207363167943e-06, "loss": 0.4534, "step": 9441 }, { "epoch": 3.696888120523586, "grad_norm": 0.4874523989618741, "learning_rate": 4.615123844655006e-06, "loss": 0.4669, "step": 9442 }, { "epoch": 3.6972832798221784, "grad_norm": 0.4893447874095786, "learning_rate": 4.615040317835147e-06, "loss": 0.4441, "step": 9443 }, { "epoch": 3.6976784391207707, "grad_norm": 0.5035498076080915, "learning_rate": 4.614956782708694e-06, "loss": 0.4679, "step": 9444 }, { "epoch": 3.698073598419363, "grad_norm": 0.4973304541825886, "learning_rate": 4.614873239275976e-06, "loss": 0.4513, "step": 9445 }, { "epoch": 3.698468757717955, "grad_norm": 0.5653732922507139, "learning_rate": 4.6147896875373185e-06, "loss": 0.4583, "step": 9446 }, { "epoch": 3.6988639170165474, "grad_norm": 0.47730565163897437, "learning_rate": 4.614706127493052e-06, "loss": 0.4706, "step": 9447 }, { "epoch": 3.6992590763151396, "grad_norm": 0.4900913046606925, "learning_rate": 4.614622559143504e-06, "loss": 0.4713, "step": 9448 }, { "epoch": 3.699654235613732, "grad_norm": 0.4807624939670291, "learning_rate": 4.614538982489003e-06, "loss": 0.462, "step": 9449 }, { "epoch": 3.700049394912324, "grad_norm": 0.5255103013851058, "learning_rate": 4.614455397529876e-06, "loss": 0.4596, "step": 9450 }, { "epoch": 3.7004445542109163, "grad_norm": 0.49339176253468786, "learning_rate": 4.614371804266453e-06, "loss": 0.4446, "step": 9451 }, { "epoch": 3.7008397135095086, "grad_norm": 0.479984900218154, "learning_rate": 4.614288202699061e-06, "loss": 0.4539, "step": 9452 }, { "epoch": 3.701234872808101, "grad_norm": 0.4906333419555256, "learning_rate": 4.6142045928280284e-06, "loss": 0.4602, "step": 9453 }, { "epoch": 3.701630032106693, "grad_norm": 0.47786003426235846, "learning_rate": 4.6141209746536855e-06, "loss": 0.451, "step": 9454 }, { "epoch": 3.7020251914052853, "grad_norm": 0.505494984357728, "learning_rate": 4.614037348176358e-06, "loss": 0.4777, "step": 9455 }, { "epoch": 3.7024203507038775, "grad_norm": 0.5053597345252311, "learning_rate": 4.613953713396376e-06, "loss": 0.4582, "step": 9456 }, { "epoch": 3.70281551000247, "grad_norm": 0.5125562819742167, "learning_rate": 4.613870070314067e-06, "loss": 0.4626, "step": 9457 }, { "epoch": 3.703210669301062, "grad_norm": 0.5017156061662159, "learning_rate": 4.6137864189297595e-06, "loss": 0.451, "step": 9458 }, { "epoch": 3.7036058285996543, "grad_norm": 0.47667609480628925, "learning_rate": 4.613702759243784e-06, "loss": 0.4517, "step": 9459 }, { "epoch": 3.7040009878982465, "grad_norm": 0.4825374845237388, "learning_rate": 4.613619091256466e-06, "loss": 0.4474, "step": 9460 }, { "epoch": 3.7043961471968387, "grad_norm": 0.47835338941796834, "learning_rate": 4.6135354149681365e-06, "loss": 0.4469, "step": 9461 }, { "epoch": 3.704791306495431, "grad_norm": 0.9294106269951347, "learning_rate": 4.6134517303791235e-06, "loss": 0.4607, "step": 9462 }, { "epoch": 3.705186465794023, "grad_norm": 0.4930233385165018, "learning_rate": 4.613368037489756e-06, "loss": 0.4506, "step": 9463 }, { "epoch": 3.7055816250926155, "grad_norm": 0.49884851479906, "learning_rate": 4.613284336300361e-06, "loss": 0.4536, "step": 9464 }, { "epoch": 3.7059767843912077, "grad_norm": 0.47765620112543045, "learning_rate": 4.613200626811268e-06, "loss": 0.4554, "step": 9465 }, { "epoch": 3.7063719436898, "grad_norm": 0.49830740900398934, "learning_rate": 4.613116909022807e-06, "loss": 0.4485, "step": 9466 }, { "epoch": 3.706767102988392, "grad_norm": 0.5144490254440631, "learning_rate": 4.613033182935306e-06, "loss": 0.4664, "step": 9467 }, { "epoch": 3.7071622622869844, "grad_norm": 0.5067754911121317, "learning_rate": 4.6129494485490935e-06, "loss": 0.4505, "step": 9468 }, { "epoch": 3.7075574215855767, "grad_norm": 0.49318464717590493, "learning_rate": 4.612865705864499e-06, "loss": 0.4718, "step": 9469 }, { "epoch": 3.707952580884169, "grad_norm": 0.5308810953951024, "learning_rate": 4.612781954881851e-06, "loss": 0.4685, "step": 9470 }, { "epoch": 3.708347740182761, "grad_norm": 0.47912651709226944, "learning_rate": 4.6126981956014775e-06, "loss": 0.4418, "step": 9471 }, { "epoch": 3.7087428994813534, "grad_norm": 0.48759890811022727, "learning_rate": 4.612614428023709e-06, "loss": 0.451, "step": 9472 }, { "epoch": 3.7091380587799456, "grad_norm": 0.5022546921872667, "learning_rate": 4.612530652148875e-06, "loss": 0.4441, "step": 9473 }, { "epoch": 3.709533218078538, "grad_norm": 0.48883903463677697, "learning_rate": 4.6124468679773015e-06, "loss": 0.44, "step": 9474 }, { "epoch": 3.70992837737713, "grad_norm": 0.4925590816151284, "learning_rate": 4.61236307550932e-06, "loss": 0.4625, "step": 9475 }, { "epoch": 3.7103235366757223, "grad_norm": 0.5102359588145222, "learning_rate": 4.612279274745259e-06, "loss": 0.4532, "step": 9476 }, { "epoch": 3.7107186959743146, "grad_norm": 0.5016564703231445, "learning_rate": 4.612195465685448e-06, "loss": 0.4611, "step": 9477 }, { "epoch": 3.711113855272907, "grad_norm": 0.4985293696763766, "learning_rate": 4.612111648330216e-06, "loss": 0.4617, "step": 9478 }, { "epoch": 3.711509014571499, "grad_norm": 0.49461850359770454, "learning_rate": 4.612027822679892e-06, "loss": 0.4549, "step": 9479 }, { "epoch": 3.7119041738700913, "grad_norm": 0.4879647278277727, "learning_rate": 4.611943988734805e-06, "loss": 0.4686, "step": 9480 }, { "epoch": 3.7122993331686835, "grad_norm": 0.502142235265202, "learning_rate": 4.611860146495284e-06, "loss": 0.468, "step": 9481 }, { "epoch": 3.712694492467276, "grad_norm": 0.5078297162937819, "learning_rate": 4.611776295961659e-06, "loss": 0.4844, "step": 9482 }, { "epoch": 3.713089651765868, "grad_norm": 0.6651109373307685, "learning_rate": 4.611692437134259e-06, "loss": 0.4576, "step": 9483 }, { "epoch": 3.7134848110644603, "grad_norm": 0.48181360567693204, "learning_rate": 4.611608570013414e-06, "loss": 0.4633, "step": 9484 }, { "epoch": 3.7138799703630525, "grad_norm": 0.48089336396003285, "learning_rate": 4.611524694599452e-06, "loss": 0.4548, "step": 9485 }, { "epoch": 3.7142751296616447, "grad_norm": 0.5565739216162738, "learning_rate": 4.611440810892703e-06, "loss": 0.4592, "step": 9486 }, { "epoch": 3.714670288960237, "grad_norm": 0.5089387807662913, "learning_rate": 4.611356918893497e-06, "loss": 0.4673, "step": 9487 }, { "epoch": 3.7150654482588292, "grad_norm": 0.4849507219283539, "learning_rate": 4.611273018602164e-06, "loss": 0.4579, "step": 9488 }, { "epoch": 3.7154606075574215, "grad_norm": 0.4957394966619531, "learning_rate": 4.611189110019032e-06, "loss": 0.4722, "step": 9489 }, { "epoch": 3.7158557668560137, "grad_norm": 0.4907758996231714, "learning_rate": 4.6111051931444304e-06, "loss": 0.4597, "step": 9490 }, { "epoch": 3.716250926154606, "grad_norm": 0.5101532752645882, "learning_rate": 4.61102126797869e-06, "loss": 0.4828, "step": 9491 }, { "epoch": 3.716646085453198, "grad_norm": 0.5008474308167519, "learning_rate": 4.610937334522141e-06, "loss": 0.4722, "step": 9492 }, { "epoch": 3.7170412447517904, "grad_norm": 0.49062418438118194, "learning_rate": 4.610853392775111e-06, "loss": 0.4542, "step": 9493 }, { "epoch": 3.7174364040503827, "grad_norm": 0.5014221426388613, "learning_rate": 4.61076944273793e-06, "loss": 0.4843, "step": 9494 }, { "epoch": 3.717831563348975, "grad_norm": 0.4915499915118497, "learning_rate": 4.61068548441093e-06, "loss": 0.4587, "step": 9495 }, { "epoch": 3.718226722647567, "grad_norm": 0.5132337480297305, "learning_rate": 4.610601517794437e-06, "loss": 0.4743, "step": 9496 }, { "epoch": 3.7186218819461594, "grad_norm": 0.5016517224129324, "learning_rate": 4.610517542888785e-06, "loss": 0.462, "step": 9497 }, { "epoch": 3.7190170412447516, "grad_norm": 0.5060950647207751, "learning_rate": 4.6104335596943004e-06, "loss": 0.4641, "step": 9498 }, { "epoch": 3.719412200543344, "grad_norm": 0.5062740065120886, "learning_rate": 4.610349568211314e-06, "loss": 0.4592, "step": 9499 }, { "epoch": 3.719807359841936, "grad_norm": 0.4930087960545402, "learning_rate": 4.6102655684401575e-06, "loss": 0.47, "step": 9500 }, { "epoch": 3.7202025191405284, "grad_norm": 0.4898865945969133, "learning_rate": 4.6101815603811576e-06, "loss": 0.4437, "step": 9501 }, { "epoch": 3.7205976784391206, "grad_norm": 0.4795656726959586, "learning_rate": 4.610097544034647e-06, "loss": 0.4453, "step": 9502 }, { "epoch": 3.720992837737713, "grad_norm": 0.49426700494295145, "learning_rate": 4.610013519400954e-06, "loss": 0.4864, "step": 9503 }, { "epoch": 3.721387997036305, "grad_norm": 0.4887121669736692, "learning_rate": 4.609929486480409e-06, "loss": 0.4573, "step": 9504 }, { "epoch": 3.7217831563348973, "grad_norm": 0.4905352215891702, "learning_rate": 4.609845445273343e-06, "loss": 0.4582, "step": 9505 }, { "epoch": 3.7221783156334896, "grad_norm": 0.5105211554553892, "learning_rate": 4.6097613957800845e-06, "loss": 0.4693, "step": 9506 }, { "epoch": 3.722573474932082, "grad_norm": 0.4987264748579873, "learning_rate": 4.6096773380009655e-06, "loss": 0.4679, "step": 9507 }, { "epoch": 3.722968634230674, "grad_norm": 0.49566139627836764, "learning_rate": 4.609593271936313e-06, "loss": 0.4483, "step": 9508 }, { "epoch": 3.7233637935292663, "grad_norm": 0.49187996336706025, "learning_rate": 4.609509197586461e-06, "loss": 0.467, "step": 9509 }, { "epoch": 3.7237589528278585, "grad_norm": 0.4820091496779555, "learning_rate": 4.609425114951737e-06, "loss": 0.4536, "step": 9510 }, { "epoch": 3.7241541121264508, "grad_norm": 0.5021340173430089, "learning_rate": 4.609341024032472e-06, "loss": 0.4824, "step": 9511 }, { "epoch": 3.724549271425043, "grad_norm": 0.49803869540293166, "learning_rate": 4.609256924828997e-06, "loss": 0.4547, "step": 9512 }, { "epoch": 3.7249444307236352, "grad_norm": 0.5128440482147195, "learning_rate": 4.60917281734164e-06, "loss": 0.4608, "step": 9513 }, { "epoch": 3.7253395900222275, "grad_norm": 0.4913277569926873, "learning_rate": 4.609088701570735e-06, "loss": 0.4537, "step": 9514 }, { "epoch": 3.7257347493208197, "grad_norm": 0.495974924098836, "learning_rate": 4.609004577516609e-06, "loss": 0.4734, "step": 9515 }, { "epoch": 3.726129908619412, "grad_norm": 0.49140903339070413, "learning_rate": 4.608920445179594e-06, "loss": 0.4442, "step": 9516 }, { "epoch": 3.726525067918004, "grad_norm": 0.5217558182771603, "learning_rate": 4.60883630456002e-06, "loss": 0.4896, "step": 9517 }, { "epoch": 3.7269202272165964, "grad_norm": 0.49384622356957825, "learning_rate": 4.608752155658218e-06, "loss": 0.4412, "step": 9518 }, { "epoch": 3.7273153865151887, "grad_norm": 0.4857726615455569, "learning_rate": 4.608667998474518e-06, "loss": 0.4497, "step": 9519 }, { "epoch": 3.727710545813781, "grad_norm": 0.4963906295933952, "learning_rate": 4.60858383300925e-06, "loss": 0.4613, "step": 9520 }, { "epoch": 3.728105705112373, "grad_norm": 0.4838697905450302, "learning_rate": 4.608499659262745e-06, "loss": 0.4584, "step": 9521 }, { "epoch": 3.7285008644109654, "grad_norm": 0.48800127051436243, "learning_rate": 4.608415477235334e-06, "loss": 0.4589, "step": 9522 }, { "epoch": 3.7288960237095576, "grad_norm": 0.48246388572291404, "learning_rate": 4.6083312869273475e-06, "loss": 0.451, "step": 9523 }, { "epoch": 3.7292911830081503, "grad_norm": 0.4820982897305016, "learning_rate": 4.608247088339116e-06, "loss": 0.4481, "step": 9524 }, { "epoch": 3.7296863423067426, "grad_norm": 0.4782123289577348, "learning_rate": 4.60816288147097e-06, "loss": 0.4613, "step": 9525 }, { "epoch": 3.730081501605335, "grad_norm": 0.4893516657625509, "learning_rate": 4.60807866632324e-06, "loss": 0.4555, "step": 9526 }, { "epoch": 3.730476660903927, "grad_norm": 0.489071023151812, "learning_rate": 4.607994442896257e-06, "loss": 0.4623, "step": 9527 }, { "epoch": 3.7308718202025193, "grad_norm": 0.49687366002746247, "learning_rate": 4.607910211190353e-06, "loss": 0.4398, "step": 9528 }, { "epoch": 3.7312669795011115, "grad_norm": 0.47881395393697335, "learning_rate": 4.607825971205857e-06, "loss": 0.4469, "step": 9529 }, { "epoch": 3.7316621387997038, "grad_norm": 0.5130814385073522, "learning_rate": 4.6077417229430995e-06, "loss": 0.4628, "step": 9530 }, { "epoch": 3.732057298098296, "grad_norm": 0.48593565257627325, "learning_rate": 4.607657466402414e-06, "loss": 0.4533, "step": 9531 }, { "epoch": 3.7324524573968882, "grad_norm": 0.4875631077933674, "learning_rate": 4.607573201584129e-06, "loss": 0.4579, "step": 9532 }, { "epoch": 3.7328476166954805, "grad_norm": 0.4957260704045972, "learning_rate": 4.607488928488576e-06, "loss": 0.4563, "step": 9533 }, { "epoch": 3.7332427759940727, "grad_norm": 0.4860278696039461, "learning_rate": 4.607404647116087e-06, "loss": 0.4664, "step": 9534 }, { "epoch": 3.733637935292665, "grad_norm": 0.4845068061589555, "learning_rate": 4.607320357466992e-06, "loss": 0.4614, "step": 9535 }, { "epoch": 3.734033094591257, "grad_norm": 0.4786694984990013, "learning_rate": 4.607236059541622e-06, "loss": 0.4601, "step": 9536 }, { "epoch": 3.7344282538898494, "grad_norm": 0.5088132150023401, "learning_rate": 4.6071517533403085e-06, "loss": 0.4538, "step": 9537 }, { "epoch": 3.7348234131884417, "grad_norm": 0.4908292637789205, "learning_rate": 4.6070674388633825e-06, "loss": 0.4686, "step": 9538 }, { "epoch": 3.735218572487034, "grad_norm": 0.5142726426805816, "learning_rate": 4.606983116111175e-06, "loss": 0.4719, "step": 9539 }, { "epoch": 3.735613731785626, "grad_norm": 0.49016313325540406, "learning_rate": 4.606898785084017e-06, "loss": 0.4535, "step": 9540 }, { "epoch": 3.7360088910842184, "grad_norm": 0.49285245557129015, "learning_rate": 4.60681444578224e-06, "loss": 0.4645, "step": 9541 }, { "epoch": 3.7364040503828106, "grad_norm": 0.4839808550700243, "learning_rate": 4.6067300982061754e-06, "loss": 0.4491, "step": 9542 }, { "epoch": 3.736799209681403, "grad_norm": 0.484570336078592, "learning_rate": 4.606645742356155e-06, "loss": 0.4594, "step": 9543 }, { "epoch": 3.737194368979995, "grad_norm": 0.48518657604346466, "learning_rate": 4.606561378232508e-06, "loss": 0.4701, "step": 9544 }, { "epoch": 3.7375895282785874, "grad_norm": 0.48628553075992975, "learning_rate": 4.606477005835568e-06, "loss": 0.4533, "step": 9545 }, { "epoch": 3.7379846875771796, "grad_norm": 0.5123338585282978, "learning_rate": 4.6063926251656656e-06, "loss": 0.5095, "step": 9546 }, { "epoch": 3.738379846875772, "grad_norm": 0.5102529842049373, "learning_rate": 4.6063082362231306e-06, "loss": 0.4866, "step": 9547 }, { "epoch": 3.738775006174364, "grad_norm": 0.4795300343781412, "learning_rate": 4.606223839008297e-06, "loss": 0.4565, "step": 9548 }, { "epoch": 3.7391701654729563, "grad_norm": 0.500458305229408, "learning_rate": 4.6061394335214945e-06, "loss": 0.4546, "step": 9549 }, { "epoch": 3.7395653247715486, "grad_norm": 0.49447185079146605, "learning_rate": 4.606055019763056e-06, "loss": 0.4711, "step": 9550 }, { "epoch": 3.739960484070141, "grad_norm": 0.4837507662358641, "learning_rate": 4.6059705977333116e-06, "loss": 0.4492, "step": 9551 }, { "epoch": 3.740355643368733, "grad_norm": 0.6482460092048694, "learning_rate": 4.605886167432595e-06, "loss": 0.4591, "step": 9552 }, { "epoch": 3.7407508026673253, "grad_norm": 0.48576022911474664, "learning_rate": 4.605801728861235e-06, "loss": 0.4561, "step": 9553 }, { "epoch": 3.7411459619659175, "grad_norm": 0.4901974973756972, "learning_rate": 4.6057172820195635e-06, "loss": 0.4713, "step": 9554 }, { "epoch": 3.7415411212645098, "grad_norm": 0.49639360986444836, "learning_rate": 4.605632826907915e-06, "loss": 0.4759, "step": 9555 }, { "epoch": 3.741936280563102, "grad_norm": 0.4931648478189971, "learning_rate": 4.605548363526619e-06, "loss": 0.4587, "step": 9556 }, { "epoch": 3.7423314398616943, "grad_norm": 0.4688955095749135, "learning_rate": 4.605463891876006e-06, "loss": 0.4648, "step": 9557 }, { "epoch": 3.7427265991602865, "grad_norm": 0.49745821587087424, "learning_rate": 4.605379411956411e-06, "loss": 0.4603, "step": 9558 }, { "epoch": 3.7431217584588787, "grad_norm": 0.46605724180238206, "learning_rate": 4.605294923768164e-06, "loss": 0.4536, "step": 9559 }, { "epoch": 3.743516917757471, "grad_norm": 0.48666332590044686, "learning_rate": 4.605210427311596e-06, "loss": 0.4794, "step": 9560 }, { "epoch": 3.743912077056063, "grad_norm": 0.49885552329714344, "learning_rate": 4.605125922587041e-06, "loss": 0.451, "step": 9561 }, { "epoch": 3.7443072363546555, "grad_norm": 0.5000142082417693, "learning_rate": 4.6050414095948294e-06, "loss": 0.4529, "step": 9562 }, { "epoch": 3.7447023956532477, "grad_norm": 0.4862982293683471, "learning_rate": 4.604956888335292e-06, "loss": 0.4614, "step": 9563 }, { "epoch": 3.74509755495184, "grad_norm": 0.48972690615641834, "learning_rate": 4.604872358808764e-06, "loss": 0.4696, "step": 9564 }, { "epoch": 3.745492714250432, "grad_norm": 0.48344315601917986, "learning_rate": 4.604787821015575e-06, "loss": 0.4618, "step": 9565 }, { "epoch": 3.7458878735490244, "grad_norm": 0.501362692935816, "learning_rate": 4.604703274956057e-06, "loss": 0.4465, "step": 9566 }, { "epoch": 3.7462830328476167, "grad_norm": 0.4869221765616658, "learning_rate": 4.604618720630542e-06, "loss": 0.4519, "step": 9567 }, { "epoch": 3.746678192146209, "grad_norm": 0.49044552780260375, "learning_rate": 4.604534158039364e-06, "loss": 0.4575, "step": 9568 }, { "epoch": 3.747073351444801, "grad_norm": 0.5337351622614847, "learning_rate": 4.604449587182854e-06, "loss": 0.4757, "step": 9569 }, { "epoch": 3.7474685107433934, "grad_norm": 0.5142889870437418, "learning_rate": 4.6043650080613434e-06, "loss": 0.4368, "step": 9570 }, { "epoch": 3.7478636700419856, "grad_norm": 0.5002069423060246, "learning_rate": 4.604280420675165e-06, "loss": 0.4662, "step": 9571 }, { "epoch": 3.748258829340578, "grad_norm": 0.5012425468189804, "learning_rate": 4.60419582502465e-06, "loss": 0.4689, "step": 9572 }, { "epoch": 3.74865398863917, "grad_norm": 0.48744682141535584, "learning_rate": 4.6041112211101325e-06, "loss": 0.4783, "step": 9573 }, { "epoch": 3.7490491479377623, "grad_norm": 0.49788598446759974, "learning_rate": 4.604026608931943e-06, "loss": 0.4668, "step": 9574 }, { "epoch": 3.7494443072363546, "grad_norm": 0.4964539038735511, "learning_rate": 4.6039419884904155e-06, "loss": 0.4768, "step": 9575 }, { "epoch": 3.749839466534947, "grad_norm": 0.5034149134145032, "learning_rate": 4.603857359785881e-06, "loss": 0.4654, "step": 9576 }, { "epoch": 3.750234625833539, "grad_norm": 0.5102791225039909, "learning_rate": 4.6037727228186715e-06, "loss": 0.4767, "step": 9577 }, { "epoch": 3.7506297851321313, "grad_norm": 0.49641266816152013, "learning_rate": 4.603688077589121e-06, "loss": 0.4692, "step": 9578 }, { "epoch": 3.7510249444307235, "grad_norm": 0.5170425495731209, "learning_rate": 4.603603424097561e-06, "loss": 0.4437, "step": 9579 }, { "epoch": 3.751420103729316, "grad_norm": 0.4903857984331723, "learning_rate": 4.603518762344325e-06, "loss": 0.4665, "step": 9580 }, { "epoch": 3.751815263027908, "grad_norm": 0.49410476736023734, "learning_rate": 4.603434092329743e-06, "loss": 0.4648, "step": 9581 }, { "epoch": 3.7522104223265003, "grad_norm": 0.47620798317913865, "learning_rate": 4.603349414054151e-06, "loss": 0.4558, "step": 9582 }, { "epoch": 3.7526055816250925, "grad_norm": 0.4909803148027834, "learning_rate": 4.603264727517879e-06, "loss": 0.4728, "step": 9583 }, { "epoch": 3.7530007409236847, "grad_norm": 0.4841546764667915, "learning_rate": 4.60318003272126e-06, "loss": 0.4384, "step": 9584 }, { "epoch": 3.753395900222277, "grad_norm": 0.49138074599478704, "learning_rate": 4.603095329664627e-06, "loss": 0.4509, "step": 9585 }, { "epoch": 3.7537910595208692, "grad_norm": 0.4916572867617533, "learning_rate": 4.6030106183483135e-06, "loss": 0.4493, "step": 9586 }, { "epoch": 3.7541862188194615, "grad_norm": 0.4867907743227322, "learning_rate": 4.60292589877265e-06, "loss": 0.4561, "step": 9587 }, { "epoch": 3.7545813781180537, "grad_norm": 0.5044721101451728, "learning_rate": 4.6028411709379715e-06, "loss": 0.4525, "step": 9588 }, { "epoch": 3.754976537416646, "grad_norm": 0.47514228642887263, "learning_rate": 4.602756434844609e-06, "loss": 0.4616, "step": 9589 }, { "epoch": 3.755371696715238, "grad_norm": 0.776026961750764, "learning_rate": 4.6026716904928965e-06, "loss": 0.4883, "step": 9590 }, { "epoch": 3.7557668560138304, "grad_norm": 0.4937700305585744, "learning_rate": 4.602586937883167e-06, "loss": 0.4647, "step": 9591 }, { "epoch": 3.7561620153124227, "grad_norm": 0.47957905951413793, "learning_rate": 4.6025021770157516e-06, "loss": 0.4446, "step": 9592 }, { "epoch": 3.756557174611015, "grad_norm": 0.5017133949581569, "learning_rate": 4.602417407890984e-06, "loss": 0.4884, "step": 9593 }, { "epoch": 3.756952333909607, "grad_norm": 0.4870056949571294, "learning_rate": 4.602332630509199e-06, "loss": 0.4602, "step": 9594 }, { "epoch": 3.7573474932082, "grad_norm": 0.5202279557981537, "learning_rate": 4.602247844870728e-06, "loss": 0.4776, "step": 9595 }, { "epoch": 3.757742652506792, "grad_norm": 0.4973526737448953, "learning_rate": 4.602163050975903e-06, "loss": 0.4423, "step": 9596 }, { "epoch": 3.7581378118053843, "grad_norm": 0.4983329516809982, "learning_rate": 4.602078248825058e-06, "loss": 0.4657, "step": 9597 }, { "epoch": 3.7585329711039765, "grad_norm": 0.49221414055030543, "learning_rate": 4.601993438418527e-06, "loss": 0.4544, "step": 9598 }, { "epoch": 3.758928130402569, "grad_norm": 0.4865035442026118, "learning_rate": 4.601908619756642e-06, "loss": 0.4511, "step": 9599 }, { "epoch": 3.759323289701161, "grad_norm": 0.48356478090463534, "learning_rate": 4.601823792839735e-06, "loss": 0.4738, "step": 9600 }, { "epoch": 3.7597184489997533, "grad_norm": 0.49314242592096336, "learning_rate": 4.601738957668142e-06, "loss": 0.463, "step": 9601 }, { "epoch": 3.7601136082983455, "grad_norm": 0.5078494446402765, "learning_rate": 4.601654114242194e-06, "loss": 0.4618, "step": 9602 }, { "epoch": 3.7605087675969378, "grad_norm": 0.5028843214675345, "learning_rate": 4.6015692625622255e-06, "loss": 0.4608, "step": 9603 }, { "epoch": 3.76090392689553, "grad_norm": 0.5008323117874007, "learning_rate": 4.601484402628569e-06, "loss": 0.4549, "step": 9604 }, { "epoch": 3.7612990861941222, "grad_norm": 0.4850454741183229, "learning_rate": 4.6013995344415565e-06, "loss": 0.4627, "step": 9605 }, { "epoch": 3.7616942454927145, "grad_norm": 0.48808279245655617, "learning_rate": 4.601314658001524e-06, "loss": 0.4535, "step": 9606 }, { "epoch": 3.7620894047913067, "grad_norm": 0.5066989763656677, "learning_rate": 4.601229773308802e-06, "loss": 0.4751, "step": 9607 }, { "epoch": 3.762484564089899, "grad_norm": 0.4805137326607607, "learning_rate": 4.601144880363726e-06, "loss": 0.4638, "step": 9608 }, { "epoch": 3.762879723388491, "grad_norm": 0.4922622961214075, "learning_rate": 4.60105997916663e-06, "loss": 0.4686, "step": 9609 }, { "epoch": 3.7632748826870834, "grad_norm": 0.4810705349543995, "learning_rate": 4.600975069717846e-06, "loss": 0.4461, "step": 9610 }, { "epoch": 3.7636700419856757, "grad_norm": 0.4773354945166677, "learning_rate": 4.6008901520177065e-06, "loss": 0.4516, "step": 9611 }, { "epoch": 3.764065201284268, "grad_norm": 0.49206349143578293, "learning_rate": 4.600805226066547e-06, "loss": 0.4485, "step": 9612 }, { "epoch": 3.76446036058286, "grad_norm": 0.4814701688954984, "learning_rate": 4.6007202918647e-06, "loss": 0.4489, "step": 9613 }, { "epoch": 3.7648555198814524, "grad_norm": 0.5022048385321567, "learning_rate": 4.600635349412499e-06, "loss": 0.4721, "step": 9614 }, { "epoch": 3.7652506791800446, "grad_norm": 0.48243788870808746, "learning_rate": 4.600550398710278e-06, "loss": 0.4464, "step": 9615 }, { "epoch": 3.765645838478637, "grad_norm": 0.48757340703408836, "learning_rate": 4.600465439758371e-06, "loss": 0.4703, "step": 9616 }, { "epoch": 3.766040997777229, "grad_norm": 0.48716159886857296, "learning_rate": 4.600380472557112e-06, "loss": 0.4519, "step": 9617 }, { "epoch": 3.7664361570758214, "grad_norm": 0.5069818045941086, "learning_rate": 4.600295497106833e-06, "loss": 0.4943, "step": 9618 }, { "epoch": 3.7668313163744136, "grad_norm": 0.5022666320922664, "learning_rate": 4.6002105134078675e-06, "loss": 0.4673, "step": 9619 }, { "epoch": 3.767226475673006, "grad_norm": 0.4807723287562549, "learning_rate": 4.600125521460552e-06, "loss": 0.4646, "step": 9620 }, { "epoch": 3.767621634971598, "grad_norm": 0.4912477141689686, "learning_rate": 4.600040521265219e-06, "loss": 0.4641, "step": 9621 }, { "epoch": 3.7680167942701903, "grad_norm": 0.49929865985825106, "learning_rate": 4.599955512822201e-06, "loss": 0.4823, "step": 9622 }, { "epoch": 3.7684119535687826, "grad_norm": 0.49230115169608873, "learning_rate": 4.599870496131833e-06, "loss": 0.4593, "step": 9623 }, { "epoch": 3.768807112867375, "grad_norm": 0.48105300330907896, "learning_rate": 4.59978547119445e-06, "loss": 0.444, "step": 9624 }, { "epoch": 3.769202272165967, "grad_norm": 0.4818330655568901, "learning_rate": 4.599700438010382e-06, "loss": 0.4473, "step": 9625 }, { "epoch": 3.7695974314645593, "grad_norm": 0.4963785455532329, "learning_rate": 4.599615396579968e-06, "loss": 0.4342, "step": 9626 }, { "epoch": 3.7699925907631515, "grad_norm": 0.4830399833200486, "learning_rate": 4.5995303469035406e-06, "loss": 0.44, "step": 9627 }, { "epoch": 3.7703877500617438, "grad_norm": 0.47990178930501065, "learning_rate": 4.59944528898143e-06, "loss": 0.4504, "step": 9628 }, { "epoch": 3.770782909360336, "grad_norm": 0.4786217792852282, "learning_rate": 4.599360222813975e-06, "loss": 0.4661, "step": 9629 }, { "epoch": 3.7711780686589282, "grad_norm": 0.4805124331616531, "learning_rate": 4.599275148401507e-06, "loss": 0.4583, "step": 9630 }, { "epoch": 3.7715732279575205, "grad_norm": 0.4879125432986269, "learning_rate": 4.599190065744362e-06, "loss": 0.4528, "step": 9631 }, { "epoch": 3.7719683872561127, "grad_norm": 0.48314579798707913, "learning_rate": 4.5991049748428725e-06, "loss": 0.4537, "step": 9632 }, { "epoch": 3.772363546554705, "grad_norm": 0.4903701935863649, "learning_rate": 4.599019875697374e-06, "loss": 0.4512, "step": 9633 }, { "epoch": 3.772758705853297, "grad_norm": 0.5023398611383797, "learning_rate": 4.598934768308199e-06, "loss": 0.4546, "step": 9634 }, { "epoch": 3.7731538651518894, "grad_norm": 0.6910082967623805, "learning_rate": 4.598849652675683e-06, "loss": 0.454, "step": 9635 }, { "epoch": 3.7735490244504817, "grad_norm": 0.48924919480622175, "learning_rate": 4.598764528800161e-06, "loss": 0.4573, "step": 9636 }, { "epoch": 3.773944183749074, "grad_norm": 0.5032570900259695, "learning_rate": 4.598679396681964e-06, "loss": 0.4735, "step": 9637 }, { "epoch": 3.774339343047666, "grad_norm": 0.4884216635553054, "learning_rate": 4.598594256321431e-06, "loss": 0.4593, "step": 9638 }, { "epoch": 3.7747345023462584, "grad_norm": 0.4839339436338744, "learning_rate": 4.598509107718894e-06, "loss": 0.4734, "step": 9639 }, { "epoch": 3.7751296616448506, "grad_norm": 0.48156978064749584, "learning_rate": 4.598423950874687e-06, "loss": 0.4514, "step": 9640 }, { "epoch": 3.775524820943443, "grad_norm": 0.4866270246823189, "learning_rate": 4.598338785789144e-06, "loss": 0.4615, "step": 9641 }, { "epoch": 3.775919980242035, "grad_norm": 0.47743928665454505, "learning_rate": 4.598253612462601e-06, "loss": 0.4578, "step": 9642 }, { "epoch": 3.7763151395406274, "grad_norm": 0.47599434611222147, "learning_rate": 4.598168430895392e-06, "loss": 0.4495, "step": 9643 }, { "epoch": 3.7767102988392196, "grad_norm": 0.49548131853505706, "learning_rate": 4.598083241087852e-06, "loss": 0.4481, "step": 9644 }, { "epoch": 3.777105458137812, "grad_norm": 0.536021204426832, "learning_rate": 4.597998043040315e-06, "loss": 0.4548, "step": 9645 }, { "epoch": 3.777500617436404, "grad_norm": 0.49859452013864736, "learning_rate": 4.597912836753116e-06, "loss": 0.4472, "step": 9646 }, { "epoch": 3.7778957767349963, "grad_norm": 0.4904930266960114, "learning_rate": 4.597827622226588e-06, "loss": 0.4407, "step": 9647 }, { "epoch": 3.7782909360335886, "grad_norm": 0.5632944952584257, "learning_rate": 4.597742399461067e-06, "loss": 0.4677, "step": 9648 }, { "epoch": 3.778686095332181, "grad_norm": 0.4825786026169142, "learning_rate": 4.597657168456889e-06, "loss": 0.4711, "step": 9649 }, { "epoch": 3.779081254630773, "grad_norm": 0.48663207299892375, "learning_rate": 4.5975719292143865e-06, "loss": 0.4562, "step": 9650 }, { "epoch": 3.7794764139293653, "grad_norm": 0.48151353926840385, "learning_rate": 4.5974866817338955e-06, "loss": 0.4491, "step": 9651 }, { "epoch": 3.7798715732279575, "grad_norm": 0.490395399162099, "learning_rate": 4.597401426015751e-06, "loss": 0.4708, "step": 9652 }, { "epoch": 3.7802667325265498, "grad_norm": 0.48180099915518, "learning_rate": 4.597316162060287e-06, "loss": 0.4571, "step": 9653 }, { "epoch": 3.780661891825142, "grad_norm": 0.5093098515857098, "learning_rate": 4.597230889867837e-06, "loss": 0.4601, "step": 9654 }, { "epoch": 3.7810570511237342, "grad_norm": 0.5002845474985712, "learning_rate": 4.5971456094387395e-06, "loss": 0.4705, "step": 9655 }, { "epoch": 3.7814522104223265, "grad_norm": 0.49887974718155487, "learning_rate": 4.597060320773327e-06, "loss": 0.4552, "step": 9656 }, { "epoch": 3.7818473697209187, "grad_norm": 0.4844913686111668, "learning_rate": 4.596975023871935e-06, "loss": 0.4494, "step": 9657 }, { "epoch": 3.782242529019511, "grad_norm": 0.49008324798281255, "learning_rate": 4.596889718734898e-06, "loss": 0.4534, "step": 9658 }, { "epoch": 3.782637688318103, "grad_norm": 0.48905798875896944, "learning_rate": 4.5968044053625525e-06, "loss": 0.4652, "step": 9659 }, { "epoch": 3.7830328476166954, "grad_norm": 0.48024108850578784, "learning_rate": 4.596719083755231e-06, "loss": 0.4592, "step": 9660 }, { "epoch": 3.7834280069152877, "grad_norm": 0.4900301758693202, "learning_rate": 4.596633753913272e-06, "loss": 0.4565, "step": 9661 }, { "epoch": 3.78382316621388, "grad_norm": 0.4802953667319636, "learning_rate": 4.596548415837007e-06, "loss": 0.4543, "step": 9662 }, { "epoch": 3.784218325512472, "grad_norm": 0.4837608289095752, "learning_rate": 4.596463069526775e-06, "loss": 0.4546, "step": 9663 }, { "epoch": 3.7846134848110644, "grad_norm": 0.5182415469829378, "learning_rate": 4.596377714982907e-06, "loss": 0.4844, "step": 9664 }, { "epoch": 3.7850086441096567, "grad_norm": 0.4966888453091403, "learning_rate": 4.5962923522057415e-06, "loss": 0.4576, "step": 9665 }, { "epoch": 3.785403803408249, "grad_norm": 0.49753630366467044, "learning_rate": 4.596206981195611e-06, "loss": 0.468, "step": 9666 }, { "epoch": 3.785798962706841, "grad_norm": 0.48874836080231554, "learning_rate": 4.596121601952854e-06, "loss": 0.4405, "step": 9667 }, { "epoch": 3.7861941220054334, "grad_norm": 0.5076579580062619, "learning_rate": 4.596036214477804e-06, "loss": 0.467, "step": 9668 }, { "epoch": 3.7865892813040256, "grad_norm": 0.49135910265658483, "learning_rate": 4.595950818770796e-06, "loss": 0.4639, "step": 9669 }, { "epoch": 3.786984440602618, "grad_norm": 0.4840783957984401, "learning_rate": 4.595865414832166e-06, "loss": 0.4864, "step": 9670 }, { "epoch": 3.78737959990121, "grad_norm": 0.5059648003209108, "learning_rate": 4.59578000266225e-06, "loss": 0.4623, "step": 9671 }, { "epoch": 3.7877747591998023, "grad_norm": 0.5155562419723234, "learning_rate": 4.595694582261382e-06, "loss": 0.4552, "step": 9672 }, { "epoch": 3.7881699184983946, "grad_norm": 0.49286246868120626, "learning_rate": 4.595609153629899e-06, "loss": 0.4531, "step": 9673 }, { "epoch": 3.788565077796987, "grad_norm": 0.5030907783859865, "learning_rate": 4.5955237167681356e-06, "loss": 0.4809, "step": 9674 }, { "epoch": 3.788960237095579, "grad_norm": 0.48208063419912994, "learning_rate": 4.595438271676427e-06, "loss": 0.4621, "step": 9675 }, { "epoch": 3.7893553963941713, "grad_norm": 0.48623500793987223, "learning_rate": 4.595352818355109e-06, "loss": 0.4628, "step": 9676 }, { "epoch": 3.7897505556927635, "grad_norm": 0.5114092445947878, "learning_rate": 4.595267356804518e-06, "loss": 0.4678, "step": 9677 }, { "epoch": 3.7901457149913558, "grad_norm": 0.4776370080310583, "learning_rate": 4.595181887024989e-06, "loss": 0.4466, "step": 9678 }, { "epoch": 3.790540874289948, "grad_norm": 0.5818700802644523, "learning_rate": 4.595096409016858e-06, "loss": 0.4536, "step": 9679 }, { "epoch": 3.7909360335885403, "grad_norm": 0.4740846191844771, "learning_rate": 4.595010922780461e-06, "loss": 0.4419, "step": 9680 }, { "epoch": 3.7913311928871325, "grad_norm": 0.49737461925124105, "learning_rate": 4.594925428316132e-06, "loss": 0.458, "step": 9681 }, { "epoch": 3.7917263521857247, "grad_norm": 0.5051660541685692, "learning_rate": 4.594839925624209e-06, "loss": 0.4409, "step": 9682 }, { "epoch": 3.792121511484317, "grad_norm": 0.4984405654390054, "learning_rate": 4.594754414705027e-06, "loss": 0.4665, "step": 9683 }, { "epoch": 3.792516670782909, "grad_norm": 0.48241664062480294, "learning_rate": 4.594668895558921e-06, "loss": 0.4474, "step": 9684 }, { "epoch": 3.7929118300815015, "grad_norm": 0.5191185844568438, "learning_rate": 4.594583368186228e-06, "loss": 0.4513, "step": 9685 }, { "epoch": 3.7933069893800937, "grad_norm": 0.48750409539612494, "learning_rate": 4.594497832587283e-06, "loss": 0.4593, "step": 9686 }, { "epoch": 3.793702148678686, "grad_norm": 0.5133154479299648, "learning_rate": 4.594412288762423e-06, "loss": 0.4735, "step": 9687 }, { "epoch": 3.794097307977278, "grad_norm": 0.47361385821885527, "learning_rate": 4.594326736711983e-06, "loss": 0.4412, "step": 9688 }, { "epoch": 3.7944924672758704, "grad_norm": 0.48210172755908104, "learning_rate": 4.5942411764362985e-06, "loss": 0.4547, "step": 9689 }, { "epoch": 3.7948876265744627, "grad_norm": 0.4960098779386097, "learning_rate": 4.5941556079357076e-06, "loss": 0.4625, "step": 9690 }, { "epoch": 3.795282785873055, "grad_norm": 0.4903029773758132, "learning_rate": 4.5940700312105444e-06, "loss": 0.4712, "step": 9691 }, { "epoch": 3.795677945171647, "grad_norm": 0.5136839009508477, "learning_rate": 4.593984446261146e-06, "loss": 0.473, "step": 9692 }, { "epoch": 3.7960731044702394, "grad_norm": 0.4747156730400875, "learning_rate": 4.5938988530878485e-06, "loss": 0.4611, "step": 9693 }, { "epoch": 3.7964682637688316, "grad_norm": 0.4870902649544296, "learning_rate": 4.5938132516909865e-06, "loss": 0.4524, "step": 9694 }, { "epoch": 3.796863423067424, "grad_norm": 0.5010946794796335, "learning_rate": 4.593727642070899e-06, "loss": 0.4849, "step": 9695 }, { "epoch": 3.797258582366016, "grad_norm": 0.49536194899113356, "learning_rate": 4.593642024227919e-06, "loss": 0.4797, "step": 9696 }, { "epoch": 3.7976537416646083, "grad_norm": 0.4930240021909693, "learning_rate": 4.593556398162386e-06, "loss": 0.4596, "step": 9697 }, { "epoch": 3.7980489009632006, "grad_norm": 0.4921928001724572, "learning_rate": 4.5934707638746344e-06, "loss": 0.4532, "step": 9698 }, { "epoch": 3.798444060261793, "grad_norm": 0.4973760960380266, "learning_rate": 4.5933851213650005e-06, "loss": 0.462, "step": 9699 }, { "epoch": 3.798839219560385, "grad_norm": 0.47361402891352083, "learning_rate": 4.593299470633821e-06, "loss": 0.4658, "step": 9700 }, { "epoch": 3.7992343788589773, "grad_norm": 0.48887488140182345, "learning_rate": 4.593213811681433e-06, "loss": 0.455, "step": 9701 }, { "epoch": 3.7996295381575695, "grad_norm": 0.4867609427093806, "learning_rate": 4.593128144508171e-06, "loss": 0.4464, "step": 9702 }, { "epoch": 3.800024697456162, "grad_norm": 0.5026991707874338, "learning_rate": 4.593042469114374e-06, "loss": 0.4899, "step": 9703 }, { "epoch": 3.800419856754754, "grad_norm": 0.47389953125387, "learning_rate": 4.592956785500376e-06, "loss": 0.4522, "step": 9704 }, { "epoch": 3.8008150160533463, "grad_norm": 0.49268631953734526, "learning_rate": 4.5928710936665156e-06, "loss": 0.4552, "step": 9705 }, { "epoch": 3.8012101753519385, "grad_norm": 0.4743740110089471, "learning_rate": 4.592785393613128e-06, "loss": 0.4472, "step": 9706 }, { "epoch": 3.8016053346505307, "grad_norm": 0.4888493962366472, "learning_rate": 4.59269968534055e-06, "loss": 0.4569, "step": 9707 }, { "epoch": 3.802000493949123, "grad_norm": 0.49012796067105, "learning_rate": 4.592613968849119e-06, "loss": 0.4644, "step": 9708 }, { "epoch": 3.8023956532477152, "grad_norm": 0.48032600347511467, "learning_rate": 4.592528244139171e-06, "loss": 0.4587, "step": 9709 }, { "epoch": 3.8027908125463075, "grad_norm": 0.49803327331966196, "learning_rate": 4.5924425112110425e-06, "loss": 0.4694, "step": 9710 }, { "epoch": 3.8031859718448997, "grad_norm": 0.4876381618700325, "learning_rate": 4.59235677006507e-06, "loss": 0.4651, "step": 9711 }, { "epoch": 3.803581131143492, "grad_norm": 0.4865814270109722, "learning_rate": 4.592271020701591e-06, "loss": 0.4516, "step": 9712 }, { "epoch": 3.8039762904420846, "grad_norm": 0.47309099910390273, "learning_rate": 4.592185263120942e-06, "loss": 0.4569, "step": 9713 }, { "epoch": 3.804371449740677, "grad_norm": 0.4828676962421864, "learning_rate": 4.592099497323459e-06, "loss": 0.4503, "step": 9714 }, { "epoch": 3.804766609039269, "grad_norm": 0.5044657636177922, "learning_rate": 4.592013723309481e-06, "loss": 0.4534, "step": 9715 }, { "epoch": 3.8051617683378613, "grad_norm": 0.4741720246957415, "learning_rate": 4.591927941079341e-06, "loss": 0.4707, "step": 9716 }, { "epoch": 3.8055569276364536, "grad_norm": 0.49614095314653145, "learning_rate": 4.591842150633381e-06, "loss": 0.4493, "step": 9717 }, { "epoch": 3.805952086935046, "grad_norm": 0.4882532495153286, "learning_rate": 4.5917563519719334e-06, "loss": 0.4653, "step": 9718 }, { "epoch": 3.806347246233638, "grad_norm": 0.4949527043726383, "learning_rate": 4.5916705450953384e-06, "loss": 0.4679, "step": 9719 }, { "epoch": 3.8067424055322303, "grad_norm": 0.5602238728690052, "learning_rate": 4.591584730003931e-06, "loss": 0.4557, "step": 9720 }, { "epoch": 3.8071375648308226, "grad_norm": 0.4912568517475018, "learning_rate": 4.591498906698048e-06, "loss": 0.4709, "step": 9721 }, { "epoch": 3.807532724129415, "grad_norm": 0.4813539895906584, "learning_rate": 4.591413075178029e-06, "loss": 0.4531, "step": 9722 }, { "epoch": 3.807927883428007, "grad_norm": 0.49909964708037335, "learning_rate": 4.591327235444209e-06, "loss": 0.4624, "step": 9723 }, { "epoch": 3.8083230427265993, "grad_norm": 0.4814723511010151, "learning_rate": 4.591241387496925e-06, "loss": 0.4705, "step": 9724 }, { "epoch": 3.8087182020251915, "grad_norm": 0.4827612230715431, "learning_rate": 4.591155531336514e-06, "loss": 0.4532, "step": 9725 }, { "epoch": 3.8091133613237838, "grad_norm": 0.4938055389724764, "learning_rate": 4.591069666963315e-06, "loss": 0.4554, "step": 9726 }, { "epoch": 3.809508520622376, "grad_norm": 0.4852708740829828, "learning_rate": 4.590983794377664e-06, "loss": 0.4645, "step": 9727 }, { "epoch": 3.8099036799209682, "grad_norm": 0.48556679669930314, "learning_rate": 4.590897913579898e-06, "loss": 0.4488, "step": 9728 }, { "epoch": 3.8102988392195605, "grad_norm": 0.48745710424116256, "learning_rate": 4.590812024570355e-06, "loss": 0.4391, "step": 9729 }, { "epoch": 3.8106939985181527, "grad_norm": 0.4941001569984601, "learning_rate": 4.590726127349372e-06, "loss": 0.4688, "step": 9730 }, { "epoch": 3.811089157816745, "grad_norm": 0.5066586936353458, "learning_rate": 4.5906402219172865e-06, "loss": 0.4672, "step": 9731 }, { "epoch": 3.811484317115337, "grad_norm": 0.49499770510893454, "learning_rate": 4.590554308274435e-06, "loss": 0.4665, "step": 9732 }, { "epoch": 3.8118794764139294, "grad_norm": 0.48385700939951193, "learning_rate": 4.5904683864211564e-06, "loss": 0.4287, "step": 9733 }, { "epoch": 3.8122746357125217, "grad_norm": 0.5006235625670952, "learning_rate": 4.590382456357787e-06, "loss": 0.4763, "step": 9734 }, { "epoch": 3.812669795011114, "grad_norm": 0.5063019446978001, "learning_rate": 4.5902965180846645e-06, "loss": 0.4849, "step": 9735 }, { "epoch": 3.813064954309706, "grad_norm": 0.47648185972596263, "learning_rate": 4.590210571602127e-06, "loss": 0.4518, "step": 9736 }, { "epoch": 3.8134601136082984, "grad_norm": 0.48236587628734506, "learning_rate": 4.590124616910511e-06, "loss": 0.4613, "step": 9737 }, { "epoch": 3.8138552729068906, "grad_norm": 0.4854824711766063, "learning_rate": 4.590038654010155e-06, "loss": 0.4611, "step": 9738 }, { "epoch": 3.814250432205483, "grad_norm": 0.47570054733499323, "learning_rate": 4.5899526829013966e-06, "loss": 0.4569, "step": 9739 }, { "epoch": 3.814645591504075, "grad_norm": 0.47994844957076466, "learning_rate": 4.589866703584573e-06, "loss": 0.465, "step": 9740 }, { "epoch": 3.8150407508026674, "grad_norm": 0.49181023857768696, "learning_rate": 4.589780716060022e-06, "loss": 0.4519, "step": 9741 }, { "epoch": 3.8154359101012596, "grad_norm": 0.4784710573418661, "learning_rate": 4.589694720328081e-06, "loss": 0.4663, "step": 9742 }, { "epoch": 3.815831069399852, "grad_norm": 0.4852518817476975, "learning_rate": 4.589608716389088e-06, "loss": 0.4718, "step": 9743 }, { "epoch": 3.816226228698444, "grad_norm": 0.4869284641618619, "learning_rate": 4.589522704243381e-06, "loss": 0.4675, "step": 9744 }, { "epoch": 3.8166213879970363, "grad_norm": 0.4895242207948993, "learning_rate": 4.589436683891299e-06, "loss": 0.4726, "step": 9745 }, { "epoch": 3.8170165472956286, "grad_norm": 0.49471582563789906, "learning_rate": 4.589350655333177e-06, "loss": 0.4548, "step": 9746 }, { "epoch": 3.817411706594221, "grad_norm": 0.48302731444823493, "learning_rate": 4.589264618569355e-06, "loss": 0.453, "step": 9747 }, { "epoch": 3.817806865892813, "grad_norm": 0.4991821360059649, "learning_rate": 4.5891785736001696e-06, "loss": 0.4618, "step": 9748 }, { "epoch": 3.8182020251914053, "grad_norm": 0.5035434112433709, "learning_rate": 4.58909252042596e-06, "loss": 0.4794, "step": 9749 }, { "epoch": 3.8185971844899975, "grad_norm": 0.5381210527580598, "learning_rate": 4.589006459047063e-06, "loss": 0.457, "step": 9750 }, { "epoch": 3.8189923437885898, "grad_norm": 0.5042220819531829, "learning_rate": 4.588920389463817e-06, "loss": 0.4713, "step": 9751 }, { "epoch": 3.819387503087182, "grad_norm": 0.49824542001689043, "learning_rate": 4.5888343116765604e-06, "loss": 0.459, "step": 9752 }, { "epoch": 3.8197826623857742, "grad_norm": 0.4812925277928597, "learning_rate": 4.5887482256856305e-06, "loss": 0.4468, "step": 9753 }, { "epoch": 3.8201778216843665, "grad_norm": 0.4867523713344396, "learning_rate": 4.588662131491367e-06, "loss": 0.4725, "step": 9754 }, { "epoch": 3.8205729809829587, "grad_norm": 0.5045119465234934, "learning_rate": 4.588576029094107e-06, "loss": 0.4651, "step": 9755 }, { "epoch": 3.820968140281551, "grad_norm": 0.49383831121424904, "learning_rate": 4.588489918494188e-06, "loss": 0.4575, "step": 9756 }, { "epoch": 3.821363299580143, "grad_norm": 0.49047441119572965, "learning_rate": 4.588403799691949e-06, "loss": 0.4587, "step": 9757 }, { "epoch": 3.8217584588787354, "grad_norm": 0.513029731134795, "learning_rate": 4.5883176726877276e-06, "loss": 0.4473, "step": 9758 }, { "epoch": 3.8221536181773277, "grad_norm": 0.5013246021960757, "learning_rate": 4.588231537481863e-06, "loss": 0.4535, "step": 9759 }, { "epoch": 3.82254877747592, "grad_norm": 0.4916166616553396, "learning_rate": 4.5881453940746925e-06, "loss": 0.4626, "step": 9760 }, { "epoch": 3.822943936774512, "grad_norm": 0.49314443616382275, "learning_rate": 4.588059242466555e-06, "loss": 0.4711, "step": 9761 }, { "epoch": 3.8233390960731044, "grad_norm": 0.5008098059952933, "learning_rate": 4.587973082657789e-06, "loss": 0.445, "step": 9762 }, { "epoch": 3.8237342553716966, "grad_norm": 0.4838807035601777, "learning_rate": 4.587886914648733e-06, "loss": 0.456, "step": 9763 }, { "epoch": 3.824129414670289, "grad_norm": 0.48847805670933026, "learning_rate": 4.587800738439725e-06, "loss": 0.461, "step": 9764 }, { "epoch": 3.824524573968881, "grad_norm": 0.4836684897311038, "learning_rate": 4.587714554031103e-06, "loss": 0.4676, "step": 9765 }, { "epoch": 3.8249197332674734, "grad_norm": 0.4744461410812829, "learning_rate": 4.587628361423205e-06, "loss": 0.4467, "step": 9766 }, { "epoch": 3.8253148925660656, "grad_norm": 0.5046875959722604, "learning_rate": 4.587542160616372e-06, "loss": 0.4594, "step": 9767 }, { "epoch": 3.825710051864658, "grad_norm": 0.4894718925080993, "learning_rate": 4.587455951610941e-06, "loss": 0.4596, "step": 9768 }, { "epoch": 3.82610521116325, "grad_norm": 0.6451510629529777, "learning_rate": 4.58736973440725e-06, "loss": 0.456, "step": 9769 }, { "epoch": 3.8265003704618423, "grad_norm": 0.49698088184421574, "learning_rate": 4.587283509005638e-06, "loss": 0.4729, "step": 9770 }, { "epoch": 3.8268955297604346, "grad_norm": 0.4744119187512547, "learning_rate": 4.587197275406444e-06, "loss": 0.4611, "step": 9771 }, { "epoch": 3.827290689059027, "grad_norm": 0.48047611658306816, "learning_rate": 4.587111033610007e-06, "loss": 0.4404, "step": 9772 }, { "epoch": 3.827685848357619, "grad_norm": 0.5074066632267403, "learning_rate": 4.587024783616665e-06, "loss": 0.4595, "step": 9773 }, { "epoch": 3.8280810076562113, "grad_norm": 0.4886772671378936, "learning_rate": 4.586938525426756e-06, "loss": 0.4584, "step": 9774 }, { "epoch": 3.8284761669548035, "grad_norm": 0.4855093381504905, "learning_rate": 4.586852259040621e-06, "loss": 0.4727, "step": 9775 }, { "epoch": 3.8288713262533958, "grad_norm": 0.47748604681856477, "learning_rate": 4.586765984458597e-06, "loss": 0.441, "step": 9776 }, { "epoch": 3.829266485551988, "grad_norm": 0.48420777791114294, "learning_rate": 4.586679701681024e-06, "loss": 0.4587, "step": 9777 }, { "epoch": 3.8296616448505802, "grad_norm": 0.4833361501646878, "learning_rate": 4.586593410708239e-06, "loss": 0.4474, "step": 9778 }, { "epoch": 3.8300568041491725, "grad_norm": 0.49869671048495123, "learning_rate": 4.586507111540583e-06, "loss": 0.4707, "step": 9779 }, { "epoch": 3.8304519634477647, "grad_norm": 0.4879810325262599, "learning_rate": 4.586420804178394e-06, "loss": 0.4575, "step": 9780 }, { "epoch": 3.830847122746357, "grad_norm": 0.48913117133273337, "learning_rate": 4.58633448862201e-06, "loss": 0.4654, "step": 9781 }, { "epoch": 3.831242282044949, "grad_norm": 0.5998061332057096, "learning_rate": 4.586248164871773e-06, "loss": 0.4651, "step": 9782 }, { "epoch": 3.8316374413435415, "grad_norm": 0.5325641228028988, "learning_rate": 4.5861618329280185e-06, "loss": 0.4674, "step": 9783 }, { "epoch": 3.832032600642134, "grad_norm": 0.48101331017897647, "learning_rate": 4.586075492791088e-06, "loss": 0.4709, "step": 9784 }, { "epoch": 3.8324277599407264, "grad_norm": 0.48810663701291834, "learning_rate": 4.585989144461319e-06, "loss": 0.4682, "step": 9785 }, { "epoch": 3.8328229192393186, "grad_norm": 0.49088685212482186, "learning_rate": 4.585902787939052e-06, "loss": 0.4667, "step": 9786 }, { "epoch": 3.833218078537911, "grad_norm": 0.4816964576299058, "learning_rate": 4.585816423224625e-06, "loss": 0.4619, "step": 9787 }, { "epoch": 3.833613237836503, "grad_norm": 0.49202140202010347, "learning_rate": 4.585730050318378e-06, "loss": 0.4692, "step": 9788 }, { "epoch": 3.8340083971350953, "grad_norm": 0.49988725949905755, "learning_rate": 4.585643669220648e-06, "loss": 0.4513, "step": 9789 }, { "epoch": 3.8344035564336876, "grad_norm": 0.4826706645801356, "learning_rate": 4.585557279931779e-06, "loss": 0.4474, "step": 9790 }, { "epoch": 3.83479871573228, "grad_norm": 0.49913183679571577, "learning_rate": 4.585470882452106e-06, "loss": 0.4592, "step": 9791 }, { "epoch": 3.835193875030872, "grad_norm": 0.5102617938895149, "learning_rate": 4.58538447678197e-06, "loss": 0.4622, "step": 9792 }, { "epoch": 3.8355890343294643, "grad_norm": 0.5093288577737429, "learning_rate": 4.58529806292171e-06, "loss": 0.4876, "step": 9793 }, { "epoch": 3.8359841936280565, "grad_norm": 0.4847749359173575, "learning_rate": 4.585211640871665e-06, "loss": 0.4527, "step": 9794 }, { "epoch": 3.836379352926649, "grad_norm": 0.4967128561121068, "learning_rate": 4.5851252106321755e-06, "loss": 0.4512, "step": 9795 }, { "epoch": 3.836774512225241, "grad_norm": 0.5122771054364694, "learning_rate": 4.585038772203581e-06, "loss": 0.4581, "step": 9796 }, { "epoch": 3.8371696715238333, "grad_norm": 0.4778663343535554, "learning_rate": 4.584952325586219e-06, "loss": 0.455, "step": 9797 }, { "epoch": 3.8375648308224255, "grad_norm": 0.4730286915929293, "learning_rate": 4.584865870780431e-06, "loss": 0.4438, "step": 9798 }, { "epoch": 3.8379599901210177, "grad_norm": 0.49474641210541287, "learning_rate": 4.584779407786556e-06, "loss": 0.4647, "step": 9799 }, { "epoch": 3.83835514941961, "grad_norm": 0.4979187357021784, "learning_rate": 4.5846929366049316e-06, "loss": 0.469, "step": 9800 }, { "epoch": 3.838750308718202, "grad_norm": 0.4969648880958739, "learning_rate": 4.584606457235901e-06, "loss": 0.472, "step": 9801 }, { "epoch": 3.8391454680167945, "grad_norm": 0.48419610177762074, "learning_rate": 4.584519969679803e-06, "loss": 0.4609, "step": 9802 }, { "epoch": 3.8395406273153867, "grad_norm": 0.47519923964504873, "learning_rate": 4.584433473936975e-06, "loss": 0.4407, "step": 9803 }, { "epoch": 3.839935786613979, "grad_norm": 0.5036626686511386, "learning_rate": 4.584346970007758e-06, "loss": 0.4666, "step": 9804 }, { "epoch": 3.840330945912571, "grad_norm": 0.49885414130181815, "learning_rate": 4.584260457892492e-06, "loss": 0.4486, "step": 9805 }, { "epoch": 3.8407261052111634, "grad_norm": 0.4973984116213614, "learning_rate": 4.584173937591516e-06, "loss": 0.4716, "step": 9806 }, { "epoch": 3.8411212645097557, "grad_norm": 0.47438026051623494, "learning_rate": 4.584087409105171e-06, "loss": 0.4508, "step": 9807 }, { "epoch": 3.841516423808348, "grad_norm": 0.48007747354455976, "learning_rate": 4.5840008724337955e-06, "loss": 0.4631, "step": 9808 }, { "epoch": 3.84191158310694, "grad_norm": 0.587833445348625, "learning_rate": 4.583914327577731e-06, "loss": 0.4741, "step": 9809 }, { "epoch": 3.8423067424055324, "grad_norm": 0.499951286009737, "learning_rate": 4.583827774537316e-06, "loss": 0.456, "step": 9810 }, { "epoch": 3.8427019017041246, "grad_norm": 0.4848679924648173, "learning_rate": 4.583741213312891e-06, "loss": 0.4729, "step": 9811 }, { "epoch": 3.843097061002717, "grad_norm": 0.48840107615604933, "learning_rate": 4.583654643904796e-06, "loss": 0.4642, "step": 9812 }, { "epoch": 3.843492220301309, "grad_norm": 0.4968970099252351, "learning_rate": 4.5835680663133705e-06, "loss": 0.4621, "step": 9813 }, { "epoch": 3.8438873795999013, "grad_norm": 0.47240176053138544, "learning_rate": 4.583481480538955e-06, "loss": 0.458, "step": 9814 }, { "epoch": 3.8442825388984936, "grad_norm": 0.4873267364456109, "learning_rate": 4.583394886581889e-06, "loss": 0.4614, "step": 9815 }, { "epoch": 3.844677698197086, "grad_norm": 0.4813398493399383, "learning_rate": 4.5833082844425135e-06, "loss": 0.4543, "step": 9816 }, { "epoch": 3.845072857495678, "grad_norm": 0.49742091745038736, "learning_rate": 4.583221674121167e-06, "loss": 0.4437, "step": 9817 }, { "epoch": 3.8454680167942703, "grad_norm": 0.4937773614181849, "learning_rate": 4.5831350556181934e-06, "loss": 0.4668, "step": 9818 }, { "epoch": 3.8458631760928625, "grad_norm": 0.4807185417064237, "learning_rate": 4.583048428933928e-06, "loss": 0.4303, "step": 9819 }, { "epoch": 3.846258335391455, "grad_norm": 0.49996366474147874, "learning_rate": 4.582961794068714e-06, "loss": 0.463, "step": 9820 }, { "epoch": 3.846653494690047, "grad_norm": 0.4767782183375261, "learning_rate": 4.582875151022891e-06, "loss": 0.4725, "step": 9821 }, { "epoch": 3.8470486539886393, "grad_norm": 0.48139899842359823, "learning_rate": 4.582788499796798e-06, "loss": 0.4533, "step": 9822 }, { "epoch": 3.8474438132872315, "grad_norm": 0.4963731934676299, "learning_rate": 4.582701840390778e-06, "loss": 0.4667, "step": 9823 }, { "epoch": 3.8478389725858237, "grad_norm": 0.48437203938631246, "learning_rate": 4.5826151728051696e-06, "loss": 0.4576, "step": 9824 }, { "epoch": 3.848234131884416, "grad_norm": 0.4788898724749113, "learning_rate": 4.582528497040313e-06, "loss": 0.4588, "step": 9825 }, { "epoch": 3.8486292911830082, "grad_norm": 0.4864655138587333, "learning_rate": 4.5824418130965485e-06, "loss": 0.4664, "step": 9826 }, { "epoch": 3.8490244504816005, "grad_norm": 0.491011327584925, "learning_rate": 4.582355120974218e-06, "loss": 0.443, "step": 9827 }, { "epoch": 3.8494196097801927, "grad_norm": 0.5037804637537733, "learning_rate": 4.582268420673661e-06, "loss": 0.496, "step": 9828 }, { "epoch": 3.849814769078785, "grad_norm": 0.4911686450431375, "learning_rate": 4.582181712195218e-06, "loss": 0.4648, "step": 9829 }, { "epoch": 3.850209928377377, "grad_norm": 0.4885405950646796, "learning_rate": 4.582094995539229e-06, "loss": 0.4663, "step": 9830 }, { "epoch": 3.8506050876759694, "grad_norm": 0.46999551043751275, "learning_rate": 4.582008270706035e-06, "loss": 0.4411, "step": 9831 }, { "epoch": 3.8510002469745617, "grad_norm": 0.4882884684859506, "learning_rate": 4.581921537695978e-06, "loss": 0.4626, "step": 9832 }, { "epoch": 3.851395406273154, "grad_norm": 0.4913585267933321, "learning_rate": 4.581834796509397e-06, "loss": 0.457, "step": 9833 }, { "epoch": 3.851790565571746, "grad_norm": 0.4659923322842953, "learning_rate": 4.581748047146633e-06, "loss": 0.4434, "step": 9834 }, { "epoch": 3.8521857248703384, "grad_norm": 0.49559627762410036, "learning_rate": 4.581661289608027e-06, "loss": 0.4724, "step": 9835 }, { "epoch": 3.8525808841689306, "grad_norm": 0.4881260286548596, "learning_rate": 4.581574523893919e-06, "loss": 0.4519, "step": 9836 }, { "epoch": 3.852976043467523, "grad_norm": 0.488542461742271, "learning_rate": 4.581487750004651e-06, "loss": 0.4687, "step": 9837 }, { "epoch": 3.853371202766115, "grad_norm": 0.47946619013094255, "learning_rate": 4.581400967940562e-06, "loss": 0.4507, "step": 9838 }, { "epoch": 3.8537663620647074, "grad_norm": 0.49113873280049447, "learning_rate": 4.581314177701994e-06, "loss": 0.4652, "step": 9839 }, { "epoch": 3.8541615213632996, "grad_norm": 0.506053760875361, "learning_rate": 4.581227379289288e-06, "loss": 0.4565, "step": 9840 }, { "epoch": 3.854556680661892, "grad_norm": 0.498570278337444, "learning_rate": 4.581140572702785e-06, "loss": 0.4572, "step": 9841 }, { "epoch": 3.854951839960484, "grad_norm": 0.4750541834581741, "learning_rate": 4.5810537579428255e-06, "loss": 0.4602, "step": 9842 }, { "epoch": 3.8553469992590763, "grad_norm": 0.48656050386521776, "learning_rate": 4.580966935009751e-06, "loss": 0.4811, "step": 9843 }, { "epoch": 3.8557421585576686, "grad_norm": 0.48249156384469394, "learning_rate": 4.580880103903901e-06, "loss": 0.4764, "step": 9844 }, { "epoch": 3.856137317856261, "grad_norm": 0.4880019438321168, "learning_rate": 4.580793264625618e-06, "loss": 0.4573, "step": 9845 }, { "epoch": 3.856532477154853, "grad_norm": 0.5055590149018785, "learning_rate": 4.5807064171752426e-06, "loss": 0.4484, "step": 9846 }, { "epoch": 3.8569276364534453, "grad_norm": 0.48601910111621366, "learning_rate": 4.580619561553116e-06, "loss": 0.4391, "step": 9847 }, { "epoch": 3.8573227957520375, "grad_norm": 0.5571216265744174, "learning_rate": 4.5805326977595784e-06, "loss": 0.4402, "step": 9848 }, { "epoch": 3.8577179550506298, "grad_norm": 0.49789507388969795, "learning_rate": 4.5804458257949725e-06, "loss": 0.4753, "step": 9849 }, { "epoch": 3.858113114349222, "grad_norm": 0.493498154898902, "learning_rate": 4.580358945659639e-06, "loss": 0.4788, "step": 9850 }, { "epoch": 3.8585082736478142, "grad_norm": 0.48765613657828505, "learning_rate": 4.580272057353918e-06, "loss": 0.4624, "step": 9851 }, { "epoch": 3.8589034329464065, "grad_norm": 0.4844714607389518, "learning_rate": 4.580185160878151e-06, "loss": 0.459, "step": 9852 }, { "epoch": 3.8592985922449987, "grad_norm": 0.497325111713686, "learning_rate": 4.580098256232681e-06, "loss": 0.4842, "step": 9853 }, { "epoch": 3.859693751543591, "grad_norm": 0.48442884762655214, "learning_rate": 4.5800113434178485e-06, "loss": 0.4475, "step": 9854 }, { "epoch": 3.860088910842183, "grad_norm": 0.4942069483791756, "learning_rate": 4.579924422433993e-06, "loss": 0.458, "step": 9855 }, { "epoch": 3.8604840701407754, "grad_norm": 0.48273095849038933, "learning_rate": 4.579837493281459e-06, "loss": 0.459, "step": 9856 }, { "epoch": 3.8608792294393677, "grad_norm": 0.48744132992622485, "learning_rate": 4.579750555960585e-06, "loss": 0.466, "step": 9857 }, { "epoch": 3.86127438873796, "grad_norm": 0.4947598696132769, "learning_rate": 4.5796636104717155e-06, "loss": 0.4701, "step": 9858 }, { "epoch": 3.861669548036552, "grad_norm": 0.4856546204635866, "learning_rate": 4.579576656815188e-06, "loss": 0.4595, "step": 9859 }, { "epoch": 3.8620647073351444, "grad_norm": 0.484668155652294, "learning_rate": 4.579489694991347e-06, "loss": 0.452, "step": 9860 }, { "epoch": 3.8624598666337366, "grad_norm": 0.49823648475531956, "learning_rate": 4.579402725000534e-06, "loss": 0.4525, "step": 9861 }, { "epoch": 3.862855025932329, "grad_norm": 0.4879242829207528, "learning_rate": 4.579315746843088e-06, "loss": 0.4682, "step": 9862 }, { "epoch": 3.863250185230921, "grad_norm": 0.5043668954023681, "learning_rate": 4.579228760519354e-06, "loss": 0.4696, "step": 9863 }, { "epoch": 3.8636453445295134, "grad_norm": 0.47800525043818226, "learning_rate": 4.579141766029672e-06, "loss": 0.4392, "step": 9864 }, { "epoch": 3.8640405038281056, "grad_norm": 0.4946490914928116, "learning_rate": 4.579054763374383e-06, "loss": 0.4515, "step": 9865 }, { "epoch": 3.864435663126698, "grad_norm": 0.48212116695919593, "learning_rate": 4.578967752553829e-06, "loss": 0.4771, "step": 9866 }, { "epoch": 3.86483082242529, "grad_norm": 0.49582742338022634, "learning_rate": 4.578880733568353e-06, "loss": 0.4477, "step": 9867 }, { "epoch": 3.8652259817238823, "grad_norm": 0.49339493973167836, "learning_rate": 4.578793706418295e-06, "loss": 0.4487, "step": 9868 }, { "epoch": 3.8656211410224746, "grad_norm": 0.4929894857650072, "learning_rate": 4.578706671103998e-06, "loss": 0.4811, "step": 9869 }, { "epoch": 3.866016300321067, "grad_norm": 0.5010690182198145, "learning_rate": 4.578619627625803e-06, "loss": 0.4644, "step": 9870 }, { "epoch": 3.866411459619659, "grad_norm": 0.48769438400413784, "learning_rate": 4.578532575984053e-06, "loss": 0.4517, "step": 9871 }, { "epoch": 3.8668066189182513, "grad_norm": 0.49383384006167214, "learning_rate": 4.5784455161790895e-06, "loss": 0.4656, "step": 9872 }, { "epoch": 3.8672017782168435, "grad_norm": 0.4810665135549552, "learning_rate": 4.578358448211253e-06, "loss": 0.4454, "step": 9873 }, { "epoch": 3.8675969375154358, "grad_norm": 0.48861310498403004, "learning_rate": 4.578271372080888e-06, "loss": 0.4605, "step": 9874 }, { "epoch": 3.867992096814028, "grad_norm": 0.49304147135775594, "learning_rate": 4.5781842877883335e-06, "loss": 0.4559, "step": 9875 }, { "epoch": 3.8683872561126202, "grad_norm": 0.4901516373069663, "learning_rate": 4.578097195333935e-06, "loss": 0.4488, "step": 9876 }, { "epoch": 3.8687824154112125, "grad_norm": 0.47112670377763216, "learning_rate": 4.57801009471803e-06, "loss": 0.4447, "step": 9877 }, { "epoch": 3.8691775747098047, "grad_norm": 0.48837740071566554, "learning_rate": 4.577922985940965e-06, "loss": 0.457, "step": 9878 }, { "epoch": 3.869572734008397, "grad_norm": 0.4965735724057039, "learning_rate": 4.57783586900308e-06, "loss": 0.4508, "step": 9879 }, { "epoch": 3.869967893306989, "grad_norm": 0.4896825295302401, "learning_rate": 4.577748743904717e-06, "loss": 0.4706, "step": 9880 }, { "epoch": 3.8703630526055814, "grad_norm": 0.48477203855856776, "learning_rate": 4.577661610646219e-06, "loss": 0.4486, "step": 9881 }, { "epoch": 3.8707582119041737, "grad_norm": 0.4772573464731468, "learning_rate": 4.577574469227928e-06, "loss": 0.4639, "step": 9882 }, { "epoch": 3.871153371202766, "grad_norm": 0.47562723439852267, "learning_rate": 4.577487319650186e-06, "loss": 0.4738, "step": 9883 }, { "epoch": 3.871548530501358, "grad_norm": 0.48073672972235093, "learning_rate": 4.577400161913335e-06, "loss": 0.4711, "step": 9884 }, { "epoch": 3.8719436897999504, "grad_norm": 0.4937434966089234, "learning_rate": 4.577312996017718e-06, "loss": 0.4678, "step": 9885 }, { "epoch": 3.8723388490985426, "grad_norm": 0.5076968652598326, "learning_rate": 4.5772258219636765e-06, "loss": 0.4633, "step": 9886 }, { "epoch": 3.872734008397135, "grad_norm": 0.4945776677023478, "learning_rate": 4.5771386397515535e-06, "loss": 0.4664, "step": 9887 }, { "epoch": 3.873129167695727, "grad_norm": 0.49792744846750037, "learning_rate": 4.577051449381691e-06, "loss": 0.4664, "step": 9888 }, { "epoch": 3.8735243269943194, "grad_norm": 0.48812305721614563, "learning_rate": 4.576964250854432e-06, "loss": 0.4683, "step": 9889 }, { "epoch": 3.8739194862929116, "grad_norm": 0.48682855568601446, "learning_rate": 4.5768770441701184e-06, "loss": 0.4594, "step": 9890 }, { "epoch": 3.874314645591504, "grad_norm": 0.48935463641111665, "learning_rate": 4.576789829329093e-06, "loss": 0.4752, "step": 9891 }, { "epoch": 3.874709804890096, "grad_norm": 0.5341391098277775, "learning_rate": 4.576702606331698e-06, "loss": 0.4527, "step": 9892 }, { "epoch": 3.8751049641886883, "grad_norm": 0.49067023477198496, "learning_rate": 4.5766153751782775e-06, "loss": 0.4546, "step": 9893 }, { "epoch": 3.8755001234872806, "grad_norm": 0.4953575355699284, "learning_rate": 4.576528135869171e-06, "loss": 0.4609, "step": 9894 }, { "epoch": 3.875895282785873, "grad_norm": 0.5064570635778317, "learning_rate": 4.576440888404724e-06, "loss": 0.4615, "step": 9895 }, { "epoch": 3.876290442084465, "grad_norm": 0.4852237088041872, "learning_rate": 4.576353632785278e-06, "loss": 0.4504, "step": 9896 }, { "epoch": 3.8766856013830573, "grad_norm": 0.4896826011480754, "learning_rate": 4.576266369011175e-06, "loss": 0.46, "step": 9897 }, { "epoch": 3.8770807606816495, "grad_norm": 0.48478132345598957, "learning_rate": 4.576179097082759e-06, "loss": 0.4634, "step": 9898 }, { "epoch": 3.8774759199802418, "grad_norm": 0.49074759563963166, "learning_rate": 4.576091817000372e-06, "loss": 0.4631, "step": 9899 }, { "epoch": 3.877871079278834, "grad_norm": 0.5444614925031207, "learning_rate": 4.576004528764358e-06, "loss": 0.4646, "step": 9900 }, { "epoch": 3.8782662385774263, "grad_norm": 0.4886325205119538, "learning_rate": 4.575917232375058e-06, "loss": 0.4634, "step": 9901 }, { "epoch": 3.878661397876019, "grad_norm": 0.49095261197771417, "learning_rate": 4.575829927832816e-06, "loss": 0.465, "step": 9902 }, { "epoch": 3.879056557174611, "grad_norm": 0.49252455851339666, "learning_rate": 4.575742615137973e-06, "loss": 0.4713, "step": 9903 }, { "epoch": 3.8794517164732034, "grad_norm": 0.5467856159576922, "learning_rate": 4.575655294290875e-06, "loss": 0.479, "step": 9904 }, { "epoch": 3.8798468757717957, "grad_norm": 0.49019837168996383, "learning_rate": 4.575567965291864e-06, "loss": 0.4722, "step": 9905 }, { "epoch": 3.880242035070388, "grad_norm": 0.5341818933608717, "learning_rate": 4.575480628141281e-06, "loss": 0.4527, "step": 9906 }, { "epoch": 3.88063719436898, "grad_norm": 0.4913343796849772, "learning_rate": 4.575393282839471e-06, "loss": 0.473, "step": 9907 }, { "epoch": 3.8810323536675724, "grad_norm": 0.48134695756624346, "learning_rate": 4.575305929386776e-06, "loss": 0.4438, "step": 9908 }, { "epoch": 3.8814275129661646, "grad_norm": 0.5029217370787735, "learning_rate": 4.57521856778354e-06, "loss": 0.4536, "step": 9909 }, { "epoch": 3.881822672264757, "grad_norm": 0.4807948637994951, "learning_rate": 4.5751311980301064e-06, "loss": 0.4405, "step": 9910 }, { "epoch": 3.882217831563349, "grad_norm": 0.47962412886103184, "learning_rate": 4.5750438201268165e-06, "loss": 0.4619, "step": 9911 }, { "epoch": 3.8826129908619413, "grad_norm": 0.49393796080145874, "learning_rate": 4.574956434074014e-06, "loss": 0.4589, "step": 9912 }, { "epoch": 3.8830081501605336, "grad_norm": 0.4946297200387813, "learning_rate": 4.574869039872044e-06, "loss": 0.4811, "step": 9913 }, { "epoch": 3.883403309459126, "grad_norm": 0.5028163739738554, "learning_rate": 4.574781637521247e-06, "loss": 0.4621, "step": 9914 }, { "epoch": 3.883798468757718, "grad_norm": 0.4846491793605942, "learning_rate": 4.5746942270219686e-06, "loss": 0.461, "step": 9915 }, { "epoch": 3.8841936280563103, "grad_norm": 0.5158987625564292, "learning_rate": 4.574606808374551e-06, "loss": 0.4566, "step": 9916 }, { "epoch": 3.8845887873549025, "grad_norm": 0.4833525715221507, "learning_rate": 4.574519381579337e-06, "loss": 0.4601, "step": 9917 }, { "epoch": 3.884983946653495, "grad_norm": 0.5076912429801129, "learning_rate": 4.574431946636671e-06, "loss": 0.4689, "step": 9918 }, { "epoch": 3.885379105952087, "grad_norm": 0.4863106371729655, "learning_rate": 4.574344503546896e-06, "loss": 0.4532, "step": 9919 }, { "epoch": 3.8857742652506793, "grad_norm": 0.49043260914664416, "learning_rate": 4.5742570523103555e-06, "loss": 0.4664, "step": 9920 }, { "epoch": 3.8861694245492715, "grad_norm": 0.5123972555223707, "learning_rate": 4.574169592927392e-06, "loss": 0.4707, "step": 9921 }, { "epoch": 3.8865645838478637, "grad_norm": 0.47548790562191307, "learning_rate": 4.5740821253983505e-06, "loss": 0.4617, "step": 9922 }, { "epoch": 3.886959743146456, "grad_norm": 0.489095797302651, "learning_rate": 4.573994649723575e-06, "loss": 0.4711, "step": 9923 }, { "epoch": 3.887354902445048, "grad_norm": 0.48921262328932935, "learning_rate": 4.573907165903406e-06, "loss": 0.4555, "step": 9924 }, { "epoch": 3.8877500617436405, "grad_norm": 0.49537271238528213, "learning_rate": 4.57381967393819e-06, "loss": 0.4711, "step": 9925 }, { "epoch": 3.8881452210422327, "grad_norm": 0.5259888620610288, "learning_rate": 4.573732173828269e-06, "loss": 0.4623, "step": 9926 }, { "epoch": 3.888540380340825, "grad_norm": 0.48638729424662486, "learning_rate": 4.573644665573987e-06, "loss": 0.4726, "step": 9927 }, { "epoch": 3.888935539639417, "grad_norm": 0.503911496590745, "learning_rate": 4.573557149175689e-06, "loss": 0.4711, "step": 9928 }, { "epoch": 3.8893306989380094, "grad_norm": 0.47957742258960145, "learning_rate": 4.573469624633717e-06, "loss": 0.4574, "step": 9929 }, { "epoch": 3.8897258582366017, "grad_norm": 0.49719338473032687, "learning_rate": 4.5733820919484165e-06, "loss": 0.4637, "step": 9930 }, { "epoch": 3.890121017535194, "grad_norm": 0.49630282899421835, "learning_rate": 4.573294551120129e-06, "loss": 0.4621, "step": 9931 }, { "epoch": 3.890516176833786, "grad_norm": 0.48727643540534094, "learning_rate": 4.573207002149199e-06, "loss": 0.4585, "step": 9932 }, { "epoch": 3.8909113361323784, "grad_norm": 0.4809569000720417, "learning_rate": 4.573119445035972e-06, "loss": 0.4586, "step": 9933 }, { "epoch": 3.8913064954309706, "grad_norm": 0.47571185854215003, "learning_rate": 4.57303187978079e-06, "loss": 0.4683, "step": 9934 }, { "epoch": 3.891701654729563, "grad_norm": 0.48390173237622336, "learning_rate": 4.5729443063839984e-06, "loss": 0.4748, "step": 9935 }, { "epoch": 3.892096814028155, "grad_norm": 0.49110153521074534, "learning_rate": 4.57285672484594e-06, "loss": 0.4597, "step": 9936 }, { "epoch": 3.8924919733267473, "grad_norm": 0.4952646122846938, "learning_rate": 4.572769135166959e-06, "loss": 0.4827, "step": 9937 }, { "epoch": 3.8928871326253396, "grad_norm": 0.4823294860273062, "learning_rate": 4.572681537347398e-06, "loss": 0.47, "step": 9938 }, { "epoch": 3.893282291923932, "grad_norm": 0.48698397822277945, "learning_rate": 4.572593931387604e-06, "loss": 0.4602, "step": 9939 }, { "epoch": 3.893677451222524, "grad_norm": 0.48400546395610927, "learning_rate": 4.572506317287921e-06, "loss": 0.4481, "step": 9940 }, { "epoch": 3.8940726105211163, "grad_norm": 0.48847518203566603, "learning_rate": 4.572418695048689e-06, "loss": 0.4573, "step": 9941 }, { "epoch": 3.8944677698197085, "grad_norm": 0.4854571876960505, "learning_rate": 4.572331064670257e-06, "loss": 0.4626, "step": 9942 }, { "epoch": 3.894862929118301, "grad_norm": 0.47898682929482006, "learning_rate": 4.572243426152965e-06, "loss": 0.44, "step": 9943 }, { "epoch": 3.895258088416893, "grad_norm": 0.491532931524844, "learning_rate": 4.57215577949716e-06, "loss": 0.4807, "step": 9944 }, { "epoch": 3.8956532477154853, "grad_norm": 0.47872724576808984, "learning_rate": 4.572068124703185e-06, "loss": 0.4603, "step": 9945 }, { "epoch": 3.8960484070140775, "grad_norm": 0.45451419510929936, "learning_rate": 4.571980461771386e-06, "loss": 0.458, "step": 9946 }, { "epoch": 3.8964435663126697, "grad_norm": 0.4762087573565171, "learning_rate": 4.571892790702105e-06, "loss": 0.4539, "step": 9947 }, { "epoch": 3.896838725611262, "grad_norm": 0.501694628462472, "learning_rate": 4.571805111495687e-06, "loss": 0.4593, "step": 9948 }, { "epoch": 3.8972338849098542, "grad_norm": 0.49270030012761595, "learning_rate": 4.571717424152476e-06, "loss": 0.4679, "step": 9949 }, { "epoch": 3.8976290442084465, "grad_norm": 0.4925249641495412, "learning_rate": 4.5716297286728184e-06, "loss": 0.4769, "step": 9950 }, { "epoch": 3.8980242035070387, "grad_norm": 0.4858992891832617, "learning_rate": 4.571542025057057e-06, "loss": 0.4795, "step": 9951 }, { "epoch": 3.898419362805631, "grad_norm": 0.4786925181433357, "learning_rate": 4.571454313305536e-06, "loss": 0.4415, "step": 9952 }, { "epoch": 3.898814522104223, "grad_norm": 0.48023530140553916, "learning_rate": 4.5713665934186005e-06, "loss": 0.4498, "step": 9953 }, { "epoch": 3.8992096814028154, "grad_norm": 0.48453549831266507, "learning_rate": 4.571278865396594e-06, "loss": 0.4644, "step": 9954 }, { "epoch": 3.8996048407014077, "grad_norm": 0.4900914360178932, "learning_rate": 4.571191129239863e-06, "loss": 0.474, "step": 9955 }, { "epoch": 3.9, "grad_norm": 0.5041682526200453, "learning_rate": 4.57110338494875e-06, "loss": 0.4554, "step": 9956 }, { "epoch": 3.900395159298592, "grad_norm": 0.5410219421410024, "learning_rate": 4.571015632523601e-06, "loss": 0.4793, "step": 9957 }, { "epoch": 3.9007903185971844, "grad_norm": 0.4825869143751156, "learning_rate": 4.57092787196476e-06, "loss": 0.482, "step": 9958 }, { "epoch": 3.9011854778957766, "grad_norm": 0.48251364045941925, "learning_rate": 4.5708401032725725e-06, "loss": 0.4482, "step": 9959 }, { "epoch": 3.901580637194369, "grad_norm": 0.47369158864484645, "learning_rate": 4.570752326447382e-06, "loss": 0.4565, "step": 9960 }, { "epoch": 3.901975796492961, "grad_norm": 0.4777236188587447, "learning_rate": 4.5706645414895335e-06, "loss": 0.4592, "step": 9961 }, { "epoch": 3.9023709557915534, "grad_norm": 0.4818832878308696, "learning_rate": 4.5705767483993725e-06, "loss": 0.4561, "step": 9962 }, { "epoch": 3.9027661150901456, "grad_norm": 0.4728202329441403, "learning_rate": 4.570488947177243e-06, "loss": 0.4611, "step": 9963 }, { "epoch": 3.903161274388738, "grad_norm": 0.4774824700271554, "learning_rate": 4.570401137823491e-06, "loss": 0.4667, "step": 9964 }, { "epoch": 3.90355643368733, "grad_norm": 0.5201325910426959, "learning_rate": 4.57031332033846e-06, "loss": 0.4556, "step": 9965 }, { "epoch": 3.9039515929859223, "grad_norm": 0.49100204946745024, "learning_rate": 4.570225494722495e-06, "loss": 0.4605, "step": 9966 }, { "epoch": 3.9043467522845146, "grad_norm": 0.4933350216968649, "learning_rate": 4.5701376609759415e-06, "loss": 0.465, "step": 9967 }, { "epoch": 3.904741911583107, "grad_norm": 0.49608467793393873, "learning_rate": 4.570049819099145e-06, "loss": 0.4673, "step": 9968 }, { "epoch": 3.905137070881699, "grad_norm": 0.49885302185060626, "learning_rate": 4.569961969092449e-06, "loss": 0.4731, "step": 9969 }, { "epoch": 3.9055322301802913, "grad_norm": 0.4765724417728314, "learning_rate": 4.569874110956201e-06, "loss": 0.4664, "step": 9970 }, { "epoch": 3.9059273894788835, "grad_norm": 0.48345495768456714, "learning_rate": 4.569786244690743e-06, "loss": 0.4747, "step": 9971 }, { "epoch": 3.9063225487774758, "grad_norm": 0.48003753403167, "learning_rate": 4.569698370296421e-06, "loss": 0.445, "step": 9972 }, { "epoch": 3.9067177080760684, "grad_norm": 0.4923494525260739, "learning_rate": 4.5696104877735815e-06, "loss": 0.4575, "step": 9973 }, { "epoch": 3.9071128673746607, "grad_norm": 0.5211429128554534, "learning_rate": 4.569522597122569e-06, "loss": 0.4637, "step": 9974 }, { "epoch": 3.907508026673253, "grad_norm": 0.4818937227907757, "learning_rate": 4.569434698343727e-06, "loss": 0.4429, "step": 9975 }, { "epoch": 3.907903185971845, "grad_norm": 0.5046848124987253, "learning_rate": 4.569346791437403e-06, "loss": 0.4938, "step": 9976 }, { "epoch": 3.9082983452704374, "grad_norm": 0.5027445720769897, "learning_rate": 4.5692588764039415e-06, "loss": 0.4727, "step": 9977 }, { "epoch": 3.9086935045690296, "grad_norm": 0.5039278354823773, "learning_rate": 4.569170953243688e-06, "loss": 0.4559, "step": 9978 }, { "epoch": 3.909088663867622, "grad_norm": 0.4843847065341945, "learning_rate": 4.569083021956987e-06, "loss": 0.4564, "step": 9979 }, { "epoch": 3.909483823166214, "grad_norm": 0.49170737209975196, "learning_rate": 4.568995082544184e-06, "loss": 0.4752, "step": 9980 }, { "epoch": 3.9098789824648064, "grad_norm": 0.4784679535834229, "learning_rate": 4.568907135005625e-06, "loss": 0.4719, "step": 9981 }, { "epoch": 3.9102741417633986, "grad_norm": 0.48953143430090623, "learning_rate": 4.5688191793416545e-06, "loss": 0.4604, "step": 9982 }, { "epoch": 3.910669301061991, "grad_norm": 0.5026359012231226, "learning_rate": 4.56873121555262e-06, "loss": 0.4494, "step": 9983 }, { "epoch": 3.911064460360583, "grad_norm": 0.48889537939386374, "learning_rate": 4.568643243638864e-06, "loss": 0.4452, "step": 9984 }, { "epoch": 3.9114596196591753, "grad_norm": 0.4968415694067955, "learning_rate": 4.568555263600735e-06, "loss": 0.4645, "step": 9985 }, { "epoch": 3.9118547789577676, "grad_norm": 0.4877243861936003, "learning_rate": 4.568467275438575e-06, "loss": 0.4587, "step": 9986 }, { "epoch": 3.91224993825636, "grad_norm": 0.49223731516425107, "learning_rate": 4.568379279152733e-06, "loss": 0.459, "step": 9987 }, { "epoch": 3.912645097554952, "grad_norm": 0.49250738243308856, "learning_rate": 4.568291274743553e-06, "loss": 0.4778, "step": 9988 }, { "epoch": 3.9130402568535443, "grad_norm": 0.5009850640273527, "learning_rate": 4.56820326221138e-06, "loss": 0.4748, "step": 9989 }, { "epoch": 3.9134354161521365, "grad_norm": 0.4961840767254751, "learning_rate": 4.568115241556562e-06, "loss": 0.4531, "step": 9990 }, { "epoch": 3.9138305754507288, "grad_norm": 0.48894424493050087, "learning_rate": 4.5680272127794424e-06, "loss": 0.4451, "step": 9991 }, { "epoch": 3.914225734749321, "grad_norm": 0.4811481982683269, "learning_rate": 4.567939175880367e-06, "loss": 0.4579, "step": 9992 }, { "epoch": 3.9146208940479132, "grad_norm": 0.4924442883893573, "learning_rate": 4.567851130859683e-06, "loss": 0.4714, "step": 9993 }, { "epoch": 3.9150160533465055, "grad_norm": 0.5035466446279564, "learning_rate": 4.567763077717735e-06, "loss": 0.4947, "step": 9994 }, { "epoch": 3.9154112126450977, "grad_norm": 0.4995580198600491, "learning_rate": 4.56767501645487e-06, "loss": 0.4711, "step": 9995 }, { "epoch": 3.91580637194369, "grad_norm": 0.495893119902799, "learning_rate": 4.5675869470714314e-06, "loss": 0.4749, "step": 9996 }, { "epoch": 3.916201531242282, "grad_norm": 0.4723772655931205, "learning_rate": 4.567498869567769e-06, "loss": 0.4362, "step": 9997 }, { "epoch": 3.9165966905408744, "grad_norm": 0.4718587728404546, "learning_rate": 4.567410783944225e-06, "loss": 0.4638, "step": 9998 }, { "epoch": 3.9169918498394667, "grad_norm": 0.4928852611306575, "learning_rate": 4.567322690201147e-06, "loss": 0.4487, "step": 9999 }, { "epoch": 3.917387009138059, "grad_norm": 0.47713196411455694, "learning_rate": 4.5672345883388816e-06, "loss": 0.455, "step": 10000 }, { "epoch": 3.917782168436651, "grad_norm": 0.49819690808918476, "learning_rate": 4.567146478357773e-06, "loss": 0.4426, "step": 10001 }, { "epoch": 3.9181773277352434, "grad_norm": 0.5062977312521194, "learning_rate": 4.5670583602581685e-06, "loss": 0.4653, "step": 10002 }, { "epoch": 3.9185724870338356, "grad_norm": 0.5003782785606375, "learning_rate": 4.566970234040415e-06, "loss": 0.4615, "step": 10003 }, { "epoch": 3.918967646332428, "grad_norm": 0.5189545106568986, "learning_rate": 4.566882099704857e-06, "loss": 0.4731, "step": 10004 }, { "epoch": 3.91936280563102, "grad_norm": 0.5035785229251792, "learning_rate": 4.566793957251841e-06, "loss": 0.4779, "step": 10005 }, { "epoch": 3.9197579649296124, "grad_norm": 0.4800415368289926, "learning_rate": 4.566705806681712e-06, "loss": 0.4539, "step": 10006 }, { "epoch": 3.9201531242282046, "grad_norm": 0.4882424905193737, "learning_rate": 4.56661764799482e-06, "loss": 0.4584, "step": 10007 }, { "epoch": 3.920548283526797, "grad_norm": 0.47308810535562973, "learning_rate": 4.566529481191507e-06, "loss": 0.4741, "step": 10008 }, { "epoch": 3.920943442825389, "grad_norm": 0.4867276304401251, "learning_rate": 4.566441306272123e-06, "loss": 0.4541, "step": 10009 }, { "epoch": 3.9213386021239813, "grad_norm": 0.6343188880548906, "learning_rate": 4.56635312323701e-06, "loss": 0.4842, "step": 10010 }, { "epoch": 3.9217337614225736, "grad_norm": 0.5071802879941949, "learning_rate": 4.5662649320865186e-06, "loss": 0.489, "step": 10011 }, { "epoch": 3.922128920721166, "grad_norm": 0.5255039030846776, "learning_rate": 4.566176732820991e-06, "loss": 0.4627, "step": 10012 }, { "epoch": 3.922524080019758, "grad_norm": 0.4971011698886485, "learning_rate": 4.566088525440778e-06, "loss": 0.4599, "step": 10013 }, { "epoch": 3.9229192393183503, "grad_norm": 0.4852264543790252, "learning_rate": 4.566000309946223e-06, "loss": 0.4602, "step": 10014 }, { "epoch": 3.9233143986169425, "grad_norm": 0.49830525957051336, "learning_rate": 4.565912086337674e-06, "loss": 0.4736, "step": 10015 }, { "epoch": 3.9237095579155348, "grad_norm": 0.4773373018311242, "learning_rate": 4.565823854615477e-06, "loss": 0.466, "step": 10016 }, { "epoch": 3.924104717214127, "grad_norm": 0.4831436103078164, "learning_rate": 4.565735614779977e-06, "loss": 0.4643, "step": 10017 }, { "epoch": 3.9244998765127193, "grad_norm": 0.4802473860556205, "learning_rate": 4.565647366831522e-06, "loss": 0.4721, "step": 10018 }, { "epoch": 3.9248950358113115, "grad_norm": 0.49874045500383063, "learning_rate": 4.5655591107704595e-06, "loss": 0.4702, "step": 10019 }, { "epoch": 3.9252901951099037, "grad_norm": 0.47831567102450434, "learning_rate": 4.565470846597135e-06, "loss": 0.474, "step": 10020 }, { "epoch": 3.925685354408496, "grad_norm": 0.49992203811316205, "learning_rate": 4.565382574311894e-06, "loss": 0.4655, "step": 10021 }, { "epoch": 3.926080513707088, "grad_norm": 0.4926517107007104, "learning_rate": 4.565294293915086e-06, "loss": 0.4675, "step": 10022 }, { "epoch": 3.9264756730056805, "grad_norm": 0.5084730405938702, "learning_rate": 4.565206005407055e-06, "loss": 0.4822, "step": 10023 }, { "epoch": 3.9268708323042727, "grad_norm": 0.4931236099060291, "learning_rate": 4.565117708788149e-06, "loss": 0.4437, "step": 10024 }, { "epoch": 3.927265991602865, "grad_norm": 0.483301233803827, "learning_rate": 4.565029404058715e-06, "loss": 0.4709, "step": 10025 }, { "epoch": 3.927661150901457, "grad_norm": 0.47819265448253306, "learning_rate": 4.5649410912191e-06, "loss": 0.4658, "step": 10026 }, { "epoch": 3.9280563102000494, "grad_norm": 0.4822872193597864, "learning_rate": 4.564852770269648e-06, "loss": 0.4385, "step": 10027 }, { "epoch": 3.9284514694986417, "grad_norm": 0.48138130491052755, "learning_rate": 4.5647644412107104e-06, "loss": 0.4603, "step": 10028 }, { "epoch": 3.928846628797234, "grad_norm": 0.4945851911204728, "learning_rate": 4.564676104042631e-06, "loss": 0.4505, "step": 10029 }, { "epoch": 3.929241788095826, "grad_norm": 0.48293991390518787, "learning_rate": 4.564587758765759e-06, "loss": 0.45, "step": 10030 }, { "epoch": 3.9296369473944184, "grad_norm": 0.5024502851465005, "learning_rate": 4.5644994053804384e-06, "loss": 0.4603, "step": 10031 }, { "epoch": 3.9300321066930106, "grad_norm": 0.47694264905270695, "learning_rate": 4.564411043887018e-06, "loss": 0.4547, "step": 10032 }, { "epoch": 3.930427265991603, "grad_norm": 0.5039734950596827, "learning_rate": 4.564322674285845e-06, "loss": 0.4551, "step": 10033 }, { "epoch": 3.930822425290195, "grad_norm": 0.48332955949025447, "learning_rate": 4.564234296577266e-06, "loss": 0.4636, "step": 10034 }, { "epoch": 3.9312175845887873, "grad_norm": 0.487981969360117, "learning_rate": 4.564145910761627e-06, "loss": 0.4659, "step": 10035 }, { "epoch": 3.9316127438873796, "grad_norm": 0.4884183172019331, "learning_rate": 4.564057516839277e-06, "loss": 0.4776, "step": 10036 }, { "epoch": 3.932007903185972, "grad_norm": 0.484407990579762, "learning_rate": 4.563969114810563e-06, "loss": 0.4549, "step": 10037 }, { "epoch": 3.932403062484564, "grad_norm": 0.49302390973656574, "learning_rate": 4.563880704675831e-06, "loss": 0.461, "step": 10038 }, { "epoch": 3.9327982217831563, "grad_norm": 0.4824245546460462, "learning_rate": 4.56379228643543e-06, "loss": 0.4456, "step": 10039 }, { "epoch": 3.9331933810817485, "grad_norm": 0.524281421582032, "learning_rate": 4.563703860089705e-06, "loss": 0.4631, "step": 10040 }, { "epoch": 3.933588540380341, "grad_norm": 0.48699863426336437, "learning_rate": 4.563615425639005e-06, "loss": 0.4554, "step": 10041 }, { "epoch": 3.933983699678933, "grad_norm": 0.4956289418570272, "learning_rate": 4.5635269830836764e-06, "loss": 0.4691, "step": 10042 }, { "epoch": 3.9343788589775253, "grad_norm": 0.48487716834135475, "learning_rate": 4.563438532424067e-06, "loss": 0.4785, "step": 10043 }, { "epoch": 3.9347740182761175, "grad_norm": 0.47820983244486903, "learning_rate": 4.563350073660524e-06, "loss": 0.4445, "step": 10044 }, { "epoch": 3.9351691775747097, "grad_norm": 0.5073931859582581, "learning_rate": 4.5632616067933944e-06, "loss": 0.4777, "step": 10045 }, { "epoch": 3.935564336873302, "grad_norm": 0.473757508440239, "learning_rate": 4.563173131823026e-06, "loss": 0.4494, "step": 10046 }, { "epoch": 3.9359594961718942, "grad_norm": 0.4807056489036854, "learning_rate": 4.563084648749767e-06, "loss": 0.4452, "step": 10047 }, { "epoch": 3.9363546554704865, "grad_norm": 0.4878554376029113, "learning_rate": 4.562996157573964e-06, "loss": 0.4536, "step": 10048 }, { "epoch": 3.9367498147690787, "grad_norm": 0.5087371100450915, "learning_rate": 4.562907658295966e-06, "loss": 0.488, "step": 10049 }, { "epoch": 3.937144974067671, "grad_norm": 0.48438840021575363, "learning_rate": 4.562819150916118e-06, "loss": 0.4857, "step": 10050 }, { "epoch": 3.937540133366263, "grad_norm": 0.49057136917157657, "learning_rate": 4.562730635434768e-06, "loss": 0.4643, "step": 10051 }, { "epoch": 3.9379352926648554, "grad_norm": 0.47526482048284263, "learning_rate": 4.562642111852266e-06, "loss": 0.4482, "step": 10052 }, { "epoch": 3.9383304519634477, "grad_norm": 0.5014131338073492, "learning_rate": 4.562553580168958e-06, "loss": 0.4565, "step": 10053 }, { "epoch": 3.93872561126204, "grad_norm": 0.5031712717781439, "learning_rate": 4.562465040385193e-06, "loss": 0.4495, "step": 10054 }, { "epoch": 3.939120770560632, "grad_norm": 1.0950220813960667, "learning_rate": 4.562376492501316e-06, "loss": 0.4635, "step": 10055 }, { "epoch": 3.9395159298592244, "grad_norm": 0.4839972284867313, "learning_rate": 4.5622879365176775e-06, "loss": 0.4449, "step": 10056 }, { "epoch": 3.9399110891578166, "grad_norm": 0.4914974711515712, "learning_rate": 4.562199372434624e-06, "loss": 0.4587, "step": 10057 }, { "epoch": 3.940306248456409, "grad_norm": 0.4876711211009558, "learning_rate": 4.562110800252504e-06, "loss": 0.4591, "step": 10058 }, { "epoch": 3.940701407755001, "grad_norm": 0.47546627363978317, "learning_rate": 4.5620222199716645e-06, "loss": 0.4416, "step": 10059 }, { "epoch": 3.9410965670535933, "grad_norm": 0.48586322704048324, "learning_rate": 4.561933631592453e-06, "loss": 0.4549, "step": 10060 }, { "epoch": 3.9414917263521856, "grad_norm": 0.4926171275617641, "learning_rate": 4.56184503511522e-06, "loss": 0.4598, "step": 10061 }, { "epoch": 3.941886885650778, "grad_norm": 0.5123664239699524, "learning_rate": 4.561756430540311e-06, "loss": 0.4851, "step": 10062 }, { "epoch": 3.94228204494937, "grad_norm": 0.5199067760226548, "learning_rate": 4.5616678178680744e-06, "loss": 0.4807, "step": 10063 }, { "epoch": 3.9426772042479623, "grad_norm": 0.49024172756251366, "learning_rate": 4.561579197098858e-06, "loss": 0.4632, "step": 10064 }, { "epoch": 3.9430723635465545, "grad_norm": 0.4814153973868991, "learning_rate": 4.561490568233013e-06, "loss": 0.4495, "step": 10065 }, { "epoch": 3.943467522845147, "grad_norm": 0.47441139134775145, "learning_rate": 4.561401931270882e-06, "loss": 0.4604, "step": 10066 }, { "epoch": 3.943862682143739, "grad_norm": 0.4924678869233399, "learning_rate": 4.561313286212817e-06, "loss": 0.4699, "step": 10067 }, { "epoch": 3.9442578414423313, "grad_norm": 0.48654953928643213, "learning_rate": 4.561224633059166e-06, "loss": 0.4489, "step": 10068 }, { "epoch": 3.9446530007409235, "grad_norm": 0.4963720302187141, "learning_rate": 4.561135971810275e-06, "loss": 0.4721, "step": 10069 }, { "epoch": 3.9450481600395157, "grad_norm": 0.5091750814580799, "learning_rate": 4.561047302466494e-06, "loss": 0.4736, "step": 10070 }, { "epoch": 3.945443319338108, "grad_norm": 0.49308737751715986, "learning_rate": 4.56095862502817e-06, "loss": 0.4517, "step": 10071 }, { "epoch": 3.9458384786367002, "grad_norm": 0.4895075330718553, "learning_rate": 4.5608699394956525e-06, "loss": 0.4472, "step": 10072 }, { "epoch": 3.9462336379352925, "grad_norm": 0.5031481365671037, "learning_rate": 4.56078124586929e-06, "loss": 0.4716, "step": 10073 }, { "epoch": 3.9466287972338847, "grad_norm": 0.4990715567557606, "learning_rate": 4.560692544149429e-06, "loss": 0.4452, "step": 10074 }, { "epoch": 3.947023956532477, "grad_norm": 0.4795562000811165, "learning_rate": 4.56060383433642e-06, "loss": 0.4639, "step": 10075 }, { "epoch": 3.947419115831069, "grad_norm": 0.4809738041038454, "learning_rate": 4.5605151164306095e-06, "loss": 0.4589, "step": 10076 }, { "epoch": 3.9478142751296614, "grad_norm": 0.5003235218268587, "learning_rate": 4.5604263904323474e-06, "loss": 0.4782, "step": 10077 }, { "epoch": 3.9482094344282537, "grad_norm": 0.49029440459937, "learning_rate": 4.560337656341981e-06, "loss": 0.45, "step": 10078 }, { "epoch": 3.948604593726846, "grad_norm": 0.48335540139031397, "learning_rate": 4.560248914159861e-06, "loss": 0.4601, "step": 10079 }, { "epoch": 3.948999753025438, "grad_norm": 0.4893171254147366, "learning_rate": 4.560160163886332e-06, "loss": 0.4486, "step": 10080 }, { "epoch": 3.9493949123240304, "grad_norm": 0.48236278214675543, "learning_rate": 4.560071405521746e-06, "loss": 0.4755, "step": 10081 }, { "epoch": 3.9497900716226226, "grad_norm": 0.49023533737652236, "learning_rate": 4.559982639066451e-06, "loss": 0.457, "step": 10082 }, { "epoch": 3.950185230921215, "grad_norm": 0.4812818608327351, "learning_rate": 4.559893864520795e-06, "loss": 0.4616, "step": 10083 }, { "epoch": 3.950580390219807, "grad_norm": 0.4809646923115125, "learning_rate": 4.559805081885126e-06, "loss": 0.4535, "step": 10084 }, { "epoch": 3.9509755495183994, "grad_norm": 0.5242179398310719, "learning_rate": 4.559716291159793e-06, "loss": 0.4589, "step": 10085 }, { "epoch": 3.9513707088169916, "grad_norm": 0.48289980979582037, "learning_rate": 4.559627492345147e-06, "loss": 0.4572, "step": 10086 }, { "epoch": 3.951765868115584, "grad_norm": 0.4805787354967715, "learning_rate": 4.5595386854415335e-06, "loss": 0.4507, "step": 10087 }, { "epoch": 3.952161027414176, "grad_norm": 0.4853162285383262, "learning_rate": 4.5594498704493025e-06, "loss": 0.473, "step": 10088 }, { "epoch": 3.9525561867127683, "grad_norm": 0.5148358522338808, "learning_rate": 4.559361047368803e-06, "loss": 0.4558, "step": 10089 }, { "epoch": 3.9529513460113606, "grad_norm": 0.49389057275002457, "learning_rate": 4.559272216200385e-06, "loss": 0.4529, "step": 10090 }, { "epoch": 3.9533465053099532, "grad_norm": 0.4800782868223698, "learning_rate": 4.559183376944395e-06, "loss": 0.4654, "step": 10091 }, { "epoch": 3.9537416646085455, "grad_norm": 0.49900896659440425, "learning_rate": 4.559094529601183e-06, "loss": 0.4511, "step": 10092 }, { "epoch": 3.9541368239071377, "grad_norm": 0.5014788964906836, "learning_rate": 4.5590056741711e-06, "loss": 0.4565, "step": 10093 }, { "epoch": 3.95453198320573, "grad_norm": 0.49370738821746696, "learning_rate": 4.558916810654491e-06, "loss": 0.442, "step": 10094 }, { "epoch": 3.954927142504322, "grad_norm": 0.49314453829677907, "learning_rate": 4.558827939051707e-06, "loss": 0.4664, "step": 10095 }, { "epoch": 3.9553223018029144, "grad_norm": 0.4850261607507114, "learning_rate": 4.558739059363098e-06, "loss": 0.4599, "step": 10096 }, { "epoch": 3.9557174611015067, "grad_norm": 0.5150383652631196, "learning_rate": 4.558650171589012e-06, "loss": 0.479, "step": 10097 }, { "epoch": 3.956112620400099, "grad_norm": 0.49222732950123294, "learning_rate": 4.558561275729798e-06, "loss": 0.46, "step": 10098 }, { "epoch": 3.956507779698691, "grad_norm": 0.5036146912323076, "learning_rate": 4.558472371785805e-06, "loss": 0.476, "step": 10099 }, { "epoch": 3.9569029389972834, "grad_norm": 0.49324709847980996, "learning_rate": 4.558383459757383e-06, "loss": 0.4721, "step": 10100 }, { "epoch": 3.9572980982958756, "grad_norm": 0.4688106876677952, "learning_rate": 4.55829453964488e-06, "loss": 0.4553, "step": 10101 }, { "epoch": 3.957693257594468, "grad_norm": 0.4864105983815226, "learning_rate": 4.558205611448646e-06, "loss": 0.4507, "step": 10102 }, { "epoch": 3.95808841689306, "grad_norm": 0.5127931976516101, "learning_rate": 4.5581166751690306e-06, "loss": 0.4646, "step": 10103 }, { "epoch": 3.9584835761916524, "grad_norm": 0.493192790264334, "learning_rate": 4.558027730806383e-06, "loss": 0.4634, "step": 10104 }, { "epoch": 3.9588787354902446, "grad_norm": 0.4995732048308892, "learning_rate": 4.557938778361052e-06, "loss": 0.4685, "step": 10105 }, { "epoch": 3.959273894788837, "grad_norm": 0.4833683966329439, "learning_rate": 4.557849817833386e-06, "loss": 0.4548, "step": 10106 }, { "epoch": 3.959669054087429, "grad_norm": 0.5172522955731031, "learning_rate": 4.5577608492237365e-06, "loss": 0.4625, "step": 10107 }, { "epoch": 3.9600642133860213, "grad_norm": 0.48501688273496174, "learning_rate": 4.557671872532452e-06, "loss": 0.4537, "step": 10108 }, { "epoch": 3.9604593726846136, "grad_norm": 0.5012142376692591, "learning_rate": 4.557582887759881e-06, "loss": 0.464, "step": 10109 }, { "epoch": 3.960854531983206, "grad_norm": 0.49905859888997944, "learning_rate": 4.557493894906375e-06, "loss": 0.4688, "step": 10110 }, { "epoch": 3.961249691281798, "grad_norm": 0.48977789559433366, "learning_rate": 4.5574048939722825e-06, "loss": 0.4545, "step": 10111 }, { "epoch": 3.9616448505803903, "grad_norm": 0.47820284946504593, "learning_rate": 4.557315884957952e-06, "loss": 0.4509, "step": 10112 }, { "epoch": 3.9620400098789825, "grad_norm": 0.5033813687269575, "learning_rate": 4.557226867863734e-06, "loss": 0.4541, "step": 10113 }, { "epoch": 3.9624351691775748, "grad_norm": 0.48559157024559013, "learning_rate": 4.5571378426899784e-06, "loss": 0.4678, "step": 10114 }, { "epoch": 3.962830328476167, "grad_norm": 0.48951951068737737, "learning_rate": 4.557048809437034e-06, "loss": 0.4826, "step": 10115 }, { "epoch": 3.9632254877747592, "grad_norm": 0.5107355330569413, "learning_rate": 4.556959768105253e-06, "loss": 0.4643, "step": 10116 }, { "epoch": 3.9636206470733515, "grad_norm": 0.4827625154057386, "learning_rate": 4.556870718694981e-06, "loss": 0.4592, "step": 10117 }, { "epoch": 3.9640158063719437, "grad_norm": 0.5016649652795582, "learning_rate": 4.55678166120657e-06, "loss": 0.4668, "step": 10118 }, { "epoch": 3.964410965670536, "grad_norm": 0.4945290431200768, "learning_rate": 4.55669259564037e-06, "loss": 0.4726, "step": 10119 }, { "epoch": 3.964806124969128, "grad_norm": 0.48945605735322806, "learning_rate": 4.55660352199673e-06, "loss": 0.4581, "step": 10120 } ], "logging_steps": 1, "max_steps": 50600, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 2530, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.540274358583296e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }