{ "best_metric": 0.12475299090147018, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-base-2024_09_03-batch-size32_epochs150_freeze/checkpoint-27573", "epoch": 111.0, "eval_steps": 500, "global_step": 30303, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.2079002079002079, "eval_f1_macro": 0.5105390450682302, "eval_f1_micro": 0.73108765167112, "eval_loss": 0.17516958713531494, "eval_roc_auc": 0.8186965528786462, "eval_runtime": 453.901, "eval_samples_per_second": 6.358, "eval_steps_per_second": 0.2, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.2602289021015167, "learning_rate": 0.001, "loss": 0.2857, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.23492723492723494, "eval_f1_macro": 0.5498069096094584, "eval_f1_micro": 0.7582569600553347, "eval_loss": 0.1577771008014679, "eval_roc_auc": 0.8363419499741919, "eval_runtime": 442.4314, "eval_samples_per_second": 6.523, "eval_steps_per_second": 0.206, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.23146223146223147, "eval_f1_macro": 0.6037272443934714, "eval_f1_micro": 0.7721545657578696, "eval_loss": 0.15162432193756104, "eval_roc_auc": 0.8505384953411333, "eval_runtime": 429.5087, "eval_samples_per_second": 6.719, "eval_steps_per_second": 0.212, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.24485518038272858, "learning_rate": 0.001, "loss": 0.1764, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.24220374220374222, "eval_f1_macro": 0.613953187695023, "eval_f1_micro": 0.7649537378914902, "eval_loss": 0.15218119323253632, "eval_roc_auc": 0.8386795656946878, "eval_runtime": 440.157, "eval_samples_per_second": 6.557, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.24220374220374222, "eval_f1_macro": 0.6161642626912543, "eval_f1_micro": 0.7719928186714542, "eval_loss": 0.14836864173412323, "eval_roc_auc": 0.840338176952158, "eval_runtime": 432.0583, "eval_samples_per_second": 6.68, "eval_steps_per_second": 0.211, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.17316196858882904, "learning_rate": 0.001, "loss": 0.1677, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.2560637560637561, "eval_f1_macro": 0.6051867487843677, "eval_f1_micro": 0.775030471878809, "eval_loss": 0.14818257093429565, "eval_roc_auc": 0.8434755477910759, "eval_runtime": 432.9746, "eval_samples_per_second": 6.666, "eval_steps_per_second": 0.21, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.24185724185724186, "eval_f1_macro": 0.617739220969942, "eval_f1_micro": 0.7729166666666668, "eval_loss": 0.1486394852399826, "eval_roc_auc": 0.8431254755177443, "eval_runtime": 432.8426, "eval_samples_per_second": 6.668, "eval_steps_per_second": 0.21, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.31019529700279236, "learning_rate": 0.001, "loss": 0.1652, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.2512127512127512, "eval_f1_macro": 0.6171646674895677, "eval_f1_micro": 0.7767065175472426, "eval_loss": 0.14861202239990234, "eval_roc_auc": 0.8485322128731306, "eval_runtime": 436.0215, "eval_samples_per_second": 6.619, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.2512127512127512, "eval_f1_macro": 0.6366264906922544, "eval_f1_micro": 0.7805490458654168, "eval_loss": 0.14834754168987274, "eval_roc_auc": 0.857034765243127, "eval_runtime": 443.0125, "eval_samples_per_second": 6.514, "eval_steps_per_second": 0.205, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.1974957138299942, "learning_rate": 0.001, "loss": 0.1617, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.24532224532224534, "eval_f1_macro": 0.6081428044829309, "eval_f1_micro": 0.7682759232167399, "eval_loss": 0.15029709041118622, "eval_roc_auc": 0.8352362538484075, "eval_runtime": 434.6587, "eval_samples_per_second": 6.64, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.16183075308799744, "learning_rate": 0.001, "loss": 0.1615, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.2609147609147609, "eval_f1_macro": 0.6199915554248129, "eval_f1_micro": 0.7756647297059341, "eval_loss": 0.14407172799110413, "eval_roc_auc": 0.8408945796610934, "eval_runtime": 431.8807, "eval_samples_per_second": 6.682, "eval_steps_per_second": 0.211, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.2494802494802495, "eval_f1_macro": 0.6299207659511814, "eval_f1_micro": 0.781485559413907, "eval_loss": 0.14866559207439423, "eval_roc_auc": 0.8542998474050816, "eval_runtime": 435.7397, "eval_samples_per_second": 6.623, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.17374463379383087, "learning_rate": 0.001, "loss": 0.1614, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.25190575190575193, "eval_f1_macro": 0.6241659824597257, "eval_f1_micro": 0.7779037321241716, "eval_loss": 0.14902691543102264, "eval_roc_auc": 0.8445867301441496, "eval_runtime": 444.1918, "eval_samples_per_second": 6.497, "eval_steps_per_second": 0.205, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.26056826056826055, "eval_f1_macro": 0.6378982802249643, "eval_f1_micro": 0.7826389795829524, "eval_loss": 0.14337006211280823, "eval_roc_auc": 0.8474976901507599, "eval_runtime": 437.9843, "eval_samples_per_second": 6.589, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.18867848813533783, "learning_rate": 0.001, "loss": 0.1599, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2553707553707554, "eval_f1_macro": 0.639716503598517, "eval_f1_micro": 0.7873585308562887, "eval_loss": 0.14354591071605682, "eval_roc_auc": 0.8551790656297652, "eval_runtime": 440.9279, "eval_samples_per_second": 6.545, "eval_steps_per_second": 0.206, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.25675675675675674, "eval_f1_macro": 0.6343613127126344, "eval_f1_micro": 0.7792974686292388, "eval_loss": 0.1439499706029892, "eval_roc_auc": 0.8464149986210657, "eval_runtime": 436.9638, "eval_samples_per_second": 6.605, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.16403253376483917, "learning_rate": 0.001, "loss": 0.1589, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.2543312543312543, "eval_f1_macro": 0.6422270697798029, "eval_f1_micro": 0.787784461363732, "eval_loss": 0.14478015899658203, "eval_roc_auc": 0.8595889192695618, "eval_runtime": 441.556, "eval_samples_per_second": 6.536, "eval_steps_per_second": 0.206, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.25675675675675674, "eval_f1_macro": 0.6417123667888478, "eval_f1_micro": 0.786493860845839, "eval_loss": 0.14397625625133514, "eval_roc_auc": 0.8551892735001003, "eval_runtime": 445.1896, "eval_samples_per_second": 6.483, "eval_steps_per_second": 0.204, "learning_rate": 0.001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 1.3815889358520508, "learning_rate": 0.001, "loss": 0.1604, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.253984753984754, "eval_f1_macro": 0.6317583185615991, "eval_f1_micro": 0.7863510343356792, "eval_loss": 0.14199253916740417, "eval_roc_auc": 0.854983895441361, "eval_runtime": 435.0664, "eval_samples_per_second": 6.633, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.2588357588357588, "eval_f1_macro": 0.6408966299078661, "eval_f1_micro": 0.7868513006341401, "eval_loss": 0.14092272520065308, "eval_roc_auc": 0.8521780322337986, "eval_runtime": 434.5134, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.21049675345420837, "learning_rate": 0.001, "loss": 0.1586, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.26195426195426197, "eval_f1_macro": 0.6412583916380257, "eval_f1_micro": 0.7864882090503504, "eval_loss": 0.1425119787454605, "eval_roc_auc": 0.8561061843865996, "eval_runtime": 433.1377, "eval_samples_per_second": 6.663, "eval_steps_per_second": 0.21, "learning_rate": 0.001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.159688800573349, "learning_rate": 0.001, "loss": 0.1587, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.23700623700623702, "eval_f1_macro": 0.6371452798177432, "eval_f1_micro": 0.7854284761587195, "eval_loss": 0.15379400551319122, "eval_roc_auc": 0.860841268018702, "eval_runtime": 439.1581, "eval_samples_per_second": 6.572, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.25571725571725573, "eval_f1_macro": 0.6390434486158698, "eval_f1_micro": 0.7841676771176165, "eval_loss": 0.1418805718421936, "eval_roc_auc": 0.8497106920533675, "eval_runtime": 434.4927, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.20623169839382172, "learning_rate": 0.001, "loss": 0.1592, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.2598752598752599, "eval_f1_macro": 0.6458978920546691, "eval_f1_micro": 0.7869535635312129, "eval_loss": 0.14135514199733734, "eval_roc_auc": 0.8561374786855376, "eval_runtime": 437.945, "eval_samples_per_second": 6.59, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.26853776853776856, "eval_f1_macro": 0.6262981090846956, "eval_f1_micro": 0.786773581652009, "eval_loss": 0.13985148072242737, "eval_roc_auc": 0.8523112308572252, "eval_runtime": 433.5621, "eval_samples_per_second": 6.656, "eval_steps_per_second": 0.21, "learning_rate": 0.001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.167380690574646, "learning_rate": 0.001, "loss": 0.1586, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.2591822591822592, "eval_f1_macro": 0.6237830069375186, "eval_f1_micro": 0.7846557710221018, "eval_loss": 0.14649754762649536, "eval_roc_auc": 0.8560739377107973, "eval_runtime": 436.7443, "eval_samples_per_second": 6.608, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.23804573804573806, "eval_f1_macro": 0.6344307952131357, "eval_f1_micro": 0.7719951506754418, "eval_loss": 0.15506784617900848, "eval_roc_auc": 0.8432688820115058, "eval_runtime": 431.1317, "eval_samples_per_second": 6.694, "eval_steps_per_second": 0.211, "learning_rate": 0.001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.17562341690063477, "learning_rate": 0.001, "loss": 0.16, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.2616077616077616, "eval_f1_macro": 0.6429949936408241, "eval_f1_micro": 0.7891238152420981, "eval_loss": 0.14431345462799072, "eval_roc_auc": 0.8549858001950897, "eval_runtime": 430.551, "eval_samples_per_second": 6.703, "eval_steps_per_second": 0.211, "learning_rate": 0.001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.25675675675675674, "eval_f1_macro": 0.6415824285032449, "eval_f1_micro": 0.7873995663818392, "eval_loss": 0.14275498688220978, "eval_roc_auc": 0.8564611929231155, "eval_runtime": 443.3199, "eval_samples_per_second": 6.51, "eval_steps_per_second": 0.205, "learning_rate": 0.001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.16407011449337006, "learning_rate": 0.001, "loss": 0.1589, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.2525987525987526, "eval_f1_macro": 0.6308133523221491, "eval_f1_micro": 0.7798808735936467, "eval_loss": 0.14164045453071594, "eval_roc_auc": 0.8425187578001007, "eval_runtime": 435.3871, "eval_samples_per_second": 6.629, "eval_steps_per_second": 0.209, "learning_rate": 0.001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.26888426888426886, "eval_f1_macro": 0.6431010910213645, "eval_f1_micro": 0.7895365707945718, "eval_loss": 0.13976627588272095, "eval_roc_auc": 0.8565786303801245, "eval_runtime": 429.7695, "eval_samples_per_second": 6.715, "eval_steps_per_second": 0.212, "learning_rate": 0.001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.3522001802921295, "learning_rate": 0.001, "loss": 0.1588, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.25675675675675674, "eval_f1_macro": 0.6520927708015384, "eval_f1_micro": 0.7891036166898235, "eval_loss": 0.1448184847831726, "eval_roc_auc": 0.8600551412790717, "eval_runtime": 437.4042, "eval_samples_per_second": 6.598, "eval_steps_per_second": 0.208, "learning_rate": 0.001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.16505810618400574, "learning_rate": 0.001, "loss": 0.1581, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.26403326403326405, "eval_f1_macro": 0.6496848321151188, "eval_f1_micro": 0.7895652173913044, "eval_loss": 0.14042973518371582, "eval_roc_auc": 0.8582461081320644, "eval_runtime": 438.9972, "eval_samples_per_second": 6.574, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.25571725571725573, "eval_f1_macro": 0.6448790211155284, "eval_f1_micro": 0.7870906828033133, "eval_loss": 0.1426127403974533, "eval_roc_auc": 0.8537051886564672, "eval_runtime": 443.0007, "eval_samples_per_second": 6.515, "eval_steps_per_second": 0.205, "learning_rate": 0.001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.17374606430530548, "learning_rate": 0.001, "loss": 0.1578, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.262993762993763, "eval_f1_macro": 0.6428423378015612, "eval_f1_micro": 0.7846327880264532, "eval_loss": 0.14135821163654327, "eval_roc_auc": 0.8487055052211715, "eval_runtime": 439.0163, "eval_samples_per_second": 6.574, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.26784476784476785, "eval_f1_macro": 0.6434020884943297, "eval_f1_micro": 0.7834209497328063, "eval_loss": 0.14652539789676666, "eval_roc_auc": 0.848444661270401, "eval_runtime": 441.3143, "eval_samples_per_second": 6.54, "eval_steps_per_second": 0.206, "learning_rate": 0.001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.1908567249774933, "learning_rate": 0.001, "loss": 0.1576, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.2668052668052668, "eval_f1_macro": 0.6438477431550106, "eval_f1_micro": 0.792425408224331, "eval_loss": 0.13795886933803558, "eval_roc_auc": 0.8576696786814598, "eval_runtime": 438.7851, "eval_samples_per_second": 6.577, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.2636867636867637, "eval_f1_macro": 0.6475331965590188, "eval_f1_micro": 0.7892280686732029, "eval_loss": 0.13921019434928894, "eval_roc_auc": 0.8555102379171546, "eval_runtime": 440.2794, "eval_samples_per_second": 6.555, "eval_steps_per_second": 0.207, "learning_rate": 0.001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.17312012612819672, "learning_rate": 0.001, "loss": 0.1556, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.24601524601524602, "eval_f1_macro": 0.659217552215385, "eval_f1_micro": 0.7871620243872598, "eval_loss": 0.14584119617938995, "eval_roc_auc": 0.8679696612972285, "eval_runtime": 440.7666, "eval_samples_per_second": 6.548, "eval_steps_per_second": 0.206, "learning_rate": 0.001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.26992376992376993, "eval_f1_macro": 0.6469476365862663, "eval_f1_micro": 0.79463243873979, "eval_loss": 0.1389026641845703, "eval_roc_auc": 0.8659848006017948, "eval_runtime": 449.7344, "eval_samples_per_second": 6.417, "eval_steps_per_second": 0.202, "learning_rate": 0.001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.17165251076221466, "learning_rate": 0.001, "loss": 0.1577, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.2616077616077616, "eval_f1_macro": 0.6509894683187031, "eval_f1_micro": 0.784842032071618, "eval_loss": 0.14020991325378418, "eval_roc_auc": 0.8491298897174419, "eval_runtime": 442.6364, "eval_samples_per_second": 6.52, "eval_steps_per_second": 0.206, "learning_rate": 0.001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.27165627165627165, "eval_f1_macro": 0.6608924914997423, "eval_f1_micro": 0.7927685516081564, "eval_loss": 0.14042720198631287, "eval_roc_auc": 0.8624648827798459, "eval_runtime": 447.1222, "eval_samples_per_second": 6.455, "eval_steps_per_second": 0.204, "learning_rate": 0.001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.16945631802082062, "learning_rate": 0.001, "loss": 0.1576, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.2695772695772696, "eval_f1_macro": 0.6427022769326964, "eval_f1_micro": 0.7930726352070125, "eval_loss": 0.13943640887737274, "eval_roc_auc": 0.8592814088524369, "eval_runtime": 443.0609, "eval_samples_per_second": 6.514, "eval_steps_per_second": 0.205, "learning_rate": 0.001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.17723415791988373, "learning_rate": 0.0001, "loss": 0.1543, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6567716426576066, "eval_f1_micro": 0.7989137353078458, "eval_loss": 0.1367315948009491, "eval_roc_auc": 0.8632369250728903, "eval_runtime": 444.9892, "eval_samples_per_second": 6.486, "eval_steps_per_second": 0.204, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.28274428274428276, "eval_f1_macro": 0.6686203083248894, "eval_f1_micro": 0.8018308187828446, "eval_loss": 0.13616175949573517, "eval_roc_auc": 0.8651714707596159, "eval_runtime": 435.6671, "eval_samples_per_second": 6.624, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.15646368265151978, "learning_rate": 0.0001, "loss": 0.1481, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.2851697851697852, "eval_f1_macro": 0.6640104860714046, "eval_f1_micro": 0.8021852369457503, "eval_loss": 0.13375289738178253, "eval_roc_auc": 0.8655685984983028, "eval_runtime": 439.641, "eval_samples_per_second": 6.564, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.2785862785862786, "eval_f1_macro": 0.65726703563479, "eval_f1_micro": 0.7998804746862461, "eval_loss": 0.14095526933670044, "eval_roc_auc": 0.8620771714997334, "eval_runtime": 432.1404, "eval_samples_per_second": 6.678, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.14561912417411804, "learning_rate": 0.0001, "loss": 0.1472, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.28482328482328484, "eval_f1_macro": 0.6728387979723557, "eval_f1_micro": 0.8044442566853957, "eval_loss": 0.13375185430049896, "eval_roc_auc": 0.8674991066436737, "eval_runtime": 444.9787, "eval_samples_per_second": 6.486, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.2855162855162855, "eval_f1_macro": 0.674164075762875, "eval_f1_micro": 0.8058309037900874, "eval_loss": 0.13221527636051178, "eval_roc_auc": 0.8723556652741397, "eval_runtime": 443.2861, "eval_samples_per_second": 6.51, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.18880312144756317, "learning_rate": 0.0001, "loss": 0.1448, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.28967428967428965, "eval_f1_macro": 0.6738599949249782, "eval_f1_micro": 0.8062985513331933, "eval_loss": 0.13315953314304352, "eval_roc_auc": 0.8702548292213903, "eval_runtime": 434.9136, "eval_samples_per_second": 6.636, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.28967428967428965, "eval_f1_macro": 0.6770873238469556, "eval_f1_micro": 0.8062836021505377, "eval_loss": 0.13057135045528412, "eval_roc_auc": 0.8701618987014408, "eval_runtime": 431.3628, "eval_samples_per_second": 6.69, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.17863284051418304, "learning_rate": 0.0001, "loss": 0.1432, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.2872487872487873, "eval_f1_macro": 0.6726562275384118, "eval_f1_micro": 0.8043922369765066, "eval_loss": 0.13108478486537933, "eval_roc_auc": 0.8653604706190395, "eval_runtime": 432.9945, "eval_samples_per_second": 6.665, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.2872487872487873, "eval_f1_macro": 0.6702824874792834, "eval_f1_micro": 0.8070734160241367, "eval_loss": 0.13161474466323853, "eval_roc_auc": 0.8712745968092424, "eval_runtime": 437.0691, "eval_samples_per_second": 6.603, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.15456052124500275, "learning_rate": 0.0001, "loss": 0.1438, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.2882882882882883, "eval_f1_macro": 0.6787531928667037, "eval_f1_micro": 0.8064162093710426, "eval_loss": 0.1315840184688568, "eval_roc_auc": 0.8688063579069815, "eval_runtime": 432.9273, "eval_samples_per_second": 6.666, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.20175035297870636, "learning_rate": 0.0001, "loss": 0.1417, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.2875952875952876, "eval_f1_macro": 0.6698514928377199, "eval_f1_micro": 0.8061478697800111, "eval_loss": 0.13084293901920319, "eval_roc_auc": 0.8685749571564548, "eval_runtime": 434.5176, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.29417879417879417, "eval_f1_macro": 0.6799502024965028, "eval_f1_micro": 0.8094286190238215, "eval_loss": 0.12969879806041718, "eval_roc_auc": 0.8743735506433774, "eval_runtime": 432.6673, "eval_samples_per_second": 6.67, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.2052290290594101, "learning_rate": 0.0001, "loss": 0.1415, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.2934857934857935, "eval_f1_macro": 0.6716759101412201, "eval_f1_micro": 0.8086806577785254, "eval_loss": 0.1296372264623642, "eval_roc_auc": 0.8711020824592034, "eval_runtime": 430.2029, "eval_samples_per_second": 6.708, "eval_steps_per_second": 0.212, "learning_rate": 0.0001, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.29244629244629244, "eval_f1_macro": 0.6784509633805341, "eval_f1_micro": 0.8068982880161129, "eval_loss": 0.12973745167255402, "eval_roc_auc": 0.8707904271906546, "eval_runtime": 437.8438, "eval_samples_per_second": 6.591, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.18146342039108276, "learning_rate": 0.0001, "loss": 0.1413, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.2910602910602911, "eval_f1_macro": 0.6811347101829983, "eval_f1_micro": 0.8087436297013858, "eval_loss": 0.12995606660842896, "eval_roc_auc": 0.8707232376735605, "eval_runtime": 440.1545, "eval_samples_per_second": 6.557, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.28794178794178793, "eval_f1_macro": 0.6725887638706813, "eval_f1_micro": 0.8056052474657126, "eval_loss": 0.13024823367595673, "eval_roc_auc": 0.8658006699367622, "eval_runtime": 442.8738, "eval_samples_per_second": 6.517, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.18909117579460144, "learning_rate": 0.0001, "loss": 0.1404, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.2948717948717949, "eval_f1_macro": 0.6842961167409227, "eval_f1_micro": 0.8095537925534148, "eval_loss": 0.12872998416423798, "eval_roc_auc": 0.8721413670658958, "eval_runtime": 442.546, "eval_samples_per_second": 6.521, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.29002079002079, "eval_f1_macro": 0.6821531683206365, "eval_f1_micro": 0.8079526226734349, "eval_loss": 0.12909561395645142, "eval_roc_auc": 0.8690393280672706, "eval_runtime": 439.5646, "eval_samples_per_second": 6.566, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.18229062855243683, "learning_rate": 0.0001, "loss": 0.1393, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.6812919501021206, "eval_f1_micro": 0.8075538806791719, "eval_loss": 0.12872986495494843, "eval_roc_auc": 0.8685427641356871, "eval_runtime": 439.5217, "eval_samples_per_second": 6.566, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.2959112959112959, "eval_f1_macro": 0.6805602232602442, "eval_f1_micro": 0.8090726144558109, "eval_loss": 0.12864243984222412, "eval_roc_auc": 0.8722296430115927, "eval_runtime": 436.4112, "eval_samples_per_second": 6.613, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.2255202978849411, "learning_rate": 0.0001, "loss": 0.1395, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.29313929313929316, "eval_f1_macro": 0.6837997472607307, "eval_f1_micro": 0.809268560334276, "eval_loss": 0.12800218164920807, "eval_roc_auc": 0.8704023269115411, "eval_runtime": 436.6673, "eval_samples_per_second": 6.609, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.20691530406475067, "learning_rate": 0.0001, "loss": 0.1389, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.2959112959112959, "eval_f1_macro": 0.685457875933014, "eval_f1_micro": 0.8107521495951249, "eval_loss": 0.12777170538902283, "eval_roc_auc": 0.8744479165213509, "eval_runtime": 443.2239, "eval_samples_per_second": 6.511, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.2948717948717949, "eval_f1_macro": 0.6849396578990685, "eval_f1_micro": 0.8098450774612694, "eval_loss": 0.12816764414310455, "eval_roc_auc": 0.874604334361006, "eval_runtime": 435.2924, "eval_samples_per_second": 6.63, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.23671405017375946, "learning_rate": 0.0001, "loss": 0.1376, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.6903099963278952, "eval_f1_micro": 0.8123470107455503, "eval_loss": 0.12804801762104034, "eval_roc_auc": 0.8771048199390693, "eval_runtime": 435.6555, "eval_samples_per_second": 6.625, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.29521829521829523, "eval_f1_macro": 0.6800351861453543, "eval_f1_micro": 0.8104663431103608, "eval_loss": 0.12803924083709717, "eval_roc_auc": 0.8710816667185332, "eval_runtime": 435.4705, "eval_samples_per_second": 6.627, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.19641809165477753, "learning_rate": 0.0001, "loss": 0.1375, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.29313929313929316, "eval_f1_macro": 0.684802818649885, "eval_f1_micro": 0.8096462751380749, "eval_loss": 0.12764029204845428, "eval_roc_auc": 0.8709255305678097, "eval_runtime": 438.0603, "eval_samples_per_second": 6.588, "eval_steps_per_second": 0.208, "learning_rate": 0.0001, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.29036729036729036, "eval_f1_macro": 0.6796736257485385, "eval_f1_micro": 0.8072724183339705, "eval_loss": 0.12794704735279083, "eval_roc_auc": 0.8674678659997703, "eval_runtime": 438.6839, "eval_samples_per_second": 6.579, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.23725061118602753, "learning_rate": 0.0001, "loss": 0.1368, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.2938322938322938, "eval_f1_macro": 0.6802343842914587, "eval_f1_micro": 0.8102650399663442, "eval_loss": 0.12780210375785828, "eval_roc_auc": 0.8718861300662425, "eval_runtime": 436.1264, "eval_samples_per_second": 6.617, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.29764379764379767, "eval_f1_macro": 0.6805723882610378, "eval_f1_micro": 0.8091473263623224, "eval_loss": 0.12723641097545624, "eval_roc_auc": 0.8683039589903839, "eval_runtime": 430.6869, "eval_samples_per_second": 6.701, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.2105712741613388, "learning_rate": 0.0001, "loss": 0.137, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.2934857934857935, "eval_f1_macro": 0.6777188921642516, "eval_f1_micro": 0.8064391831142698, "eval_loss": 0.12804573774337769, "eval_roc_auc": 0.8647877433658233, "eval_runtime": 435.4406, "eval_samples_per_second": 6.628, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.29244629244629244, "eval_f1_macro": 0.6885203936930924, "eval_f1_micro": 0.8109922383050138, "eval_loss": 0.1273234635591507, "eval_roc_auc": 0.8730787473480999, "eval_runtime": 433.0115, "eval_samples_per_second": 6.665, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.24889850616455078, "learning_rate": 0.0001, "loss": 0.1367, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.6810578369044884, "eval_f1_micro": 0.8088975345709815, "eval_loss": 0.1272992193698883, "eval_roc_auc": 0.8696399827660029, "eval_runtime": 432.2403, "eval_samples_per_second": 6.677, "eval_steps_per_second": 0.211, "learning_rate": 0.0001, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.24539624154567719, "learning_rate": 0.0001, "loss": 0.1358, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.29244629244629244, "eval_f1_macro": 0.6863183190306963, "eval_f1_micro": 0.8102101349375445, "eval_loss": 0.12745273113250732, "eval_roc_auc": 0.8738921845003481, "eval_runtime": 434.2537, "eval_samples_per_second": 6.646, "eval_steps_per_second": 0.21, "learning_rate": 0.0001, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.2945252945252945, "eval_f1_macro": 0.6897104532016692, "eval_f1_micro": 0.8121675531914894, "eval_loss": 0.12705788016319275, "eval_roc_auc": 0.876527719800532, "eval_runtime": 444.7222, "eval_samples_per_second": 6.489, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.23895101249217987, "learning_rate": 0.0001, "loss": 0.1352, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.2934857934857935, "eval_f1_macro": 0.6881838490414868, "eval_f1_micro": 0.809842452990005, "eval_loss": 0.12710121273994446, "eval_roc_auc": 0.869729211088564, "eval_runtime": 435.3871, "eval_samples_per_second": 6.629, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.2983367983367983, "eval_f1_macro": 0.6914032136958002, "eval_f1_micro": 0.8123911420751431, "eval_loss": 0.12715762853622437, "eval_roc_auc": 0.8772640412542829, "eval_runtime": 435.2387, "eval_samples_per_second": 6.631, "eval_steps_per_second": 0.209, "learning_rate": 0.0001, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.28309133648872375, "learning_rate": 0.0001, "loss": 0.1353, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.2966042966042966, "eval_f1_macro": 0.6899389708752343, "eval_f1_micro": 0.810378232667846, "eval_loss": 0.12650521099567413, "eval_roc_auc": 0.8716067214734959, "eval_runtime": 440.2197, "eval_samples_per_second": 6.556, "eval_steps_per_second": 0.207, "learning_rate": 0.0001, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.29140679140679143, "eval_f1_macro": 0.6844864031747653, "eval_f1_micro": 0.8105446364138047, "eval_loss": 0.12635371088981628, "eval_roc_auc": 0.8694408286912787, "eval_runtime": 441.9602, "eval_samples_per_second": 6.53, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.24017925560474396, "learning_rate": 0.0001, "loss": 0.1337, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.2934857934857935, "eval_f1_macro": 0.6832392344549459, "eval_f1_micro": 0.8099670022844573, "eval_loss": 0.1272997260093689, "eval_roc_auc": 0.8701139445142557, "eval_runtime": 442.0471, "eval_samples_per_second": 6.529, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.2959112959112959, "eval_f1_macro": 0.6944491344986764, "eval_f1_micro": 0.8124478558318038, "eval_loss": 0.12640425562858582, "eval_roc_auc": 0.875556912431764, "eval_runtime": 443.3633, "eval_samples_per_second": 6.509, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.22998856008052826, "learning_rate": 0.0001, "loss": 0.1354, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.6879519222426981, "eval_f1_micro": 0.812659392115055, "eval_loss": 0.12647400796413422, "eval_roc_auc": 0.8750247349709565, "eval_runtime": 442.8099, "eval_samples_per_second": 6.517, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.29521829521829523, "eval_f1_macro": 0.6933253774763921, "eval_f1_micro": 0.8135877542461731, "eval_loss": 0.12585221230983734, "eval_roc_auc": 0.8746291766420319, "eval_runtime": 440.9089, "eval_samples_per_second": 6.546, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.29592010378837585, "learning_rate": 0.0001, "loss": 0.1334, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.2966042966042966, "eval_f1_macro": 0.6882459007361815, "eval_f1_micro": 0.8111366966715512, "eval_loss": 0.12641744315624237, "eval_roc_auc": 0.8737689120583252, "eval_runtime": 444.5125, "eval_samples_per_second": 6.493, "eval_steps_per_second": 0.205, "learning_rate": 0.0001, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.252650648355484, "learning_rate": 0.0001, "loss": 0.1335, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.29902979902979904, "eval_f1_macro": 0.6859575429209334, "eval_f1_micro": 0.8126931106471816, "eval_loss": 0.1263686865568161, "eval_roc_auc": 0.8754230967754396, "eval_runtime": 448.2495, "eval_samples_per_second": 6.438, "eval_steps_per_second": 0.203, "learning_rate": 0.0001, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.2983367983367983, "eval_f1_macro": 0.6990366097632199, "eval_f1_micro": 0.8140188460902628, "eval_loss": 0.12690132856369019, "eval_roc_auc": 0.8791768834795075, "eval_runtime": 441.4136, "eval_samples_per_second": 6.538, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.2610660791397095, "learning_rate": 0.0001, "loss": 0.1332, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.6994167448254883, "eval_f1_micro": 0.8155163144617673, "eval_loss": 0.12612390518188477, "eval_roc_auc": 0.8798283494960182, "eval_runtime": 445.6794, "eval_samples_per_second": 6.476, "eval_steps_per_second": 0.204, "learning_rate": 0.0001, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.28932778932778935, "eval_f1_macro": 0.6827913109763548, "eval_f1_micro": 0.8108811552831535, "eval_loss": 0.1268243044614792, "eval_roc_auc": 0.8728467295646753, "eval_runtime": 442.1487, "eval_samples_per_second": 6.527, "eval_steps_per_second": 0.206, "learning_rate": 0.0001, "step": 24843 }, { "epoch": 91.57509157509158, "grad_norm": 0.28306326270103455, "learning_rate": 0.0001, "loss": 0.1326, "step": 25000 }, { "epoch": 92.0, "eval_accuracy": 0.29521829521829523, "eval_f1_macro": 0.6858483939371968, "eval_f1_micro": 0.8123787840458724, "eval_loss": 0.12613284587860107, "eval_roc_auc": 0.8724090520335794, "eval_runtime": 447.7751, "eval_samples_per_second": 6.445, "eval_steps_per_second": 0.203, "learning_rate": 0.0001, "step": 25116 }, { "epoch": 93.0, "eval_accuracy": 0.2966042966042966, "eval_f1_macro": 0.6897216822080747, "eval_f1_micro": 0.8138213420238991, "eval_loss": 0.1258293092250824, "eval_roc_auc": 0.8758502847472687, "eval_runtime": 442.7745, "eval_samples_per_second": 6.518, "eval_steps_per_second": 0.206, "learning_rate": 1e-05, "step": 25389 }, { "epoch": 93.4065934065934, "grad_norm": 0.36196333169937134, "learning_rate": 1e-05, "loss": 0.132, "step": 25500 }, { "epoch": 94.0, "eval_accuracy": 0.29764379764379767, "eval_f1_macro": 0.6940665827082791, "eval_f1_micro": 0.8137706015226304, "eval_loss": 0.12682591378688812, "eval_roc_auc": 0.8754785626674707, "eval_runtime": 447.9681, "eval_samples_per_second": 6.442, "eval_steps_per_second": 0.203, "learning_rate": 1e-05, "step": 25662 }, { "epoch": 95.0, "eval_accuracy": 0.2948717948717949, "eval_f1_macro": 0.6913394393323408, "eval_f1_micro": 0.8133975298304374, "eval_loss": 0.1256789118051529, "eval_roc_auc": 0.8750151441335194, "eval_runtime": 494.4109, "eval_samples_per_second": 5.837, "eval_steps_per_second": 0.184, "learning_rate": 1e-05, "step": 25935 }, { "epoch": 95.23809523809524, "grad_norm": 0.28360626101493835, "learning_rate": 1e-05, "loss": 0.1294, "step": 26000 }, { "epoch": 96.0, "eval_accuracy": 0.29764379764379767, "eval_f1_macro": 0.6957055849225957, "eval_f1_micro": 0.8147281313996739, "eval_loss": 0.12587758898735046, "eval_roc_auc": 0.876265078406102, "eval_runtime": 456.43, "eval_samples_per_second": 6.323, "eval_steps_per_second": 0.199, "learning_rate": 1e-05, "step": 26208 }, { "epoch": 97.0, "eval_accuracy": 0.2945252945252945, "eval_f1_macro": 0.6940781337907567, "eval_f1_micro": 0.8126029480086159, "eval_loss": 0.1256256103515625, "eval_roc_auc": 0.8720295902150387, "eval_runtime": 440.8704, "eval_samples_per_second": 6.546, "eval_steps_per_second": 0.206, "learning_rate": 1e-05, "step": 26481 }, { "epoch": 97.06959706959707, "grad_norm": 0.29758304357528687, "learning_rate": 1e-05, "loss": 0.1302, "step": 26500 }, { "epoch": 98.0, "eval_accuracy": 0.2993762993762994, "eval_f1_macro": 0.6951390304078455, "eval_f1_micro": 0.8158955813276801, "eval_loss": 0.1253080666065216, "eval_roc_auc": 0.8785118427392398, "eval_runtime": 432.3413, "eval_samples_per_second": 6.675, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 26754 }, { "epoch": 98.9010989010989, "grad_norm": 0.3460623621940613, "learning_rate": 1e-05, "loss": 0.1298, "step": 27000 }, { "epoch": 99.0, "eval_accuracy": 0.2993762993762994, "eval_f1_macro": 0.6968244403216463, "eval_f1_micro": 0.8141971169963125, "eval_loss": 0.12485036998987198, "eval_roc_auc": 0.8751979200089282, "eval_runtime": 440.8458, "eval_samples_per_second": 6.547, "eval_steps_per_second": 0.206, "learning_rate": 1e-05, "step": 27027 }, { "epoch": 100.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.693647218520028, "eval_f1_micro": 0.8134507606084869, "eval_loss": 0.12519583106040955, "eval_roc_auc": 0.8731552996687338, "eval_runtime": 426.3213, "eval_samples_per_second": 6.77, "eval_steps_per_second": 0.213, "learning_rate": 1e-05, "step": 27300 }, { "epoch": 100.73260073260073, "grad_norm": 0.2845664918422699, "learning_rate": 1e-05, "loss": 0.1304, "step": 27500 }, { "epoch": 101.0, "eval_accuracy": 0.29902979902979904, "eval_f1_macro": 0.6961023046950545, "eval_f1_micro": 0.8148550421923302, "eval_loss": 0.12475299090147018, "eval_roc_auc": 0.8764704564648416, "eval_runtime": 432.286, "eval_samples_per_second": 6.676, "eval_steps_per_second": 0.211, "learning_rate": 1e-05, "step": 27573 }, { "epoch": 102.0, "eval_accuracy": 0.29625779625779625, "eval_f1_macro": 0.692743439816851, "eval_f1_micro": 0.81366198367965, "eval_loss": 0.12659381330013275, "eval_roc_auc": 0.8737986906025352, "eval_runtime": 429.8933, "eval_samples_per_second": 6.713, "eval_steps_per_second": 0.212, "learning_rate": 1e-05, "step": 27846 }, { "epoch": 102.56410256410257, "grad_norm": 0.276334673166275, "learning_rate": 1e-05, "loss": 0.1287, "step": 28000 }, { "epoch": 103.0, "eval_accuracy": 0.29902979902979904, "eval_f1_macro": 0.6954353634647259, "eval_f1_micro": 0.8146347596496376, "eval_loss": 0.124935083091259, "eval_roc_auc": 0.8754020640018715, "eval_runtime": 433.4496, "eval_samples_per_second": 6.658, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 28119 }, { "epoch": 104.0, "eval_accuracy": 0.29764379764379767, "eval_f1_macro": 0.692659001716947, "eval_f1_micro": 0.8148796863922599, "eval_loss": 0.12519653141498566, "eval_roc_auc": 0.8769883885139594, "eval_runtime": 435.0406, "eval_samples_per_second": 6.634, "eval_steps_per_second": 0.209, "learning_rate": 1e-05, "step": 28392 }, { "epoch": 104.3956043956044, "grad_norm": 0.2987622320652008, "learning_rate": 1e-05, "loss": 0.1282, "step": 28500 }, { "epoch": 105.0, "eval_accuracy": 0.29902979902979904, "eval_f1_macro": 0.6961790886935857, "eval_f1_micro": 0.8152223750573132, "eval_loss": 0.12513257563114166, "eval_roc_auc": 0.8773257311303291, "eval_runtime": 430.7348, "eval_samples_per_second": 6.7, "eval_steps_per_second": 0.211, "learning_rate": 1e-05, "step": 28665 }, { "epoch": 106.0, "eval_accuracy": 0.29972279972279975, "eval_f1_macro": 0.6963861386142265, "eval_f1_micro": 0.8147252563995664, "eval_loss": 0.12511174380779266, "eval_roc_auc": 0.8769890055468574, "eval_runtime": 434.1489, "eval_samples_per_second": 6.647, "eval_steps_per_second": 0.21, "learning_rate": 1e-05, "step": 28938 }, { "epoch": 106.22710622710623, "grad_norm": 0.2862643301486969, "learning_rate": 1e-05, "loss": 0.1293, "step": 29000 }, { "epoch": 107.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.694620567930595, "eval_f1_micro": 0.8144894800685992, "eval_loss": 0.12498941272497177, "eval_roc_auc": 0.8758874676862554, "eval_runtime": 435.6169, "eval_samples_per_second": 6.625, "eval_steps_per_second": 0.209, "learning_rate": 1e-05, "step": 29211 }, { "epoch": 108.0, "eval_accuracy": 0.29972279972279975, "eval_f1_macro": 0.6934713387989168, "eval_f1_micro": 0.8144792584203683, "eval_loss": 0.1248873621225357, "eval_roc_auc": 0.8750706100255504, "eval_runtime": 426.658, "eval_samples_per_second": 6.764, "eval_steps_per_second": 0.213, "learning_rate": 1.0000000000000002e-06, "step": 29484 }, { "epoch": 108.05860805860806, "grad_norm": 0.2752939760684967, "learning_rate": 1.0000000000000002e-06, "loss": 0.129, "step": 29500 }, { "epoch": 109.0, "eval_accuracy": 0.29521829521829523, "eval_f1_macro": 0.6900779361953018, "eval_f1_micro": 0.8116150302210575, "eval_loss": 0.12527066469192505, "eval_roc_auc": 0.8712697142010926, "eval_runtime": 434.9611, "eval_samples_per_second": 6.635, "eval_steps_per_second": 0.209, "learning_rate": 1.0000000000000002e-06, "step": 29757 }, { "epoch": 109.89010989010988, "grad_norm": 0.2989753484725952, "learning_rate": 1.0000000000000002e-06, "loss": 0.1293, "step": 30000 }, { "epoch": 110.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.69491512245201, "eval_f1_micro": 0.8143917285082964, "eval_loss": 0.125152125954628, "eval_roc_auc": 0.8768043785727546, "eval_runtime": 434.3393, "eval_samples_per_second": 6.645, "eval_steps_per_second": 0.21, "learning_rate": 1.0000000000000002e-06, "step": 30030 }, { "epoch": 111.0, "eval_accuracy": 0.2983367983367983, "eval_f1_macro": 0.6932228755688746, "eval_f1_micro": 0.8137025263510123, "eval_loss": 0.12495684623718262, "eval_roc_auc": 0.8754655513215771, "eval_runtime": 431.8608, "eval_samples_per_second": 6.683, "eval_steps_per_second": 0.211, "learning_rate": 1.0000000000000002e-06, "step": 30303 }, { "epoch": 111.0, "learning_rate": 1.0000000000000002e-06, "step": 30303, "total_flos": 4.1153080208666034e+20, "train_loss": 0.14764341744551096, "train_runtime": 198162.6547, "train_samples_per_second": 6.598, "train_steps_per_second": 0.207 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.1153080208666034e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }