{ "best_metric": 0.1244530975818634, "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_05_27-_batch-size32_epochs150_freeze/checkpoint-24570", "epoch": 100.0, "eval_steps": 500, "global_step": 27300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.232726022688209, "eval_f1_macro": 0.4712591871520079, "eval_f1_micro": 0.7380235658381353, "eval_loss": 0.1701383888721466, "eval_roc_auc": 0.8226166045759734, "eval_runtime": 579.6646, "eval_samples_per_second": 5.018, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.3410358726978302, "learning_rate": 0.001, "loss": 0.2748, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.24888277758679958, "eval_f1_macro": 0.5722657636852799, "eval_f1_micro": 0.7568708574323469, "eval_loss": 0.15890291333198547, "eval_roc_auc": 0.8372263575023127, "eval_runtime": 572.4604, "eval_samples_per_second": 5.082, "eval_steps_per_second": 0.159, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.24785149535922998, "eval_f1_macro": 0.6104117366005594, "eval_f1_micro": 0.7723932964583505, "eval_loss": 0.15134122967720032, "eval_roc_auc": 0.851560035472562, "eval_runtime": 581.798, "eval_samples_per_second": 5.0, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.28447234630584717, "learning_rate": 0.001, "loss": 0.1714, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.24853901684427637, "eval_f1_macro": 0.599745200497783, "eval_f1_micro": 0.7608496532472631, "eval_loss": 0.15164224803447723, "eval_roc_auc": 0.8311391237980399, "eval_runtime": 574.55, "eval_samples_per_second": 5.063, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.24750773461670678, "eval_f1_macro": 0.5935106518877853, "eval_f1_micro": 0.7692371752165224, "eval_loss": 0.15243493020534515, "eval_roc_auc": 0.8475084947536117, "eval_runtime": 565.8483, "eval_samples_per_second": 5.141, "eval_steps_per_second": 0.161, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.32554125785827637, "learning_rate": 0.001, "loss": 0.1661, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.24269508422138192, "eval_f1_macro": 0.613788061665736, "eval_f1_micro": 0.7718080548414739, "eval_loss": 0.14673969149589539, "eval_roc_auc": 0.8414237469351327, "eval_runtime": 563.6053, "eval_samples_per_second": 5.161, "eval_steps_per_second": 0.161, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.2509453420419388, "eval_f1_macro": 0.6309898748773293, "eval_f1_micro": 0.7732481363152289, "eval_loss": 0.1506606936454773, "eval_roc_auc": 0.8436874105670831, "eval_runtime": 563.2783, "eval_samples_per_second": 5.164, "eval_steps_per_second": 0.162, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.22656740248203278, "learning_rate": 0.001, "loss": 0.1611, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.2609144035751117, "eval_f1_macro": 0.6403740520777896, "eval_f1_micro": 0.7828014555188422, "eval_loss": 0.14430351555347443, "eval_roc_auc": 0.8536999365685817, "eval_runtime": 570.8881, "eval_samples_per_second": 5.096, "eval_steps_per_second": 0.159, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.25128910278446204, "eval_f1_macro": 0.6366498775226099, "eval_f1_micro": 0.781416038551835, "eval_loss": 0.14617429673671722, "eval_roc_auc": 0.8571263621918263, "eval_runtime": 575.7471, "eval_samples_per_second": 5.053, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.1823957860469818, "learning_rate": 0.001, "loss": 0.1606, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.2688209006531454, "eval_f1_macro": 0.6254472467805158, "eval_f1_micro": 0.7794745970641737, "eval_loss": 0.14414818584918976, "eval_roc_auc": 0.8488544375964642, "eval_runtime": 577.67, "eval_samples_per_second": 5.036, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.1724083572626114, "learning_rate": 0.001, "loss": 0.1592, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.2595393606050189, "eval_f1_macro": 0.6357434835994208, "eval_f1_micro": 0.7780349253103302, "eval_loss": 0.14589238166809082, "eval_roc_auc": 0.8494945098415877, "eval_runtime": 576.9242, "eval_samples_per_second": 5.042, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.2554142316947405, "eval_f1_macro": 0.638389910481932, "eval_f1_micro": 0.7823495795575149, "eval_loss": 0.14458976686000824, "eval_roc_auc": 0.8601197613283508, "eval_runtime": 579.3816, "eval_samples_per_second": 5.021, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.18148617446422577, "learning_rate": 0.001, "loss": 0.1582, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.2561017531797869, "eval_f1_macro": 0.6574365741219022, "eval_f1_micro": 0.786284091383703, "eval_loss": 0.14142437279224396, "eval_roc_auc": 0.8559800911193071, "eval_runtime": 580.1191, "eval_samples_per_second": 5.014, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.24682021313166036, "eval_f1_macro": 0.6245865910731833, "eval_f1_micro": 0.7766990291262137, "eval_loss": 0.1581379920244217, "eval_roc_auc": 0.8490862605596214, "eval_runtime": 579.658, "eval_samples_per_second": 5.018, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.17000725865364075, "learning_rate": 0.001, "loss": 0.1575, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2598831213475421, "eval_f1_macro": 0.6552072842486797, "eval_f1_micro": 0.7859620485615181, "eval_loss": 0.1447945237159729, "eval_roc_auc": 0.8622108111394943, "eval_runtime": 578.0771, "eval_samples_per_second": 5.032, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.2605706428325885, "eval_f1_macro": 0.6495757946819554, "eval_f1_micro": 0.7853051058530511, "eval_loss": 0.1438169628381729, "eval_roc_auc": 0.8571207689487904, "eval_runtime": 577.2113, "eval_samples_per_second": 5.04, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.16623707115650177, "learning_rate": 0.001, "loss": 0.158, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.2506015812994156, "eval_f1_macro": 0.6310969679900952, "eval_f1_micro": 0.7824457675812967, "eval_loss": 0.14359386265277863, "eval_roc_auc": 0.8547290741339532, "eval_runtime": 583.6271, "eval_samples_per_second": 4.984, "eval_steps_per_second": 0.156, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.2564455139223101, "eval_f1_macro": 0.6531950395959965, "eval_f1_micro": 0.7848311343456975, "eval_loss": 0.1412857472896576, "eval_roc_auc": 0.8569977513721464, "eval_runtime": 571.2179, "eval_samples_per_second": 5.093, "eval_steps_per_second": 0.159, "learning_rate": 0.001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 0.14722026884555817, "learning_rate": 0.001, "loss": 0.1571, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.26022688209006534, "eval_f1_macro": 0.6486819478687708, "eval_f1_micro": 0.7833830386020918, "eval_loss": 0.14079046249389648, "eval_roc_auc": 0.852859282271344, "eval_runtime": 574.5699, "eval_samples_per_second": 5.063, "eval_steps_per_second": 0.158, "learning_rate": 0.001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.26400825025782054, "eval_f1_macro": 0.6262168341318395, "eval_f1_micro": 0.7775968460747342, "eval_loss": 0.14640754461288452, "eval_roc_auc": 0.8444495064377587, "eval_runtime": 579.1715, "eval_samples_per_second": 5.023, "eval_steps_per_second": 0.157, "learning_rate": 0.001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.16411201655864716, "learning_rate": 0.001, "loss": 0.1579, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.2653832932279134, "eval_f1_macro": 0.6582044080070929, "eval_f1_micro": 0.7890916719110552, "eval_loss": 0.1412632316350937, "eval_roc_auc": 0.8562690934879753, "eval_runtime": 565.162, "eval_samples_per_second": 5.147, "eval_steps_per_second": 0.161, "learning_rate": 0.001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.14830045402050018, "learning_rate": 0.001, "loss": 0.1564, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.2543829494671708, "eval_f1_macro": 0.6586947128782558, "eval_f1_micro": 0.7871090517954659, "eval_loss": 0.14168681204319, "eval_roc_auc": 0.8590452124964612, "eval_runtime": 558.0731, "eval_samples_per_second": 5.213, "eval_steps_per_second": 0.163, "learning_rate": 0.001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.269852182880715, "eval_f1_macro": 0.6427873985434494, "eval_f1_micro": 0.7863651704353696, "eval_loss": 0.1393543779850006, "eval_roc_auc": 0.8532977785132612, "eval_runtime": 555.9424, "eval_samples_per_second": 5.233, "eval_steps_per_second": 0.164, "learning_rate": 0.001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.1764509379863739, "learning_rate": 0.001, "loss": 0.1554, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.2588518391199725, "eval_f1_macro": 0.6618962794412713, "eval_f1_micro": 0.7857706852844616, "eval_loss": 0.14052371680736542, "eval_roc_auc": 0.8570680235127297, "eval_runtime": 561.2023, "eval_samples_per_second": 5.184, "eval_steps_per_second": 0.162, "learning_rate": 0.001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.2653832932279134, "eval_f1_macro": 0.653320279245233, "eval_f1_micro": 0.7897693920335429, "eval_loss": 0.1392364352941513, "eval_roc_auc": 0.8568084813100703, "eval_runtime": 557.349, "eval_samples_per_second": 5.219, "eval_steps_per_second": 0.163, "learning_rate": 0.001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.17609645426273346, "learning_rate": 0.001, "loss": 0.1554, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.27019594362323823, "eval_f1_macro": 0.6529431984792132, "eval_f1_micro": 0.7838044308632545, "eval_loss": 0.14239099621772766, "eval_roc_auc": 0.8503066581053809, "eval_runtime": 557.4603, "eval_samples_per_second": 5.218, "eval_steps_per_second": 0.163, "learning_rate": 0.001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.2671020969405294, "eval_f1_macro": 0.6810613208979668, "eval_f1_micro": 0.7974886125815585, "eval_loss": 0.1386287957429886, "eval_roc_auc": 0.8670442949969421, "eval_runtime": 549.4206, "eval_samples_per_second": 5.295, "eval_steps_per_second": 0.166, "learning_rate": 0.001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.1621919423341751, "learning_rate": 0.001, "loss": 0.156, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.2650395324853902, "eval_f1_macro": 0.6474807711800876, "eval_f1_micro": 0.7791304347826087, "eval_loss": 0.15519200265407562, "eval_roc_auc": 0.8544148384115767, "eval_runtime": 547.1571, "eval_samples_per_second": 5.317, "eval_steps_per_second": 0.166, "learning_rate": 0.001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.27019594362323823, "eval_f1_macro": 0.6550381793679035, "eval_f1_micro": 0.7913651213762871, "eval_loss": 0.14190098643302917, "eval_roc_auc": 0.8614869597044164, "eval_runtime": 559.1951, "eval_samples_per_second": 5.202, "eval_steps_per_second": 0.163, "learning_rate": 0.001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.16609038412570953, "learning_rate": 0.001, "loss": 0.1548, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.2767273977311791, "eval_f1_macro": 0.663185953977854, "eval_f1_micro": 0.7857173292428311, "eval_loss": 0.13986903429031372, "eval_roc_auc": 0.8502714393738423, "eval_runtime": 549.5664, "eval_samples_per_second": 5.293, "eval_steps_per_second": 0.166, "learning_rate": 0.001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.27260226882090066, "eval_f1_macro": 0.6554744698272669, "eval_f1_micro": 0.7881844380403459, "eval_loss": 0.13765402138233185, "eval_roc_auc": 0.8530413436678441, "eval_runtime": 541.1802, "eval_samples_per_second": 5.375, "eval_steps_per_second": 0.168, "learning_rate": 0.001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.15226389467716217, "learning_rate": 0.001, "loss": 0.1554, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.2677896184255758, "eval_f1_macro": 0.6596978272887946, "eval_f1_micro": 0.7914770376499792, "eval_loss": 0.13866138458251953, "eval_roc_auc": 0.861421701170076, "eval_runtime": 550.2376, "eval_samples_per_second": 5.287, "eval_steps_per_second": 0.165, "learning_rate": 0.001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.15690498054027557, "learning_rate": 0.001, "loss": 0.1551, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.2605706428325885, "eval_f1_macro": 0.6583814800932023, "eval_f1_micro": 0.7887546855476885, "eval_loss": 0.13930276036262512, "eval_roc_auc": 0.8580923964636631, "eval_runtime": 550.269, "eval_samples_per_second": 5.287, "eval_steps_per_second": 0.165, "learning_rate": 0.001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.2763836369886559, "eval_f1_macro": 0.6636727922636001, "eval_f1_micro": 0.795303262082937, "eval_loss": 0.1374826431274414, "eval_roc_auc": 0.8637208325253699, "eval_runtime": 543.5944, "eval_samples_per_second": 5.351, "eval_steps_per_second": 0.167, "learning_rate": 0.001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.166019469499588, "learning_rate": 0.001, "loss": 0.1544, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.25850807837744927, "eval_f1_macro": 0.6442491093834092, "eval_f1_micro": 0.7860775988902434, "eval_loss": 0.14001137018203735, "eval_roc_auc": 0.8541119565138938, "eval_runtime": 537.9679, "eval_samples_per_second": 5.407, "eval_steps_per_second": 0.169, "learning_rate": 0.001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.26916466139566864, "eval_f1_macro": 0.6541220211466265, "eval_f1_micro": 0.7890085033301218, "eval_loss": 0.13899104297161102, "eval_roc_auc": 0.856659816065419, "eval_runtime": 532.0356, "eval_samples_per_second": 5.468, "eval_steps_per_second": 0.171, "learning_rate": 0.001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.1606360822916031, "learning_rate": 0.001, "loss": 0.1555, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.2667583361980062, "eval_f1_macro": 0.6602790790864311, "eval_f1_micro": 0.788356222091162, "eval_loss": 0.14101693034172058, "eval_roc_auc": 0.8547597772428587, "eval_runtime": 536.869, "eval_samples_per_second": 5.418, "eval_steps_per_second": 0.17, "learning_rate": 0.001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.2633207287727741, "eval_f1_macro": 0.6508514926081754, "eval_f1_micro": 0.7864065343433915, "eval_loss": 0.13849563896656036, "eval_roc_auc": 0.8524644165509108, "eval_runtime": 538.0729, "eval_samples_per_second": 5.406, "eval_steps_per_second": 0.169, "learning_rate": 0.001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.1459091752767563, "learning_rate": 0.001, "loss": 0.1547, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.26263320728772777, "eval_f1_macro": 0.6513021077089046, "eval_f1_micro": 0.7819844457738655, "eval_loss": 0.14249388873577118, "eval_roc_auc": 0.8475227906130532, "eval_runtime": 528.7471, "eval_samples_per_second": 5.502, "eval_steps_per_second": 0.172, "learning_rate": 0.001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.2633207287727741, "eval_f1_macro": 0.6421624481517915, "eval_f1_micro": 0.7819497946916141, "eval_loss": 0.1512959599494934, "eval_roc_auc": 0.850003278224088, "eval_runtime": 526.9207, "eval_samples_per_second": 5.521, "eval_steps_per_second": 0.173, "learning_rate": 0.001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.16056223213672638, "learning_rate": 0.0001, "loss": 0.1527, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.27157098659333107, "eval_f1_macro": 0.6708412782877394, "eval_f1_micro": 0.795353889863792, "eval_loss": 0.1416281908750534, "eval_roc_auc": 0.8608108400991562, "eval_runtime": 520.8949, "eval_samples_per_second": 5.585, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.2811962873839807, "eval_f1_macro": 0.6820172839356666, "eval_f1_micro": 0.8014906832298136, "eval_loss": 0.13480685651302338, "eval_roc_auc": 0.8666871757284966, "eval_runtime": 521.536, "eval_samples_per_second": 5.578, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.13122691214084625, "learning_rate": 0.0001, "loss": 0.1455, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.2756961155036095, "eval_f1_macro": 0.681931169239128, "eval_f1_micro": 0.8014919187733112, "eval_loss": 0.1342025101184845, "eval_roc_auc": 0.8664528250448902, "eval_runtime": 522.5262, "eval_samples_per_second": 5.567, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.15952740609645844, "learning_rate": 0.0001, "loss": 0.1416, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.2811962873839807, "eval_f1_macro": 0.683693351140427, "eval_f1_micro": 0.8019789631231031, "eval_loss": 0.1327475756406784, "eval_roc_auc": 0.8658142028998129, "eval_runtime": 518.6048, "eval_samples_per_second": 5.609, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.2811962873839807, "eval_f1_macro": 0.6900135704395078, "eval_f1_micro": 0.8049446006284108, "eval_loss": 0.1318245828151703, "eval_roc_auc": 0.8690323816564128, "eval_runtime": 519.4737, "eval_samples_per_second": 5.6, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.18243736028671265, "learning_rate": 0.0001, "loss": 0.1402, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.28910278446201443, "eval_f1_macro": 0.6920134474185277, "eval_f1_micro": 0.8063969585520062, "eval_loss": 0.13027183711528778, "eval_roc_auc": 0.869961249113055, "eval_runtime": 520.8317, "eval_samples_per_second": 5.585, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.284977655551736, "eval_f1_macro": 0.6938459582689339, "eval_f1_micro": 0.8065087538619978, "eval_loss": 0.12985946238040924, "eval_roc_auc": 0.8709245066726377, "eval_runtime": 520.349, "eval_samples_per_second": 5.59, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.20866894721984863, "learning_rate": 0.0001, "loss": 0.1387, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.2853214162942592, "eval_f1_macro": 0.6917397436201066, "eval_f1_micro": 0.8031727379553465, "eval_loss": 0.12981055676937103, "eval_roc_auc": 0.8638141011256728, "eval_runtime": 515.7048, "eval_samples_per_second": 5.641, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.2839463733241664, "eval_f1_macro": 0.6980761423122126, "eval_f1_micro": 0.8081048867699644, "eval_loss": 0.1301460713148117, "eval_roc_auc": 0.8724715693697419, "eval_runtime": 519.0148, "eval_samples_per_second": 5.605, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.16819314658641815, "learning_rate": 0.0001, "loss": 0.1391, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.2829150910965968, "eval_f1_macro": 0.6968263757426811, "eval_f1_micro": 0.8056895691232739, "eval_loss": 0.1294524371623993, "eval_roc_auc": 0.8677359534045406, "eval_runtime": 516.1382, "eval_samples_per_second": 5.636, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.2846338948092128, "eval_f1_macro": 0.6981227572539419, "eval_f1_micro": 0.8078541374474054, "eval_loss": 0.12989668548107147, "eval_roc_auc": 0.8712741682837021, "eval_runtime": 519.4542, "eval_samples_per_second": 5.6, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.2057354748249054, "learning_rate": 0.0001, "loss": 0.1374, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.284977655551736, "eval_f1_macro": 0.7032059573412642, "eval_f1_micro": 0.809621541745341, "eval_loss": 0.13097986578941345, "eval_roc_auc": 0.8736802406111642, "eval_runtime": 517.4492, "eval_samples_per_second": 5.622, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.288415262976968, "eval_f1_macro": 0.6952081515364695, "eval_f1_micro": 0.8082875892525485, "eval_loss": 0.12910524010658264, "eval_roc_auc": 0.8713581964654615, "eval_runtime": 515.3537, "eval_samples_per_second": 5.645, "eval_steps_per_second": 0.177, "learning_rate": 0.0001, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.19681566953659058, "learning_rate": 0.0001, "loss": 0.1367, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.2860089377793056, "eval_f1_macro": 0.6914506394370794, "eval_f1_micro": 0.8055729885778838, "eval_loss": 0.1276824176311493, "eval_roc_auc": 0.8659785304346413, "eval_runtime": 517.0408, "eval_samples_per_second": 5.626, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.21626819670200348, "learning_rate": 0.0001, "loss": 0.1364, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.28979030594706084, "eval_f1_macro": 0.7051415507931676, "eval_f1_micro": 0.8091508143727464, "eval_loss": 0.12751279771327972, "eval_roc_auc": 0.8706409740591669, "eval_runtime": 516.7044, "eval_samples_per_second": 5.63, "eval_steps_per_second": 0.176, "learning_rate": 0.0001, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.2911653489171537, "eval_f1_macro": 0.6990943862949641, "eval_f1_micro": 0.8077718065316246, "eval_loss": 0.12798655033111572, "eval_roc_auc": 0.8706103127849166, "eval_runtime": 520.6769, "eval_samples_per_second": 5.587, "eval_steps_per_second": 0.175, "learning_rate": 0.0001, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.2569683790206909, "learning_rate": 0.0001, "loss": 0.135, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.29150910965967686, "eval_f1_macro": 0.7001268142729874, "eval_f1_micro": 0.8107930240210597, "eval_loss": 0.1279618740081787, "eval_roc_auc": 0.8737307073185808, "eval_runtime": 522.0888, "eval_samples_per_second": 5.572, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.290134066689584, "eval_f1_macro": 0.7039327958876614, "eval_f1_micro": 0.8108946874106743, "eval_loss": 0.1280883550643921, "eval_roc_auc": 0.8759047076910806, "eval_runtime": 524.5592, "eval_samples_per_second": 5.546, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.209602490067482, "learning_rate": 0.0001, "loss": 0.1345, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.2873839807493984, "eval_f1_macro": 0.699653006099352, "eval_f1_micro": 0.8071845383437488, "eval_loss": 0.1287168562412262, "eval_roc_auc": 0.8699666770335968, "eval_runtime": 523.8253, "eval_samples_per_second": 5.553, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.28875902371949125, "eval_f1_macro": 0.7042073996338176, "eval_f1_micro": 0.8103491168421926, "eval_loss": 0.1270500272512436, "eval_roc_auc": 0.8711666058400855, "eval_runtime": 535.2866, "eval_samples_per_second": 5.434, "eval_steps_per_second": 0.17, "learning_rate": 0.0001, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.2388932704925537, "learning_rate": 0.0001, "loss": 0.134, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.28944654520453766, "eval_f1_macro": 0.6994480698947442, "eval_f1_micro": 0.8072888368788399, "eval_loss": 0.1269637793302536, "eval_roc_auc": 0.8672195202075096, "eval_runtime": 529.882, "eval_samples_per_second": 5.49, "eval_steps_per_second": 0.172, "learning_rate": 0.0001, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.28979030594706084, "eval_f1_macro": 0.7105518005302388, "eval_f1_micro": 0.8124407826982492, "eval_loss": 0.12639474868774414, "eval_roc_auc": 0.8741971299571496, "eval_runtime": 525.1851, "eval_samples_per_second": 5.539, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.2082633525133133, "learning_rate": 0.0001, "loss": 0.1331, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.2918528704022001, "eval_f1_macro": 0.7042257858113937, "eval_f1_micro": 0.8093336660843524, "eval_loss": 0.12643341720104218, "eval_roc_auc": 0.8697033641675914, "eval_runtime": 525.5019, "eval_samples_per_second": 5.536, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.2918528704022001, "eval_f1_macro": 0.7054117610081568, "eval_f1_micro": 0.8119739624362535, "eval_loss": 0.12570597231388092, "eval_roc_auc": 0.8720692041572721, "eval_runtime": 522.3213, "eval_samples_per_second": 5.569, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.25887489318847656, "learning_rate": 0.0001, "loss": 0.1327, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.29322791337229287, "eval_f1_macro": 0.7040599127700347, "eval_f1_micro": 0.8103770839396333, "eval_loss": 0.12599390745162964, "eval_roc_auc": 0.8704049661183646, "eval_runtime": 523.7812, "eval_samples_per_second": 5.554, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.2609516382217407, "learning_rate": 0.0001, "loss": 0.1319, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.29769680302509455, "eval_f1_macro": 0.7083351143800681, "eval_f1_micro": 0.8141795311606633, "eval_loss": 0.12674611806869507, "eval_roc_auc": 0.8776967697557255, "eval_runtime": 525.4493, "eval_samples_per_second": 5.536, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.28979030594706084, "eval_f1_macro": 0.6998024530144022, "eval_f1_micro": 0.8090950582963362, "eval_loss": 0.12676431238651276, "eval_roc_auc": 0.8702765206211787, "eval_runtime": 528.6841, "eval_samples_per_second": 5.502, "eval_steps_per_second": 0.172, "learning_rate": 0.0001, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.31347450613975525, "learning_rate": 0.0001, "loss": 0.1319, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.2928841526297697, "eval_f1_macro": 0.7034736625177254, "eval_f1_micro": 0.8127327032445482, "eval_loss": 0.12638631463050842, "eval_roc_auc": 0.8763513248964829, "eval_runtime": 524.2249, "eval_samples_per_second": 5.549, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.2952904778274321, "eval_f1_macro": 0.7078892431331377, "eval_f1_micro": 0.8131967584022379, "eval_loss": 0.12608103454113007, "eval_roc_auc": 0.8750227852703141, "eval_runtime": 527.0116, "eval_samples_per_second": 5.52, "eval_steps_per_second": 0.173, "learning_rate": 0.0001, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.24166111648082733, "learning_rate": 0.0001, "loss": 0.1308, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.29150910965967686, "eval_f1_macro": 0.7081157868651535, "eval_f1_micro": 0.8136722606120435, "eval_loss": 0.12582050263881683, "eval_roc_auc": 0.875321565859663, "eval_runtime": 523.4787, "eval_samples_per_second": 5.557, "eval_steps_per_second": 0.174, "learning_rate": 0.0001, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.2918528704022001, "eval_f1_macro": 0.7044517956080781, "eval_f1_micro": 0.8123295595405339, "eval_loss": 0.12533149123191833, "eval_roc_auc": 0.8732599343798731, "eval_runtime": 528.0987, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.2781914174556732, "learning_rate": 1e-05, "loss": 0.1294, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.2966655207975249, "eval_f1_macro": 0.7099295458861072, "eval_f1_micro": 0.8159506713723581, "eval_loss": 0.1258901059627533, "eval_roc_auc": 0.8806755842224759, "eval_runtime": 528.1564, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.2949467170849089, "eval_f1_macro": 0.7116557450872655, "eval_f1_micro": 0.8159496670343587, "eval_loss": 0.12526649236679077, "eval_roc_auc": 0.8785730989334561, "eval_runtime": 529.1223, "eval_samples_per_second": 5.498, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.278796911239624, "learning_rate": 1e-05, "loss": 0.1287, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.29769680302509455, "eval_f1_macro": 0.7159515864206437, "eval_f1_micro": 0.8156100747030249, "eval_loss": 0.12490212172269821, "eval_roc_auc": 0.8786256372123677, "eval_runtime": 527.6257, "eval_samples_per_second": 5.513, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.2966655207975249, "eval_f1_macro": 0.7082306828309269, "eval_f1_micro": 0.8135426082669078, "eval_loss": 0.12504002451896667, "eval_roc_auc": 0.8755625874067082, "eval_runtime": 534.3244, "eval_samples_per_second": 5.444, "eval_steps_per_second": 0.17, "learning_rate": 1e-05, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.27821090817451477, "learning_rate": 1e-05, "loss": 0.1282, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.2966655207975249, "eval_f1_macro": 0.6998917153140419, "eval_f1_micro": 0.8099675513769865, "eval_loss": 0.12634462118148804, "eval_roc_auc": 0.8700019375997908, "eval_runtime": 530.2495, "eval_samples_per_second": 5.486, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.2594936192035675, "learning_rate": 1e-05, "loss": 0.1285, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.2966655207975249, "eval_f1_macro": 0.7104044870773909, "eval_f1_micro": 0.8142915811088296, "eval_loss": 0.1249643936753273, "eval_roc_auc": 0.8761213663476708, "eval_runtime": 520.8557, "eval_samples_per_second": 5.585, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.2939154348573393, "eval_f1_macro": 0.7076718539561497, "eval_f1_micro": 0.812339968613199, "eval_loss": 0.12509745359420776, "eval_roc_auc": 0.8734096354102718, "eval_runtime": 523.1264, "eval_samples_per_second": 5.561, "eval_steps_per_second": 0.174, "learning_rate": 1e-05, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.24959908425807953, "learning_rate": 1e-05, "loss": 0.1281, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.29838432451014096, "eval_f1_macro": 0.7097766100728804, "eval_f1_micro": 0.8147326016360423, "eval_loss": 0.12465520948171616, "eval_roc_auc": 0.876088219187627, "eval_runtime": 536.4997, "eval_samples_per_second": 5.422, "eval_steps_per_second": 0.17, "learning_rate": 1e-05, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.2990718459951873, "eval_f1_macro": 0.7133791911991404, "eval_f1_micro": 0.8166140393490405, "eval_loss": 0.12526248395442963, "eval_roc_auc": 0.8816640751357672, "eval_runtime": 530.0362, "eval_samples_per_second": 5.488, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.2447408139705658, "learning_rate": 1e-05, "loss": 0.1281, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.2952904778274321, "eval_f1_macro": 0.705898272950067, "eval_f1_micro": 0.8121923983622152, "eval_loss": 0.12510864436626434, "eval_roc_auc": 0.8729086155116128, "eval_runtime": 529.107, "eval_samples_per_second": 5.498, "eval_steps_per_second": 0.172, "learning_rate": 1e-05, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.29975936748023374, "eval_f1_macro": 0.7095032932540235, "eval_f1_micro": 0.8150326797385622, "eval_loss": 0.12532733380794525, "eval_roc_auc": 0.8780991768028508, "eval_runtime": 520.4428, "eval_samples_per_second": 5.589, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.29986944794654846, "learning_rate": 1e-05, "loss": 0.1269, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.29597799931247853, "eval_f1_macro": 0.7124383950303705, "eval_f1_micro": 0.815855206584497, "eval_loss": 0.12474868446588516, "eval_roc_auc": 0.87784419205748, "eval_runtime": 521.522, "eval_samples_per_second": 5.578, "eval_steps_per_second": 0.174, "learning_rate": 1e-05, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.3007906497078034, "eval_f1_macro": 0.7138847615465347, "eval_f1_micro": 0.8175330467926365, "eval_loss": 0.12511858344078064, "eval_roc_auc": 0.8816953578814657, "eval_runtime": 527.0352, "eval_samples_per_second": 5.52, "eval_steps_per_second": 0.173, "learning_rate": 1e-05, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.23814542591571808, "learning_rate": 1e-05, "loss": 0.1267, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.2966655207975249, "eval_f1_macro": 0.7054571621251418, "eval_f1_micro": 0.8132141082960754, "eval_loss": 0.12457013875246048, "eval_roc_auc": 0.8731936400597855, "eval_runtime": 570.3692, "eval_samples_per_second": 5.1, "eval_steps_per_second": 0.16, "learning_rate": 1e-05, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.2946029563423857, "eval_f1_macro": 0.7142702846379808, "eval_f1_micro": 0.8143732269868025, "eval_loss": 0.1251869946718216, "eval_roc_auc": 0.875841355405741, "eval_runtime": 518.8454, "eval_samples_per_second": 5.607, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.30401352047920227, "learning_rate": 1e-05, "loss": 0.1274, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.2935716741148161, "eval_f1_macro": 0.7081357577756824, "eval_f1_micro": 0.8135328455150868, "eval_loss": 0.12492978572845459, "eval_roc_auc": 0.8754128863763097, "eval_runtime": 516.882, "eval_samples_per_second": 5.628, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.25764307379722595, "learning_rate": 1e-05, "loss": 0.1263, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.2990718459951873, "eval_f1_macro": 0.7099379006276698, "eval_f1_micro": 0.815831263487927, "eval_loss": 0.12513719499111176, "eval_roc_auc": 0.8794975742553038, "eval_runtime": 519.5458, "eval_samples_per_second": 5.599, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.2963217600550017, "eval_f1_macro": 0.7092910188720426, "eval_f1_micro": 0.8143914473684211, "eval_loss": 0.12514576315879822, "eval_roc_auc": 0.8758152934234348, "eval_runtime": 518.776, "eval_samples_per_second": 5.607, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.25485533475875854, "learning_rate": 1e-05, "loss": 0.1272, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.2942591955998625, "eval_f1_macro": 0.7121664381657501, "eval_f1_micro": 0.8134516195584898, "eval_loss": 0.1244530975818634, "eval_roc_auc": 0.8742738658040219, "eval_runtime": 522.4184, "eval_samples_per_second": 5.568, "eval_steps_per_second": 0.174, "learning_rate": 1e-05, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.2990718459951873, "eval_f1_macro": 0.7106178930468596, "eval_f1_micro": 0.8153902768123646, "eval_loss": 0.12501013278961182, "eval_roc_auc": 0.8780205765416332, "eval_runtime": 521.4209, "eval_samples_per_second": 5.579, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 24843 }, { "epoch": 91.57509157509158, "grad_norm": 0.24377481639385223, "learning_rate": 1e-05, "loss": 0.1275, "step": 25000 }, { "epoch": 92.0, "eval_accuracy": 0.2973530422825713, "eval_f1_macro": 0.7140173113811211, "eval_f1_micro": 0.8163049232398094, "eval_loss": 0.12525025010108948, "eval_roc_auc": 0.8797377671737511, "eval_runtime": 518.7634, "eval_samples_per_second": 5.608, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 25116 }, { "epoch": 93.0, "eval_accuracy": 0.29872808525266414, "eval_f1_macro": 0.7129019083206937, "eval_f1_micro": 0.8148661314641998, "eval_loss": 0.12471849471330643, "eval_roc_auc": 0.8787177863576121, "eval_runtime": 519.6644, "eval_samples_per_second": 5.598, "eval_steps_per_second": 0.175, "learning_rate": 1e-05, "step": 25389 }, { "epoch": 93.4065934065934, "grad_norm": 0.2000974863767624, "learning_rate": 1e-05, "loss": 0.1257, "step": 25500 }, { "epoch": 94.0, "eval_accuracy": 0.2980405637676177, "eval_f1_macro": 0.7053935701419592, "eval_f1_micro": 0.8141884924726748, "eval_loss": 0.12515641748905182, "eval_roc_auc": 0.8748001190563892, "eval_runtime": 516.6712, "eval_samples_per_second": 5.63, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 25662 }, { "epoch": 95.0, "eval_accuracy": 0.30147817119284975, "eval_f1_macro": 0.7134995447430972, "eval_f1_micro": 0.8165906870726147, "eval_loss": 0.12481416761875153, "eval_roc_auc": 0.8800302912139601, "eval_runtime": 518.3317, "eval_samples_per_second": 5.612, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 25935 }, { "epoch": 95.23809523809524, "grad_norm": 0.284708708524704, "learning_rate": 1e-05, "loss": 0.1271, "step": 26000 }, { "epoch": 96.0, "eval_accuracy": 0.2980405637676177, "eval_f1_macro": 0.7110442004495683, "eval_f1_micro": 0.8160666176830762, "eval_loss": 0.12492986023426056, "eval_roc_auc": 0.8788918921131211, "eval_runtime": 518.2372, "eval_samples_per_second": 5.613, "eval_steps_per_second": 0.176, "learning_rate": 1e-05, "step": 26208 }, { "epoch": 97.0, "eval_accuracy": 0.30147817119284975, "eval_f1_macro": 0.7158597011246477, "eval_f1_micro": 0.8168590473093806, "eval_loss": 0.12459924072027206, "eval_roc_auc": 0.8806157536086776, "eval_runtime": 516.8314, "eval_samples_per_second": 5.629, "eval_steps_per_second": 0.176, "learning_rate": 1.0000000000000002e-06, "step": 26481 }, { "epoch": 97.06959706959707, "grad_norm": 0.2898052930831909, "learning_rate": 1.0000000000000002e-06, "loss": 0.1272, "step": 26500 }, { "epoch": 98.0, "eval_accuracy": 0.29975936748023374, "eval_f1_macro": 0.707122866121441, "eval_f1_micro": 0.8149457415323906, "eval_loss": 0.12447398155927658, "eval_roc_auc": 0.8762181147848592, "eval_runtime": 516.1798, "eval_samples_per_second": 5.636, "eval_steps_per_second": 0.176, "learning_rate": 1.0000000000000002e-06, "step": 26754 }, { "epoch": 98.9010989010989, "grad_norm": 0.2835540473461151, "learning_rate": 1.0000000000000002e-06, "loss": 0.126, "step": 27000 }, { "epoch": 99.0, "eval_accuracy": 0.30216569267789617, "eval_f1_macro": 0.7182970295785608, "eval_f1_micro": 0.8165748111859562, "eval_loss": 0.12462905794382095, "eval_roc_auc": 0.8791385487378576, "eval_runtime": 525.6968, "eval_samples_per_second": 5.534, "eval_steps_per_second": 0.173, "learning_rate": 1.0000000000000002e-06, "step": 27027 }, { "epoch": 100.0, "eval_accuracy": 0.30147817119284975, "eval_f1_macro": 0.7136275002413193, "eval_f1_micro": 0.8161644284310514, "eval_loss": 0.12463195621967316, "eval_roc_auc": 0.8780520664444814, "eval_runtime": 523.9869, "eval_samples_per_second": 5.552, "eval_steps_per_second": 0.174, "learning_rate": 1.0000000000000002e-06, "step": 27300 }, { "epoch": 100.0, "learning_rate": 1.0000000000000002e-06, "step": 27300, "total_flos": 1.2912305794345248e+21, "train_loss": 0.1443542043106023, "train_runtime": 217230.0758, "train_samples_per_second": 6.013, "train_steps_per_second": 0.189 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2912305794345248e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }