lombardata's picture
Evaluation on the test set completed on 2024_05_30.
c294b98 verified
raw
history blame contribute delete
No virus
50.5 kB
{
"best_metric": 0.1244530975818634,
"best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_05_27-_batch-size32_epochs150_freeze/checkpoint-24570",
"epoch": 100.0,
"eval_steps": 500,
"global_step": 27300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.232726022688209,
"eval_f1_macro": 0.4712591871520079,
"eval_f1_micro": 0.7380235658381353,
"eval_loss": 0.1701383888721466,
"eval_roc_auc": 0.8226166045759734,
"eval_runtime": 579.6646,
"eval_samples_per_second": 5.018,
"eval_steps_per_second": 0.157,
"learning_rate": 0.001,
"step": 273
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.3410358726978302,
"learning_rate": 0.001,
"loss": 0.2748,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.24888277758679958,
"eval_f1_macro": 0.5722657636852799,
"eval_f1_micro": 0.7568708574323469,
"eval_loss": 0.15890291333198547,
"eval_roc_auc": 0.8372263575023127,
"eval_runtime": 572.4604,
"eval_samples_per_second": 5.082,
"eval_steps_per_second": 0.159,
"learning_rate": 0.001,
"step": 546
},
{
"epoch": 3.0,
"eval_accuracy": 0.24785149535922998,
"eval_f1_macro": 0.6104117366005594,
"eval_f1_micro": 0.7723932964583505,
"eval_loss": 0.15134122967720032,
"eval_roc_auc": 0.851560035472562,
"eval_runtime": 581.798,
"eval_samples_per_second": 5.0,
"eval_steps_per_second": 0.156,
"learning_rate": 0.001,
"step": 819
},
{
"epoch": 3.663003663003663,
"grad_norm": 0.28447234630584717,
"learning_rate": 0.001,
"loss": 0.1714,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.24853901684427637,
"eval_f1_macro": 0.599745200497783,
"eval_f1_micro": 0.7608496532472631,
"eval_loss": 0.15164224803447723,
"eval_roc_auc": 0.8311391237980399,
"eval_runtime": 574.55,
"eval_samples_per_second": 5.063,
"eval_steps_per_second": 0.158,
"learning_rate": 0.001,
"step": 1092
},
{
"epoch": 5.0,
"eval_accuracy": 0.24750773461670678,
"eval_f1_macro": 0.5935106518877853,
"eval_f1_micro": 0.7692371752165224,
"eval_loss": 0.15243493020534515,
"eval_roc_auc": 0.8475084947536117,
"eval_runtime": 565.8483,
"eval_samples_per_second": 5.141,
"eval_steps_per_second": 0.161,
"learning_rate": 0.001,
"step": 1365
},
{
"epoch": 5.4945054945054945,
"grad_norm": 0.32554125785827637,
"learning_rate": 0.001,
"loss": 0.1661,
"step": 1500
},
{
"epoch": 6.0,
"eval_accuracy": 0.24269508422138192,
"eval_f1_macro": 0.613788061665736,
"eval_f1_micro": 0.7718080548414739,
"eval_loss": 0.14673969149589539,
"eval_roc_auc": 0.8414237469351327,
"eval_runtime": 563.6053,
"eval_samples_per_second": 5.161,
"eval_steps_per_second": 0.161,
"learning_rate": 0.001,
"step": 1638
},
{
"epoch": 7.0,
"eval_accuracy": 0.2509453420419388,
"eval_f1_macro": 0.6309898748773293,
"eval_f1_micro": 0.7732481363152289,
"eval_loss": 0.1506606936454773,
"eval_roc_auc": 0.8436874105670831,
"eval_runtime": 563.2783,
"eval_samples_per_second": 5.164,
"eval_steps_per_second": 0.162,
"learning_rate": 0.001,
"step": 1911
},
{
"epoch": 7.326007326007326,
"grad_norm": 0.22656740248203278,
"learning_rate": 0.001,
"loss": 0.1611,
"step": 2000
},
{
"epoch": 8.0,
"eval_accuracy": 0.2609144035751117,
"eval_f1_macro": 0.6403740520777896,
"eval_f1_micro": 0.7828014555188422,
"eval_loss": 0.14430351555347443,
"eval_roc_auc": 0.8536999365685817,
"eval_runtime": 570.8881,
"eval_samples_per_second": 5.096,
"eval_steps_per_second": 0.159,
"learning_rate": 0.001,
"step": 2184
},
{
"epoch": 9.0,
"eval_accuracy": 0.25128910278446204,
"eval_f1_macro": 0.6366498775226099,
"eval_f1_micro": 0.781416038551835,
"eval_loss": 0.14617429673671722,
"eval_roc_auc": 0.8571263621918263,
"eval_runtime": 575.7471,
"eval_samples_per_second": 5.053,
"eval_steps_per_second": 0.158,
"learning_rate": 0.001,
"step": 2457
},
{
"epoch": 9.157509157509157,
"grad_norm": 0.1823957860469818,
"learning_rate": 0.001,
"loss": 0.1606,
"step": 2500
},
{
"epoch": 10.0,
"eval_accuracy": 0.2688209006531454,
"eval_f1_macro": 0.6254472467805158,
"eval_f1_micro": 0.7794745970641737,
"eval_loss": 0.14414818584918976,
"eval_roc_auc": 0.8488544375964642,
"eval_runtime": 577.67,
"eval_samples_per_second": 5.036,
"eval_steps_per_second": 0.158,
"learning_rate": 0.001,
"step": 2730
},
{
"epoch": 10.989010989010989,
"grad_norm": 0.1724083572626114,
"learning_rate": 0.001,
"loss": 0.1592,
"step": 3000
},
{
"epoch": 11.0,
"eval_accuracy": 0.2595393606050189,
"eval_f1_macro": 0.6357434835994208,
"eval_f1_micro": 0.7780349253103302,
"eval_loss": 0.14589238166809082,
"eval_roc_auc": 0.8494945098415877,
"eval_runtime": 576.9242,
"eval_samples_per_second": 5.042,
"eval_steps_per_second": 0.158,
"learning_rate": 0.001,
"step": 3003
},
{
"epoch": 12.0,
"eval_accuracy": 0.2554142316947405,
"eval_f1_macro": 0.638389910481932,
"eval_f1_micro": 0.7823495795575149,
"eval_loss": 0.14458976686000824,
"eval_roc_auc": 0.8601197613283508,
"eval_runtime": 579.3816,
"eval_samples_per_second": 5.021,
"eval_steps_per_second": 0.157,
"learning_rate": 0.001,
"step": 3276
},
{
"epoch": 12.820512820512821,
"grad_norm": 0.18148617446422577,
"learning_rate": 0.001,
"loss": 0.1582,
"step": 3500
},
{
"epoch": 13.0,
"eval_accuracy": 0.2561017531797869,
"eval_f1_macro": 0.6574365741219022,
"eval_f1_micro": 0.786284091383703,
"eval_loss": 0.14142437279224396,
"eval_roc_auc": 0.8559800911193071,
"eval_runtime": 580.1191,
"eval_samples_per_second": 5.014,
"eval_steps_per_second": 0.157,
"learning_rate": 0.001,
"step": 3549
},
{
"epoch": 14.0,
"eval_accuracy": 0.24682021313166036,
"eval_f1_macro": 0.6245865910731833,
"eval_f1_micro": 0.7766990291262137,
"eval_loss": 0.1581379920244217,
"eval_roc_auc": 0.8490862605596214,
"eval_runtime": 579.658,
"eval_samples_per_second": 5.018,
"eval_steps_per_second": 0.157,
"learning_rate": 0.001,
"step": 3822
},
{
"epoch": 14.652014652014651,
"grad_norm": 0.17000725865364075,
"learning_rate": 0.001,
"loss": 0.1575,
"step": 4000
},
{
"epoch": 15.0,
"eval_accuracy": 0.2598831213475421,
"eval_f1_macro": 0.6552072842486797,
"eval_f1_micro": 0.7859620485615181,
"eval_loss": 0.1447945237159729,
"eval_roc_auc": 0.8622108111394943,
"eval_runtime": 578.0771,
"eval_samples_per_second": 5.032,
"eval_steps_per_second": 0.157,
"learning_rate": 0.001,
"step": 4095
},
{
"epoch": 16.0,
"eval_accuracy": 0.2605706428325885,
"eval_f1_macro": 0.6495757946819554,
"eval_f1_micro": 0.7853051058530511,
"eval_loss": 0.1438169628381729,
"eval_roc_auc": 0.8571207689487904,
"eval_runtime": 577.2113,
"eval_samples_per_second": 5.04,
"eval_steps_per_second": 0.158,
"learning_rate": 0.001,
"step": 4368
},
{
"epoch": 16.483516483516482,
"grad_norm": 0.16623707115650177,
"learning_rate": 0.001,
"loss": 0.158,
"step": 4500
},
{
"epoch": 17.0,
"eval_accuracy": 0.2506015812994156,
"eval_f1_macro": 0.6310969679900952,
"eval_f1_micro": 0.7824457675812967,
"eval_loss": 0.14359386265277863,
"eval_roc_auc": 0.8547290741339532,
"eval_runtime": 583.6271,
"eval_samples_per_second": 4.984,
"eval_steps_per_second": 0.156,
"learning_rate": 0.001,
"step": 4641
},
{
"epoch": 18.0,
"eval_accuracy": 0.2564455139223101,
"eval_f1_macro": 0.6531950395959965,
"eval_f1_micro": 0.7848311343456975,
"eval_loss": 0.1412857472896576,
"eval_roc_auc": 0.8569977513721464,
"eval_runtime": 571.2179,
"eval_samples_per_second": 5.093,
"eval_steps_per_second": 0.159,
"learning_rate": 0.001,
"step": 4914
},
{
"epoch": 18.315018315018314,
"grad_norm": 0.14722026884555817,
"learning_rate": 0.001,
"loss": 0.1571,
"step": 5000
},
{
"epoch": 19.0,
"eval_accuracy": 0.26022688209006534,
"eval_f1_macro": 0.6486819478687708,
"eval_f1_micro": 0.7833830386020918,
"eval_loss": 0.14079046249389648,
"eval_roc_auc": 0.852859282271344,
"eval_runtime": 574.5699,
"eval_samples_per_second": 5.063,
"eval_steps_per_second": 0.158,
"learning_rate": 0.001,
"step": 5187
},
{
"epoch": 20.0,
"eval_accuracy": 0.26400825025782054,
"eval_f1_macro": 0.6262168341318395,
"eval_f1_micro": 0.7775968460747342,
"eval_loss": 0.14640754461288452,
"eval_roc_auc": 0.8444495064377587,
"eval_runtime": 579.1715,
"eval_samples_per_second": 5.023,
"eval_steps_per_second": 0.157,
"learning_rate": 0.001,
"step": 5460
},
{
"epoch": 20.146520146520146,
"grad_norm": 0.16411201655864716,
"learning_rate": 0.001,
"loss": 0.1579,
"step": 5500
},
{
"epoch": 21.0,
"eval_accuracy": 0.2653832932279134,
"eval_f1_macro": 0.6582044080070929,
"eval_f1_micro": 0.7890916719110552,
"eval_loss": 0.1412632316350937,
"eval_roc_auc": 0.8562690934879753,
"eval_runtime": 565.162,
"eval_samples_per_second": 5.147,
"eval_steps_per_second": 0.161,
"learning_rate": 0.001,
"step": 5733
},
{
"epoch": 21.978021978021978,
"grad_norm": 0.14830045402050018,
"learning_rate": 0.001,
"loss": 0.1564,
"step": 6000
},
{
"epoch": 22.0,
"eval_accuracy": 0.2543829494671708,
"eval_f1_macro": 0.6586947128782558,
"eval_f1_micro": 0.7871090517954659,
"eval_loss": 0.14168681204319,
"eval_roc_auc": 0.8590452124964612,
"eval_runtime": 558.0731,
"eval_samples_per_second": 5.213,
"eval_steps_per_second": 0.163,
"learning_rate": 0.001,
"step": 6006
},
{
"epoch": 23.0,
"eval_accuracy": 0.269852182880715,
"eval_f1_macro": 0.6427873985434494,
"eval_f1_micro": 0.7863651704353696,
"eval_loss": 0.1393543779850006,
"eval_roc_auc": 0.8532977785132612,
"eval_runtime": 555.9424,
"eval_samples_per_second": 5.233,
"eval_steps_per_second": 0.164,
"learning_rate": 0.001,
"step": 6279
},
{
"epoch": 23.80952380952381,
"grad_norm": 0.1764509379863739,
"learning_rate": 0.001,
"loss": 0.1554,
"step": 6500
},
{
"epoch": 24.0,
"eval_accuracy": 0.2588518391199725,
"eval_f1_macro": 0.6618962794412713,
"eval_f1_micro": 0.7857706852844616,
"eval_loss": 0.14052371680736542,
"eval_roc_auc": 0.8570680235127297,
"eval_runtime": 561.2023,
"eval_samples_per_second": 5.184,
"eval_steps_per_second": 0.162,
"learning_rate": 0.001,
"step": 6552
},
{
"epoch": 25.0,
"eval_accuracy": 0.2653832932279134,
"eval_f1_macro": 0.653320279245233,
"eval_f1_micro": 0.7897693920335429,
"eval_loss": 0.1392364352941513,
"eval_roc_auc": 0.8568084813100703,
"eval_runtime": 557.349,
"eval_samples_per_second": 5.219,
"eval_steps_per_second": 0.163,
"learning_rate": 0.001,
"step": 6825
},
{
"epoch": 25.641025641025642,
"grad_norm": 0.17609645426273346,
"learning_rate": 0.001,
"loss": 0.1554,
"step": 7000
},
{
"epoch": 26.0,
"eval_accuracy": 0.27019594362323823,
"eval_f1_macro": 0.6529431984792132,
"eval_f1_micro": 0.7838044308632545,
"eval_loss": 0.14239099621772766,
"eval_roc_auc": 0.8503066581053809,
"eval_runtime": 557.4603,
"eval_samples_per_second": 5.218,
"eval_steps_per_second": 0.163,
"learning_rate": 0.001,
"step": 7098
},
{
"epoch": 27.0,
"eval_accuracy": 0.2671020969405294,
"eval_f1_macro": 0.6810613208979668,
"eval_f1_micro": 0.7974886125815585,
"eval_loss": 0.1386287957429886,
"eval_roc_auc": 0.8670442949969421,
"eval_runtime": 549.4206,
"eval_samples_per_second": 5.295,
"eval_steps_per_second": 0.166,
"learning_rate": 0.001,
"step": 7371
},
{
"epoch": 27.47252747252747,
"grad_norm": 0.1621919423341751,
"learning_rate": 0.001,
"loss": 0.156,
"step": 7500
},
{
"epoch": 28.0,
"eval_accuracy": 0.2650395324853902,
"eval_f1_macro": 0.6474807711800876,
"eval_f1_micro": 0.7791304347826087,
"eval_loss": 0.15519200265407562,
"eval_roc_auc": 0.8544148384115767,
"eval_runtime": 547.1571,
"eval_samples_per_second": 5.317,
"eval_steps_per_second": 0.166,
"learning_rate": 0.001,
"step": 7644
},
{
"epoch": 29.0,
"eval_accuracy": 0.27019594362323823,
"eval_f1_macro": 0.6550381793679035,
"eval_f1_micro": 0.7913651213762871,
"eval_loss": 0.14190098643302917,
"eval_roc_auc": 0.8614869597044164,
"eval_runtime": 559.1951,
"eval_samples_per_second": 5.202,
"eval_steps_per_second": 0.163,
"learning_rate": 0.001,
"step": 7917
},
{
"epoch": 29.304029304029303,
"grad_norm": 0.16609038412570953,
"learning_rate": 0.001,
"loss": 0.1548,
"step": 8000
},
{
"epoch": 30.0,
"eval_accuracy": 0.2767273977311791,
"eval_f1_macro": 0.663185953977854,
"eval_f1_micro": 0.7857173292428311,
"eval_loss": 0.13986903429031372,
"eval_roc_auc": 0.8502714393738423,
"eval_runtime": 549.5664,
"eval_samples_per_second": 5.293,
"eval_steps_per_second": 0.166,
"learning_rate": 0.001,
"step": 8190
},
{
"epoch": 31.0,
"eval_accuracy": 0.27260226882090066,
"eval_f1_macro": 0.6554744698272669,
"eval_f1_micro": 0.7881844380403459,
"eval_loss": 0.13765402138233185,
"eval_roc_auc": 0.8530413436678441,
"eval_runtime": 541.1802,
"eval_samples_per_second": 5.375,
"eval_steps_per_second": 0.168,
"learning_rate": 0.001,
"step": 8463
},
{
"epoch": 31.135531135531135,
"grad_norm": 0.15226389467716217,
"learning_rate": 0.001,
"loss": 0.1554,
"step": 8500
},
{
"epoch": 32.0,
"eval_accuracy": 0.2677896184255758,
"eval_f1_macro": 0.6596978272887946,
"eval_f1_micro": 0.7914770376499792,
"eval_loss": 0.13866138458251953,
"eval_roc_auc": 0.861421701170076,
"eval_runtime": 550.2376,
"eval_samples_per_second": 5.287,
"eval_steps_per_second": 0.165,
"learning_rate": 0.001,
"step": 8736
},
{
"epoch": 32.967032967032964,
"grad_norm": 0.15690498054027557,
"learning_rate": 0.001,
"loss": 0.1551,
"step": 9000
},
{
"epoch": 33.0,
"eval_accuracy": 0.2605706428325885,
"eval_f1_macro": 0.6583814800932023,
"eval_f1_micro": 0.7887546855476885,
"eval_loss": 0.13930276036262512,
"eval_roc_auc": 0.8580923964636631,
"eval_runtime": 550.269,
"eval_samples_per_second": 5.287,
"eval_steps_per_second": 0.165,
"learning_rate": 0.001,
"step": 9009
},
{
"epoch": 34.0,
"eval_accuracy": 0.2763836369886559,
"eval_f1_macro": 0.6636727922636001,
"eval_f1_micro": 0.795303262082937,
"eval_loss": 0.1374826431274414,
"eval_roc_auc": 0.8637208325253699,
"eval_runtime": 543.5944,
"eval_samples_per_second": 5.351,
"eval_steps_per_second": 0.167,
"learning_rate": 0.001,
"step": 9282
},
{
"epoch": 34.798534798534796,
"grad_norm": 0.166019469499588,
"learning_rate": 0.001,
"loss": 0.1544,
"step": 9500
},
{
"epoch": 35.0,
"eval_accuracy": 0.25850807837744927,
"eval_f1_macro": 0.6442491093834092,
"eval_f1_micro": 0.7860775988902434,
"eval_loss": 0.14001137018203735,
"eval_roc_auc": 0.8541119565138938,
"eval_runtime": 537.9679,
"eval_samples_per_second": 5.407,
"eval_steps_per_second": 0.169,
"learning_rate": 0.001,
"step": 9555
},
{
"epoch": 36.0,
"eval_accuracy": 0.26916466139566864,
"eval_f1_macro": 0.6541220211466265,
"eval_f1_micro": 0.7890085033301218,
"eval_loss": 0.13899104297161102,
"eval_roc_auc": 0.856659816065419,
"eval_runtime": 532.0356,
"eval_samples_per_second": 5.468,
"eval_steps_per_second": 0.171,
"learning_rate": 0.001,
"step": 9828
},
{
"epoch": 36.63003663003663,
"grad_norm": 0.1606360822916031,
"learning_rate": 0.001,
"loss": 0.1555,
"step": 10000
},
{
"epoch": 37.0,
"eval_accuracy": 0.2667583361980062,
"eval_f1_macro": 0.6602790790864311,
"eval_f1_micro": 0.788356222091162,
"eval_loss": 0.14101693034172058,
"eval_roc_auc": 0.8547597772428587,
"eval_runtime": 536.869,
"eval_samples_per_second": 5.418,
"eval_steps_per_second": 0.17,
"learning_rate": 0.001,
"step": 10101
},
{
"epoch": 38.0,
"eval_accuracy": 0.2633207287727741,
"eval_f1_macro": 0.6508514926081754,
"eval_f1_micro": 0.7864065343433915,
"eval_loss": 0.13849563896656036,
"eval_roc_auc": 0.8524644165509108,
"eval_runtime": 538.0729,
"eval_samples_per_second": 5.406,
"eval_steps_per_second": 0.169,
"learning_rate": 0.001,
"step": 10374
},
{
"epoch": 38.46153846153846,
"grad_norm": 0.1459091752767563,
"learning_rate": 0.001,
"loss": 0.1547,
"step": 10500
},
{
"epoch": 39.0,
"eval_accuracy": 0.26263320728772777,
"eval_f1_macro": 0.6513021077089046,
"eval_f1_micro": 0.7819844457738655,
"eval_loss": 0.14249388873577118,
"eval_roc_auc": 0.8475227906130532,
"eval_runtime": 528.7471,
"eval_samples_per_second": 5.502,
"eval_steps_per_second": 0.172,
"learning_rate": 0.001,
"step": 10647
},
{
"epoch": 40.0,
"eval_accuracy": 0.2633207287727741,
"eval_f1_macro": 0.6421624481517915,
"eval_f1_micro": 0.7819497946916141,
"eval_loss": 0.1512959599494934,
"eval_roc_auc": 0.850003278224088,
"eval_runtime": 526.9207,
"eval_samples_per_second": 5.521,
"eval_steps_per_second": 0.173,
"learning_rate": 0.001,
"step": 10920
},
{
"epoch": 40.29304029304029,
"grad_norm": 0.16056223213672638,
"learning_rate": 0.0001,
"loss": 0.1527,
"step": 11000
},
{
"epoch": 41.0,
"eval_accuracy": 0.27157098659333107,
"eval_f1_macro": 0.6708412782877394,
"eval_f1_micro": 0.795353889863792,
"eval_loss": 0.1416281908750534,
"eval_roc_auc": 0.8608108400991562,
"eval_runtime": 520.8949,
"eval_samples_per_second": 5.585,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 11193
},
{
"epoch": 42.0,
"eval_accuracy": 0.2811962873839807,
"eval_f1_macro": 0.6820172839356666,
"eval_f1_micro": 0.8014906832298136,
"eval_loss": 0.13480685651302338,
"eval_roc_auc": 0.8666871757284966,
"eval_runtime": 521.536,
"eval_samples_per_second": 5.578,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 11466
},
{
"epoch": 42.124542124542124,
"grad_norm": 0.13122691214084625,
"learning_rate": 0.0001,
"loss": 0.1455,
"step": 11500
},
{
"epoch": 43.0,
"eval_accuracy": 0.2756961155036095,
"eval_f1_macro": 0.681931169239128,
"eval_f1_micro": 0.8014919187733112,
"eval_loss": 0.1342025101184845,
"eval_roc_auc": 0.8664528250448902,
"eval_runtime": 522.5262,
"eval_samples_per_second": 5.567,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 11739
},
{
"epoch": 43.956043956043956,
"grad_norm": 0.15952740609645844,
"learning_rate": 0.0001,
"loss": 0.1416,
"step": 12000
},
{
"epoch": 44.0,
"eval_accuracy": 0.2811962873839807,
"eval_f1_macro": 0.683693351140427,
"eval_f1_micro": 0.8019789631231031,
"eval_loss": 0.1327475756406784,
"eval_roc_auc": 0.8658142028998129,
"eval_runtime": 518.6048,
"eval_samples_per_second": 5.609,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 12012
},
{
"epoch": 45.0,
"eval_accuracy": 0.2811962873839807,
"eval_f1_macro": 0.6900135704395078,
"eval_f1_micro": 0.8049446006284108,
"eval_loss": 0.1318245828151703,
"eval_roc_auc": 0.8690323816564128,
"eval_runtime": 519.4737,
"eval_samples_per_second": 5.6,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 12285
},
{
"epoch": 45.78754578754579,
"grad_norm": 0.18243736028671265,
"learning_rate": 0.0001,
"loss": 0.1402,
"step": 12500
},
{
"epoch": 46.0,
"eval_accuracy": 0.28910278446201443,
"eval_f1_macro": 0.6920134474185277,
"eval_f1_micro": 0.8063969585520062,
"eval_loss": 0.13027183711528778,
"eval_roc_auc": 0.869961249113055,
"eval_runtime": 520.8317,
"eval_samples_per_second": 5.585,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 12558
},
{
"epoch": 47.0,
"eval_accuracy": 0.284977655551736,
"eval_f1_macro": 0.6938459582689339,
"eval_f1_micro": 0.8065087538619978,
"eval_loss": 0.12985946238040924,
"eval_roc_auc": 0.8709245066726377,
"eval_runtime": 520.349,
"eval_samples_per_second": 5.59,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 12831
},
{
"epoch": 47.61904761904762,
"grad_norm": 0.20866894721984863,
"learning_rate": 0.0001,
"loss": 0.1387,
"step": 13000
},
{
"epoch": 48.0,
"eval_accuracy": 0.2853214162942592,
"eval_f1_macro": 0.6917397436201066,
"eval_f1_micro": 0.8031727379553465,
"eval_loss": 0.12981055676937103,
"eval_roc_auc": 0.8638141011256728,
"eval_runtime": 515.7048,
"eval_samples_per_second": 5.641,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 13104
},
{
"epoch": 49.0,
"eval_accuracy": 0.2839463733241664,
"eval_f1_macro": 0.6980761423122126,
"eval_f1_micro": 0.8081048867699644,
"eval_loss": 0.1301460713148117,
"eval_roc_auc": 0.8724715693697419,
"eval_runtime": 519.0148,
"eval_samples_per_second": 5.605,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 13377
},
{
"epoch": 49.45054945054945,
"grad_norm": 0.16819314658641815,
"learning_rate": 0.0001,
"loss": 0.1391,
"step": 13500
},
{
"epoch": 50.0,
"eval_accuracy": 0.2829150910965968,
"eval_f1_macro": 0.6968263757426811,
"eval_f1_micro": 0.8056895691232739,
"eval_loss": 0.1294524371623993,
"eval_roc_auc": 0.8677359534045406,
"eval_runtime": 516.1382,
"eval_samples_per_second": 5.636,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 13650
},
{
"epoch": 51.0,
"eval_accuracy": 0.2846338948092128,
"eval_f1_macro": 0.6981227572539419,
"eval_f1_micro": 0.8078541374474054,
"eval_loss": 0.12989668548107147,
"eval_roc_auc": 0.8712741682837021,
"eval_runtime": 519.4542,
"eval_samples_per_second": 5.6,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 13923
},
{
"epoch": 51.282051282051285,
"grad_norm": 0.2057354748249054,
"learning_rate": 0.0001,
"loss": 0.1374,
"step": 14000
},
{
"epoch": 52.0,
"eval_accuracy": 0.284977655551736,
"eval_f1_macro": 0.7032059573412642,
"eval_f1_micro": 0.809621541745341,
"eval_loss": 0.13097986578941345,
"eval_roc_auc": 0.8736802406111642,
"eval_runtime": 517.4492,
"eval_samples_per_second": 5.622,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 14196
},
{
"epoch": 53.0,
"eval_accuracy": 0.288415262976968,
"eval_f1_macro": 0.6952081515364695,
"eval_f1_micro": 0.8082875892525485,
"eval_loss": 0.12910524010658264,
"eval_roc_auc": 0.8713581964654615,
"eval_runtime": 515.3537,
"eval_samples_per_second": 5.645,
"eval_steps_per_second": 0.177,
"learning_rate": 0.0001,
"step": 14469
},
{
"epoch": 53.11355311355312,
"grad_norm": 0.19681566953659058,
"learning_rate": 0.0001,
"loss": 0.1367,
"step": 14500
},
{
"epoch": 54.0,
"eval_accuracy": 0.2860089377793056,
"eval_f1_macro": 0.6914506394370794,
"eval_f1_micro": 0.8055729885778838,
"eval_loss": 0.1276824176311493,
"eval_roc_auc": 0.8659785304346413,
"eval_runtime": 517.0408,
"eval_samples_per_second": 5.626,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 14742
},
{
"epoch": 54.94505494505494,
"grad_norm": 0.21626819670200348,
"learning_rate": 0.0001,
"loss": 0.1364,
"step": 15000
},
{
"epoch": 55.0,
"eval_accuracy": 0.28979030594706084,
"eval_f1_macro": 0.7051415507931676,
"eval_f1_micro": 0.8091508143727464,
"eval_loss": 0.12751279771327972,
"eval_roc_auc": 0.8706409740591669,
"eval_runtime": 516.7044,
"eval_samples_per_second": 5.63,
"eval_steps_per_second": 0.176,
"learning_rate": 0.0001,
"step": 15015
},
{
"epoch": 56.0,
"eval_accuracy": 0.2911653489171537,
"eval_f1_macro": 0.6990943862949641,
"eval_f1_micro": 0.8077718065316246,
"eval_loss": 0.12798655033111572,
"eval_roc_auc": 0.8706103127849166,
"eval_runtime": 520.6769,
"eval_samples_per_second": 5.587,
"eval_steps_per_second": 0.175,
"learning_rate": 0.0001,
"step": 15288
},
{
"epoch": 56.776556776556774,
"grad_norm": 0.2569683790206909,
"learning_rate": 0.0001,
"loss": 0.135,
"step": 15500
},
{
"epoch": 57.0,
"eval_accuracy": 0.29150910965967686,
"eval_f1_macro": 0.7001268142729874,
"eval_f1_micro": 0.8107930240210597,
"eval_loss": 0.1279618740081787,
"eval_roc_auc": 0.8737307073185808,
"eval_runtime": 522.0888,
"eval_samples_per_second": 5.572,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 15561
},
{
"epoch": 58.0,
"eval_accuracy": 0.290134066689584,
"eval_f1_macro": 0.7039327958876614,
"eval_f1_micro": 0.8108946874106743,
"eval_loss": 0.1280883550643921,
"eval_roc_auc": 0.8759047076910806,
"eval_runtime": 524.5592,
"eval_samples_per_second": 5.546,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 15834
},
{
"epoch": 58.608058608058606,
"grad_norm": 0.209602490067482,
"learning_rate": 0.0001,
"loss": 0.1345,
"step": 16000
},
{
"epoch": 59.0,
"eval_accuracy": 0.2873839807493984,
"eval_f1_macro": 0.699653006099352,
"eval_f1_micro": 0.8071845383437488,
"eval_loss": 0.1287168562412262,
"eval_roc_auc": 0.8699666770335968,
"eval_runtime": 523.8253,
"eval_samples_per_second": 5.553,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 16107
},
{
"epoch": 60.0,
"eval_accuracy": 0.28875902371949125,
"eval_f1_macro": 0.7042073996338176,
"eval_f1_micro": 0.8103491168421926,
"eval_loss": 0.1270500272512436,
"eval_roc_auc": 0.8711666058400855,
"eval_runtime": 535.2866,
"eval_samples_per_second": 5.434,
"eval_steps_per_second": 0.17,
"learning_rate": 0.0001,
"step": 16380
},
{
"epoch": 60.43956043956044,
"grad_norm": 0.2388932704925537,
"learning_rate": 0.0001,
"loss": 0.134,
"step": 16500
},
{
"epoch": 61.0,
"eval_accuracy": 0.28944654520453766,
"eval_f1_macro": 0.6994480698947442,
"eval_f1_micro": 0.8072888368788399,
"eval_loss": 0.1269637793302536,
"eval_roc_auc": 0.8672195202075096,
"eval_runtime": 529.882,
"eval_samples_per_second": 5.49,
"eval_steps_per_second": 0.172,
"learning_rate": 0.0001,
"step": 16653
},
{
"epoch": 62.0,
"eval_accuracy": 0.28979030594706084,
"eval_f1_macro": 0.7105518005302388,
"eval_f1_micro": 0.8124407826982492,
"eval_loss": 0.12639474868774414,
"eval_roc_auc": 0.8741971299571496,
"eval_runtime": 525.1851,
"eval_samples_per_second": 5.539,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 16926
},
{
"epoch": 62.27106227106227,
"grad_norm": 0.2082633525133133,
"learning_rate": 0.0001,
"loss": 0.1331,
"step": 17000
},
{
"epoch": 63.0,
"eval_accuracy": 0.2918528704022001,
"eval_f1_macro": 0.7042257858113937,
"eval_f1_micro": 0.8093336660843524,
"eval_loss": 0.12643341720104218,
"eval_roc_auc": 0.8697033641675914,
"eval_runtime": 525.5019,
"eval_samples_per_second": 5.536,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 17199
},
{
"epoch": 64.0,
"eval_accuracy": 0.2918528704022001,
"eval_f1_macro": 0.7054117610081568,
"eval_f1_micro": 0.8119739624362535,
"eval_loss": 0.12570597231388092,
"eval_roc_auc": 0.8720692041572721,
"eval_runtime": 522.3213,
"eval_samples_per_second": 5.569,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 17472
},
{
"epoch": 64.1025641025641,
"grad_norm": 0.25887489318847656,
"learning_rate": 0.0001,
"loss": 0.1327,
"step": 17500
},
{
"epoch": 65.0,
"eval_accuracy": 0.29322791337229287,
"eval_f1_macro": 0.7040599127700347,
"eval_f1_micro": 0.8103770839396333,
"eval_loss": 0.12599390745162964,
"eval_roc_auc": 0.8704049661183646,
"eval_runtime": 523.7812,
"eval_samples_per_second": 5.554,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 17745
},
{
"epoch": 65.93406593406593,
"grad_norm": 0.2609516382217407,
"learning_rate": 0.0001,
"loss": 0.1319,
"step": 18000
},
{
"epoch": 66.0,
"eval_accuracy": 0.29769680302509455,
"eval_f1_macro": 0.7083351143800681,
"eval_f1_micro": 0.8141795311606633,
"eval_loss": 0.12674611806869507,
"eval_roc_auc": 0.8776967697557255,
"eval_runtime": 525.4493,
"eval_samples_per_second": 5.536,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 18018
},
{
"epoch": 67.0,
"eval_accuracy": 0.28979030594706084,
"eval_f1_macro": 0.6998024530144022,
"eval_f1_micro": 0.8090950582963362,
"eval_loss": 0.12676431238651276,
"eval_roc_auc": 0.8702765206211787,
"eval_runtime": 528.6841,
"eval_samples_per_second": 5.502,
"eval_steps_per_second": 0.172,
"learning_rate": 0.0001,
"step": 18291
},
{
"epoch": 67.76556776556777,
"grad_norm": 0.31347450613975525,
"learning_rate": 0.0001,
"loss": 0.1319,
"step": 18500
},
{
"epoch": 68.0,
"eval_accuracy": 0.2928841526297697,
"eval_f1_macro": 0.7034736625177254,
"eval_f1_micro": 0.8127327032445482,
"eval_loss": 0.12638631463050842,
"eval_roc_auc": 0.8763513248964829,
"eval_runtime": 524.2249,
"eval_samples_per_second": 5.549,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 18564
},
{
"epoch": 69.0,
"eval_accuracy": 0.2952904778274321,
"eval_f1_macro": 0.7078892431331377,
"eval_f1_micro": 0.8131967584022379,
"eval_loss": 0.12608103454113007,
"eval_roc_auc": 0.8750227852703141,
"eval_runtime": 527.0116,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 0.173,
"learning_rate": 0.0001,
"step": 18837
},
{
"epoch": 69.59706959706959,
"grad_norm": 0.24166111648082733,
"learning_rate": 0.0001,
"loss": 0.1308,
"step": 19000
},
{
"epoch": 70.0,
"eval_accuracy": 0.29150910965967686,
"eval_f1_macro": 0.7081157868651535,
"eval_f1_micro": 0.8136722606120435,
"eval_loss": 0.12582050263881683,
"eval_roc_auc": 0.875321565859663,
"eval_runtime": 523.4787,
"eval_samples_per_second": 5.557,
"eval_steps_per_second": 0.174,
"learning_rate": 0.0001,
"step": 19110
},
{
"epoch": 71.0,
"eval_accuracy": 0.2918528704022001,
"eval_f1_macro": 0.7044517956080781,
"eval_f1_micro": 0.8123295595405339,
"eval_loss": 0.12533149123191833,
"eval_roc_auc": 0.8732599343798731,
"eval_runtime": 528.0987,
"eval_samples_per_second": 5.508,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 19383
},
{
"epoch": 71.42857142857143,
"grad_norm": 0.2781914174556732,
"learning_rate": 1e-05,
"loss": 0.1294,
"step": 19500
},
{
"epoch": 72.0,
"eval_accuracy": 0.2966655207975249,
"eval_f1_macro": 0.7099295458861072,
"eval_f1_micro": 0.8159506713723581,
"eval_loss": 0.1258901059627533,
"eval_roc_auc": 0.8806755842224759,
"eval_runtime": 528.1564,
"eval_samples_per_second": 5.508,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 19656
},
{
"epoch": 73.0,
"eval_accuracy": 0.2949467170849089,
"eval_f1_macro": 0.7116557450872655,
"eval_f1_micro": 0.8159496670343587,
"eval_loss": 0.12526649236679077,
"eval_roc_auc": 0.8785730989334561,
"eval_runtime": 529.1223,
"eval_samples_per_second": 5.498,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 19929
},
{
"epoch": 73.26007326007326,
"grad_norm": 0.278796911239624,
"learning_rate": 1e-05,
"loss": 0.1287,
"step": 20000
},
{
"epoch": 74.0,
"eval_accuracy": 0.29769680302509455,
"eval_f1_macro": 0.7159515864206437,
"eval_f1_micro": 0.8156100747030249,
"eval_loss": 0.12490212172269821,
"eval_roc_auc": 0.8786256372123677,
"eval_runtime": 527.6257,
"eval_samples_per_second": 5.513,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 20202
},
{
"epoch": 75.0,
"eval_accuracy": 0.2966655207975249,
"eval_f1_macro": 0.7082306828309269,
"eval_f1_micro": 0.8135426082669078,
"eval_loss": 0.12504002451896667,
"eval_roc_auc": 0.8755625874067082,
"eval_runtime": 534.3244,
"eval_samples_per_second": 5.444,
"eval_steps_per_second": 0.17,
"learning_rate": 1e-05,
"step": 20475
},
{
"epoch": 75.0915750915751,
"grad_norm": 0.27821090817451477,
"learning_rate": 1e-05,
"loss": 0.1282,
"step": 20500
},
{
"epoch": 76.0,
"eval_accuracy": 0.2966655207975249,
"eval_f1_macro": 0.6998917153140419,
"eval_f1_micro": 0.8099675513769865,
"eval_loss": 0.12634462118148804,
"eval_roc_auc": 0.8700019375997908,
"eval_runtime": 530.2495,
"eval_samples_per_second": 5.486,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 20748
},
{
"epoch": 76.92307692307692,
"grad_norm": 0.2594936192035675,
"learning_rate": 1e-05,
"loss": 0.1285,
"step": 21000
},
{
"epoch": 77.0,
"eval_accuracy": 0.2966655207975249,
"eval_f1_macro": 0.7104044870773909,
"eval_f1_micro": 0.8142915811088296,
"eval_loss": 0.1249643936753273,
"eval_roc_auc": 0.8761213663476708,
"eval_runtime": 520.8557,
"eval_samples_per_second": 5.585,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 21021
},
{
"epoch": 78.0,
"eval_accuracy": 0.2939154348573393,
"eval_f1_macro": 0.7076718539561497,
"eval_f1_micro": 0.812339968613199,
"eval_loss": 0.12509745359420776,
"eval_roc_auc": 0.8734096354102718,
"eval_runtime": 523.1264,
"eval_samples_per_second": 5.561,
"eval_steps_per_second": 0.174,
"learning_rate": 1e-05,
"step": 21294
},
{
"epoch": 78.75457875457876,
"grad_norm": 0.24959908425807953,
"learning_rate": 1e-05,
"loss": 0.1281,
"step": 21500
},
{
"epoch": 79.0,
"eval_accuracy": 0.29838432451014096,
"eval_f1_macro": 0.7097766100728804,
"eval_f1_micro": 0.8147326016360423,
"eval_loss": 0.12465520948171616,
"eval_roc_auc": 0.876088219187627,
"eval_runtime": 536.4997,
"eval_samples_per_second": 5.422,
"eval_steps_per_second": 0.17,
"learning_rate": 1e-05,
"step": 21567
},
{
"epoch": 80.0,
"eval_accuracy": 0.2990718459951873,
"eval_f1_macro": 0.7133791911991404,
"eval_f1_micro": 0.8166140393490405,
"eval_loss": 0.12526248395442963,
"eval_roc_auc": 0.8816640751357672,
"eval_runtime": 530.0362,
"eval_samples_per_second": 5.488,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 21840
},
{
"epoch": 80.58608058608058,
"grad_norm": 0.2447408139705658,
"learning_rate": 1e-05,
"loss": 0.1281,
"step": 22000
},
{
"epoch": 81.0,
"eval_accuracy": 0.2952904778274321,
"eval_f1_macro": 0.705898272950067,
"eval_f1_micro": 0.8121923983622152,
"eval_loss": 0.12510864436626434,
"eval_roc_auc": 0.8729086155116128,
"eval_runtime": 529.107,
"eval_samples_per_second": 5.498,
"eval_steps_per_second": 0.172,
"learning_rate": 1e-05,
"step": 22113
},
{
"epoch": 82.0,
"eval_accuracy": 0.29975936748023374,
"eval_f1_macro": 0.7095032932540235,
"eval_f1_micro": 0.8150326797385622,
"eval_loss": 0.12532733380794525,
"eval_roc_auc": 0.8780991768028508,
"eval_runtime": 520.4428,
"eval_samples_per_second": 5.589,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 22386
},
{
"epoch": 82.41758241758242,
"grad_norm": 0.29986944794654846,
"learning_rate": 1e-05,
"loss": 0.1269,
"step": 22500
},
{
"epoch": 83.0,
"eval_accuracy": 0.29597799931247853,
"eval_f1_macro": 0.7124383950303705,
"eval_f1_micro": 0.815855206584497,
"eval_loss": 0.12474868446588516,
"eval_roc_auc": 0.87784419205748,
"eval_runtime": 521.522,
"eval_samples_per_second": 5.578,
"eval_steps_per_second": 0.174,
"learning_rate": 1e-05,
"step": 22659
},
{
"epoch": 84.0,
"eval_accuracy": 0.3007906497078034,
"eval_f1_macro": 0.7138847615465347,
"eval_f1_micro": 0.8175330467926365,
"eval_loss": 0.12511858344078064,
"eval_roc_auc": 0.8816953578814657,
"eval_runtime": 527.0352,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 0.173,
"learning_rate": 1e-05,
"step": 22932
},
{
"epoch": 84.24908424908425,
"grad_norm": 0.23814542591571808,
"learning_rate": 1e-05,
"loss": 0.1267,
"step": 23000
},
{
"epoch": 85.0,
"eval_accuracy": 0.2966655207975249,
"eval_f1_macro": 0.7054571621251418,
"eval_f1_micro": 0.8132141082960754,
"eval_loss": 0.12457013875246048,
"eval_roc_auc": 0.8731936400597855,
"eval_runtime": 570.3692,
"eval_samples_per_second": 5.1,
"eval_steps_per_second": 0.16,
"learning_rate": 1e-05,
"step": 23205
},
{
"epoch": 86.0,
"eval_accuracy": 0.2946029563423857,
"eval_f1_macro": 0.7142702846379808,
"eval_f1_micro": 0.8143732269868025,
"eval_loss": 0.1251869946718216,
"eval_roc_auc": 0.875841355405741,
"eval_runtime": 518.8454,
"eval_samples_per_second": 5.607,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 23478
},
{
"epoch": 86.08058608058609,
"grad_norm": 0.30401352047920227,
"learning_rate": 1e-05,
"loss": 0.1274,
"step": 23500
},
{
"epoch": 87.0,
"eval_accuracy": 0.2935716741148161,
"eval_f1_macro": 0.7081357577756824,
"eval_f1_micro": 0.8135328455150868,
"eval_loss": 0.12492978572845459,
"eval_roc_auc": 0.8754128863763097,
"eval_runtime": 516.882,
"eval_samples_per_second": 5.628,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 23751
},
{
"epoch": 87.91208791208791,
"grad_norm": 0.25764307379722595,
"learning_rate": 1e-05,
"loss": 0.1263,
"step": 24000
},
{
"epoch": 88.0,
"eval_accuracy": 0.2990718459951873,
"eval_f1_macro": 0.7099379006276698,
"eval_f1_micro": 0.815831263487927,
"eval_loss": 0.12513719499111176,
"eval_roc_auc": 0.8794975742553038,
"eval_runtime": 519.5458,
"eval_samples_per_second": 5.599,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 24024
},
{
"epoch": 89.0,
"eval_accuracy": 0.2963217600550017,
"eval_f1_macro": 0.7092910188720426,
"eval_f1_micro": 0.8143914473684211,
"eval_loss": 0.12514576315879822,
"eval_roc_auc": 0.8758152934234348,
"eval_runtime": 518.776,
"eval_samples_per_second": 5.607,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 24297
},
{
"epoch": 89.74358974358974,
"grad_norm": 0.25485533475875854,
"learning_rate": 1e-05,
"loss": 0.1272,
"step": 24500
},
{
"epoch": 90.0,
"eval_accuracy": 0.2942591955998625,
"eval_f1_macro": 0.7121664381657501,
"eval_f1_micro": 0.8134516195584898,
"eval_loss": 0.1244530975818634,
"eval_roc_auc": 0.8742738658040219,
"eval_runtime": 522.4184,
"eval_samples_per_second": 5.568,
"eval_steps_per_second": 0.174,
"learning_rate": 1e-05,
"step": 24570
},
{
"epoch": 91.0,
"eval_accuracy": 0.2990718459951873,
"eval_f1_macro": 0.7106178930468596,
"eval_f1_micro": 0.8153902768123646,
"eval_loss": 0.12501013278961182,
"eval_roc_auc": 0.8780205765416332,
"eval_runtime": 521.4209,
"eval_samples_per_second": 5.579,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 24843
},
{
"epoch": 91.57509157509158,
"grad_norm": 0.24377481639385223,
"learning_rate": 1e-05,
"loss": 0.1275,
"step": 25000
},
{
"epoch": 92.0,
"eval_accuracy": 0.2973530422825713,
"eval_f1_macro": 0.7140173113811211,
"eval_f1_micro": 0.8163049232398094,
"eval_loss": 0.12525025010108948,
"eval_roc_auc": 0.8797377671737511,
"eval_runtime": 518.7634,
"eval_samples_per_second": 5.608,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 25116
},
{
"epoch": 93.0,
"eval_accuracy": 0.29872808525266414,
"eval_f1_macro": 0.7129019083206937,
"eval_f1_micro": 0.8148661314641998,
"eval_loss": 0.12471849471330643,
"eval_roc_auc": 0.8787177863576121,
"eval_runtime": 519.6644,
"eval_samples_per_second": 5.598,
"eval_steps_per_second": 0.175,
"learning_rate": 1e-05,
"step": 25389
},
{
"epoch": 93.4065934065934,
"grad_norm": 0.2000974863767624,
"learning_rate": 1e-05,
"loss": 0.1257,
"step": 25500
},
{
"epoch": 94.0,
"eval_accuracy": 0.2980405637676177,
"eval_f1_macro": 0.7053935701419592,
"eval_f1_micro": 0.8141884924726748,
"eval_loss": 0.12515641748905182,
"eval_roc_auc": 0.8748001190563892,
"eval_runtime": 516.6712,
"eval_samples_per_second": 5.63,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 25662
},
{
"epoch": 95.0,
"eval_accuracy": 0.30147817119284975,
"eval_f1_macro": 0.7134995447430972,
"eval_f1_micro": 0.8165906870726147,
"eval_loss": 0.12481416761875153,
"eval_roc_auc": 0.8800302912139601,
"eval_runtime": 518.3317,
"eval_samples_per_second": 5.612,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 25935
},
{
"epoch": 95.23809523809524,
"grad_norm": 0.284708708524704,
"learning_rate": 1e-05,
"loss": 0.1271,
"step": 26000
},
{
"epoch": 96.0,
"eval_accuracy": 0.2980405637676177,
"eval_f1_macro": 0.7110442004495683,
"eval_f1_micro": 0.8160666176830762,
"eval_loss": 0.12492986023426056,
"eval_roc_auc": 0.8788918921131211,
"eval_runtime": 518.2372,
"eval_samples_per_second": 5.613,
"eval_steps_per_second": 0.176,
"learning_rate": 1e-05,
"step": 26208
},
{
"epoch": 97.0,
"eval_accuracy": 0.30147817119284975,
"eval_f1_macro": 0.7158597011246477,
"eval_f1_micro": 0.8168590473093806,
"eval_loss": 0.12459924072027206,
"eval_roc_auc": 0.8806157536086776,
"eval_runtime": 516.8314,
"eval_samples_per_second": 5.629,
"eval_steps_per_second": 0.176,
"learning_rate": 1.0000000000000002e-06,
"step": 26481
},
{
"epoch": 97.06959706959707,
"grad_norm": 0.2898052930831909,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1272,
"step": 26500
},
{
"epoch": 98.0,
"eval_accuracy": 0.29975936748023374,
"eval_f1_macro": 0.707122866121441,
"eval_f1_micro": 0.8149457415323906,
"eval_loss": 0.12447398155927658,
"eval_roc_auc": 0.8762181147848592,
"eval_runtime": 516.1798,
"eval_samples_per_second": 5.636,
"eval_steps_per_second": 0.176,
"learning_rate": 1.0000000000000002e-06,
"step": 26754
},
{
"epoch": 98.9010989010989,
"grad_norm": 0.2835540473461151,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.126,
"step": 27000
},
{
"epoch": 99.0,
"eval_accuracy": 0.30216569267789617,
"eval_f1_macro": 0.7182970295785608,
"eval_f1_micro": 0.8165748111859562,
"eval_loss": 0.12462905794382095,
"eval_roc_auc": 0.8791385487378576,
"eval_runtime": 525.6968,
"eval_samples_per_second": 5.534,
"eval_steps_per_second": 0.173,
"learning_rate": 1.0000000000000002e-06,
"step": 27027
},
{
"epoch": 100.0,
"eval_accuracy": 0.30147817119284975,
"eval_f1_macro": 0.7136275002413193,
"eval_f1_micro": 0.8161644284310514,
"eval_loss": 0.12463195621967316,
"eval_roc_auc": 0.8780520664444814,
"eval_runtime": 523.9869,
"eval_samples_per_second": 5.552,
"eval_steps_per_second": 0.174,
"learning_rate": 1.0000000000000002e-06,
"step": 27300
},
{
"epoch": 100.0,
"learning_rate": 1.0000000000000002e-06,
"step": 27300,
"total_flos": 1.2912305794345248e+21,
"train_loss": 0.1443542043106023,
"train_runtime": 217230.0758,
"train_samples_per_second": 6.013,
"train_steps_per_second": 0.189
}
],
"logging_steps": 500,
"max_steps": 40950,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2912305794345248e+21,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}