|
{ |
|
"best_metric": 0.1244530975818634, |
|
"best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/dinov2-large-2024_05_27-_batch-size32_epochs150_freeze/checkpoint-24570", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 27300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.232726022688209, |
|
"eval_f1_macro": 0.4712591871520079, |
|
"eval_f1_micro": 0.7380235658381353, |
|
"eval_loss": 0.1701383888721466, |
|
"eval_roc_auc": 0.8226166045759734, |
|
"eval_runtime": 579.6646, |
|
"eval_samples_per_second": 5.018, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.3410358726978302, |
|
"learning_rate": 0.001, |
|
"loss": 0.2748, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24888277758679958, |
|
"eval_f1_macro": 0.5722657636852799, |
|
"eval_f1_micro": 0.7568708574323469, |
|
"eval_loss": 0.15890291333198547, |
|
"eval_roc_auc": 0.8372263575023127, |
|
"eval_runtime": 572.4604, |
|
"eval_samples_per_second": 5.082, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.24785149535922998, |
|
"eval_f1_macro": 0.6104117366005594, |
|
"eval_f1_micro": 0.7723932964583505, |
|
"eval_loss": 0.15134122967720032, |
|
"eval_roc_auc": 0.851560035472562, |
|
"eval_runtime": 581.798, |
|
"eval_samples_per_second": 5.0, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.28447234630584717, |
|
"learning_rate": 0.001, |
|
"loss": 0.1714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.24853901684427637, |
|
"eval_f1_macro": 0.599745200497783, |
|
"eval_f1_micro": 0.7608496532472631, |
|
"eval_loss": 0.15164224803447723, |
|
"eval_roc_auc": 0.8311391237980399, |
|
"eval_runtime": 574.55, |
|
"eval_samples_per_second": 5.063, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.24750773461670678, |
|
"eval_f1_macro": 0.5935106518877853, |
|
"eval_f1_micro": 0.7692371752165224, |
|
"eval_loss": 0.15243493020534515, |
|
"eval_roc_auc": 0.8475084947536117, |
|
"eval_runtime": 565.8483, |
|
"eval_samples_per_second": 5.141, |
|
"eval_steps_per_second": 0.161, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.32554125785827637, |
|
"learning_rate": 0.001, |
|
"loss": 0.1661, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.24269508422138192, |
|
"eval_f1_macro": 0.613788061665736, |
|
"eval_f1_micro": 0.7718080548414739, |
|
"eval_loss": 0.14673969149589539, |
|
"eval_roc_auc": 0.8414237469351327, |
|
"eval_runtime": 563.6053, |
|
"eval_samples_per_second": 5.161, |
|
"eval_steps_per_second": 0.161, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.2509453420419388, |
|
"eval_f1_macro": 0.6309898748773293, |
|
"eval_f1_micro": 0.7732481363152289, |
|
"eval_loss": 0.1506606936454773, |
|
"eval_roc_auc": 0.8436874105670831, |
|
"eval_runtime": 563.2783, |
|
"eval_samples_per_second": 5.164, |
|
"eval_steps_per_second": 0.162, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.22656740248203278, |
|
"learning_rate": 0.001, |
|
"loss": 0.1611, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.2609144035751117, |
|
"eval_f1_macro": 0.6403740520777896, |
|
"eval_f1_micro": 0.7828014555188422, |
|
"eval_loss": 0.14430351555347443, |
|
"eval_roc_auc": 0.8536999365685817, |
|
"eval_runtime": 570.8881, |
|
"eval_samples_per_second": 5.096, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.25128910278446204, |
|
"eval_f1_macro": 0.6366498775226099, |
|
"eval_f1_micro": 0.781416038551835, |
|
"eval_loss": 0.14617429673671722, |
|
"eval_roc_auc": 0.8571263621918263, |
|
"eval_runtime": 575.7471, |
|
"eval_samples_per_second": 5.053, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.1823957860469818, |
|
"learning_rate": 0.001, |
|
"loss": 0.1606, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2688209006531454, |
|
"eval_f1_macro": 0.6254472467805158, |
|
"eval_f1_micro": 0.7794745970641737, |
|
"eval_loss": 0.14414818584918976, |
|
"eval_roc_auc": 0.8488544375964642, |
|
"eval_runtime": 577.67, |
|
"eval_samples_per_second": 5.036, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.1724083572626114, |
|
"learning_rate": 0.001, |
|
"loss": 0.1592, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.2595393606050189, |
|
"eval_f1_macro": 0.6357434835994208, |
|
"eval_f1_micro": 0.7780349253103302, |
|
"eval_loss": 0.14589238166809082, |
|
"eval_roc_auc": 0.8494945098415877, |
|
"eval_runtime": 576.9242, |
|
"eval_samples_per_second": 5.042, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.2554142316947405, |
|
"eval_f1_macro": 0.638389910481932, |
|
"eval_f1_micro": 0.7823495795575149, |
|
"eval_loss": 0.14458976686000824, |
|
"eval_roc_auc": 0.8601197613283508, |
|
"eval_runtime": 579.3816, |
|
"eval_samples_per_second": 5.021, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.18148617446422577, |
|
"learning_rate": 0.001, |
|
"loss": 0.1582, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2561017531797869, |
|
"eval_f1_macro": 0.6574365741219022, |
|
"eval_f1_micro": 0.786284091383703, |
|
"eval_loss": 0.14142437279224396, |
|
"eval_roc_auc": 0.8559800911193071, |
|
"eval_runtime": 580.1191, |
|
"eval_samples_per_second": 5.014, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.24682021313166036, |
|
"eval_f1_macro": 0.6245865910731833, |
|
"eval_f1_micro": 0.7766990291262137, |
|
"eval_loss": 0.1581379920244217, |
|
"eval_roc_auc": 0.8490862605596214, |
|
"eval_runtime": 579.658, |
|
"eval_samples_per_second": 5.018, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.17000725865364075, |
|
"learning_rate": 0.001, |
|
"loss": 0.1575, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2598831213475421, |
|
"eval_f1_macro": 0.6552072842486797, |
|
"eval_f1_micro": 0.7859620485615181, |
|
"eval_loss": 0.1447945237159729, |
|
"eval_roc_auc": 0.8622108111394943, |
|
"eval_runtime": 578.0771, |
|
"eval_samples_per_second": 5.032, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.2605706428325885, |
|
"eval_f1_macro": 0.6495757946819554, |
|
"eval_f1_micro": 0.7853051058530511, |
|
"eval_loss": 0.1438169628381729, |
|
"eval_roc_auc": 0.8571207689487904, |
|
"eval_runtime": 577.2113, |
|
"eval_samples_per_second": 5.04, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.16623707115650177, |
|
"learning_rate": 0.001, |
|
"loss": 0.158, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.2506015812994156, |
|
"eval_f1_macro": 0.6310969679900952, |
|
"eval_f1_micro": 0.7824457675812967, |
|
"eval_loss": 0.14359386265277863, |
|
"eval_roc_auc": 0.8547290741339532, |
|
"eval_runtime": 583.6271, |
|
"eval_samples_per_second": 4.984, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.2564455139223101, |
|
"eval_f1_macro": 0.6531950395959965, |
|
"eval_f1_micro": 0.7848311343456975, |
|
"eval_loss": 0.1412857472896576, |
|
"eval_roc_auc": 0.8569977513721464, |
|
"eval_runtime": 571.2179, |
|
"eval_samples_per_second": 5.093, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.14722026884555817, |
|
"learning_rate": 0.001, |
|
"loss": 0.1571, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.26022688209006534, |
|
"eval_f1_macro": 0.6486819478687708, |
|
"eval_f1_micro": 0.7833830386020918, |
|
"eval_loss": 0.14079046249389648, |
|
"eval_roc_auc": 0.852859282271344, |
|
"eval_runtime": 574.5699, |
|
"eval_samples_per_second": 5.063, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.26400825025782054, |
|
"eval_f1_macro": 0.6262168341318395, |
|
"eval_f1_micro": 0.7775968460747342, |
|
"eval_loss": 0.14640754461288452, |
|
"eval_roc_auc": 0.8444495064377587, |
|
"eval_runtime": 579.1715, |
|
"eval_samples_per_second": 5.023, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.16411201655864716, |
|
"learning_rate": 0.001, |
|
"loss": 0.1579, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.2653832932279134, |
|
"eval_f1_macro": 0.6582044080070929, |
|
"eval_f1_micro": 0.7890916719110552, |
|
"eval_loss": 0.1412632316350937, |
|
"eval_roc_auc": 0.8562690934879753, |
|
"eval_runtime": 565.162, |
|
"eval_samples_per_second": 5.147, |
|
"eval_steps_per_second": 0.161, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.14830045402050018, |
|
"learning_rate": 0.001, |
|
"loss": 0.1564, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.2543829494671708, |
|
"eval_f1_macro": 0.6586947128782558, |
|
"eval_f1_micro": 0.7871090517954659, |
|
"eval_loss": 0.14168681204319, |
|
"eval_roc_auc": 0.8590452124964612, |
|
"eval_runtime": 558.0731, |
|
"eval_samples_per_second": 5.213, |
|
"eval_steps_per_second": 0.163, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.269852182880715, |
|
"eval_f1_macro": 0.6427873985434494, |
|
"eval_f1_micro": 0.7863651704353696, |
|
"eval_loss": 0.1393543779850006, |
|
"eval_roc_auc": 0.8532977785132612, |
|
"eval_runtime": 555.9424, |
|
"eval_samples_per_second": 5.233, |
|
"eval_steps_per_second": 0.164, |
|
"learning_rate": 0.001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.1764509379863739, |
|
"learning_rate": 0.001, |
|
"loss": 0.1554, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2588518391199725, |
|
"eval_f1_macro": 0.6618962794412713, |
|
"eval_f1_micro": 0.7857706852844616, |
|
"eval_loss": 0.14052371680736542, |
|
"eval_roc_auc": 0.8570680235127297, |
|
"eval_runtime": 561.2023, |
|
"eval_samples_per_second": 5.184, |
|
"eval_steps_per_second": 0.162, |
|
"learning_rate": 0.001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2653832932279134, |
|
"eval_f1_macro": 0.653320279245233, |
|
"eval_f1_micro": 0.7897693920335429, |
|
"eval_loss": 0.1392364352941513, |
|
"eval_roc_auc": 0.8568084813100703, |
|
"eval_runtime": 557.349, |
|
"eval_samples_per_second": 5.219, |
|
"eval_steps_per_second": 0.163, |
|
"learning_rate": 0.001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.17609645426273346, |
|
"learning_rate": 0.001, |
|
"loss": 0.1554, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.27019594362323823, |
|
"eval_f1_macro": 0.6529431984792132, |
|
"eval_f1_micro": 0.7838044308632545, |
|
"eval_loss": 0.14239099621772766, |
|
"eval_roc_auc": 0.8503066581053809, |
|
"eval_runtime": 557.4603, |
|
"eval_samples_per_second": 5.218, |
|
"eval_steps_per_second": 0.163, |
|
"learning_rate": 0.001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2671020969405294, |
|
"eval_f1_macro": 0.6810613208979668, |
|
"eval_f1_micro": 0.7974886125815585, |
|
"eval_loss": 0.1386287957429886, |
|
"eval_roc_auc": 0.8670442949969421, |
|
"eval_runtime": 549.4206, |
|
"eval_samples_per_second": 5.295, |
|
"eval_steps_per_second": 0.166, |
|
"learning_rate": 0.001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.1621919423341751, |
|
"learning_rate": 0.001, |
|
"loss": 0.156, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2650395324853902, |
|
"eval_f1_macro": 0.6474807711800876, |
|
"eval_f1_micro": 0.7791304347826087, |
|
"eval_loss": 0.15519200265407562, |
|
"eval_roc_auc": 0.8544148384115767, |
|
"eval_runtime": 547.1571, |
|
"eval_samples_per_second": 5.317, |
|
"eval_steps_per_second": 0.166, |
|
"learning_rate": 0.001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.27019594362323823, |
|
"eval_f1_macro": 0.6550381793679035, |
|
"eval_f1_micro": 0.7913651213762871, |
|
"eval_loss": 0.14190098643302917, |
|
"eval_roc_auc": 0.8614869597044164, |
|
"eval_runtime": 559.1951, |
|
"eval_samples_per_second": 5.202, |
|
"eval_steps_per_second": 0.163, |
|
"learning_rate": 0.001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.16609038412570953, |
|
"learning_rate": 0.001, |
|
"loss": 0.1548, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.2767273977311791, |
|
"eval_f1_macro": 0.663185953977854, |
|
"eval_f1_micro": 0.7857173292428311, |
|
"eval_loss": 0.13986903429031372, |
|
"eval_roc_auc": 0.8502714393738423, |
|
"eval_runtime": 549.5664, |
|
"eval_samples_per_second": 5.293, |
|
"eval_steps_per_second": 0.166, |
|
"learning_rate": 0.001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.27260226882090066, |
|
"eval_f1_macro": 0.6554744698272669, |
|
"eval_f1_micro": 0.7881844380403459, |
|
"eval_loss": 0.13765402138233185, |
|
"eval_roc_auc": 0.8530413436678441, |
|
"eval_runtime": 541.1802, |
|
"eval_samples_per_second": 5.375, |
|
"eval_steps_per_second": 0.168, |
|
"learning_rate": 0.001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.15226389467716217, |
|
"learning_rate": 0.001, |
|
"loss": 0.1554, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.2677896184255758, |
|
"eval_f1_macro": 0.6596978272887946, |
|
"eval_f1_micro": 0.7914770376499792, |
|
"eval_loss": 0.13866138458251953, |
|
"eval_roc_auc": 0.861421701170076, |
|
"eval_runtime": 550.2376, |
|
"eval_samples_per_second": 5.287, |
|
"eval_steps_per_second": 0.165, |
|
"learning_rate": 0.001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.15690498054027557, |
|
"learning_rate": 0.001, |
|
"loss": 0.1551, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.2605706428325885, |
|
"eval_f1_macro": 0.6583814800932023, |
|
"eval_f1_micro": 0.7887546855476885, |
|
"eval_loss": 0.13930276036262512, |
|
"eval_roc_auc": 0.8580923964636631, |
|
"eval_runtime": 550.269, |
|
"eval_samples_per_second": 5.287, |
|
"eval_steps_per_second": 0.165, |
|
"learning_rate": 0.001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.2763836369886559, |
|
"eval_f1_macro": 0.6636727922636001, |
|
"eval_f1_micro": 0.795303262082937, |
|
"eval_loss": 0.1374826431274414, |
|
"eval_roc_auc": 0.8637208325253699, |
|
"eval_runtime": 543.5944, |
|
"eval_samples_per_second": 5.351, |
|
"eval_steps_per_second": 0.167, |
|
"learning_rate": 0.001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.166019469499588, |
|
"learning_rate": 0.001, |
|
"loss": 0.1544, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.25850807837744927, |
|
"eval_f1_macro": 0.6442491093834092, |
|
"eval_f1_micro": 0.7860775988902434, |
|
"eval_loss": 0.14001137018203735, |
|
"eval_roc_auc": 0.8541119565138938, |
|
"eval_runtime": 537.9679, |
|
"eval_samples_per_second": 5.407, |
|
"eval_steps_per_second": 0.169, |
|
"learning_rate": 0.001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.26916466139566864, |
|
"eval_f1_macro": 0.6541220211466265, |
|
"eval_f1_micro": 0.7890085033301218, |
|
"eval_loss": 0.13899104297161102, |
|
"eval_roc_auc": 0.856659816065419, |
|
"eval_runtime": 532.0356, |
|
"eval_samples_per_second": 5.468, |
|
"eval_steps_per_second": 0.171, |
|
"learning_rate": 0.001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.1606360822916031, |
|
"learning_rate": 0.001, |
|
"loss": 0.1555, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2667583361980062, |
|
"eval_f1_macro": 0.6602790790864311, |
|
"eval_f1_micro": 0.788356222091162, |
|
"eval_loss": 0.14101693034172058, |
|
"eval_roc_auc": 0.8547597772428587, |
|
"eval_runtime": 536.869, |
|
"eval_samples_per_second": 5.418, |
|
"eval_steps_per_second": 0.17, |
|
"learning_rate": 0.001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.2633207287727741, |
|
"eval_f1_macro": 0.6508514926081754, |
|
"eval_f1_micro": 0.7864065343433915, |
|
"eval_loss": 0.13849563896656036, |
|
"eval_roc_auc": 0.8524644165509108, |
|
"eval_runtime": 538.0729, |
|
"eval_samples_per_second": 5.406, |
|
"eval_steps_per_second": 0.169, |
|
"learning_rate": 0.001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.1459091752767563, |
|
"learning_rate": 0.001, |
|
"loss": 0.1547, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.26263320728772777, |
|
"eval_f1_macro": 0.6513021077089046, |
|
"eval_f1_micro": 0.7819844457738655, |
|
"eval_loss": 0.14249388873577118, |
|
"eval_roc_auc": 0.8475227906130532, |
|
"eval_runtime": 528.7471, |
|
"eval_samples_per_second": 5.502, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 0.001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.2633207287727741, |
|
"eval_f1_macro": 0.6421624481517915, |
|
"eval_f1_micro": 0.7819497946916141, |
|
"eval_loss": 0.1512959599494934, |
|
"eval_roc_auc": 0.850003278224088, |
|
"eval_runtime": 526.9207, |
|
"eval_samples_per_second": 5.521, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.16056223213672638, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1527, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.27157098659333107, |
|
"eval_f1_macro": 0.6708412782877394, |
|
"eval_f1_micro": 0.795353889863792, |
|
"eval_loss": 0.1416281908750534, |
|
"eval_roc_auc": 0.8608108400991562, |
|
"eval_runtime": 520.8949, |
|
"eval_samples_per_second": 5.585, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.2811962873839807, |
|
"eval_f1_macro": 0.6820172839356666, |
|
"eval_f1_micro": 0.8014906832298136, |
|
"eval_loss": 0.13480685651302338, |
|
"eval_roc_auc": 0.8666871757284966, |
|
"eval_runtime": 521.536, |
|
"eval_samples_per_second": 5.578, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.13122691214084625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1455, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2756961155036095, |
|
"eval_f1_macro": 0.681931169239128, |
|
"eval_f1_micro": 0.8014919187733112, |
|
"eval_loss": 0.1342025101184845, |
|
"eval_roc_auc": 0.8664528250448902, |
|
"eval_runtime": 522.5262, |
|
"eval_samples_per_second": 5.567, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.15952740609645844, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1416, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.2811962873839807, |
|
"eval_f1_macro": 0.683693351140427, |
|
"eval_f1_micro": 0.8019789631231031, |
|
"eval_loss": 0.1327475756406784, |
|
"eval_roc_auc": 0.8658142028998129, |
|
"eval_runtime": 518.6048, |
|
"eval_samples_per_second": 5.609, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.2811962873839807, |
|
"eval_f1_macro": 0.6900135704395078, |
|
"eval_f1_micro": 0.8049446006284108, |
|
"eval_loss": 0.1318245828151703, |
|
"eval_roc_auc": 0.8690323816564128, |
|
"eval_runtime": 519.4737, |
|
"eval_samples_per_second": 5.6, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.18243736028671265, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1402, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.28910278446201443, |
|
"eval_f1_macro": 0.6920134474185277, |
|
"eval_f1_micro": 0.8063969585520062, |
|
"eval_loss": 0.13027183711528778, |
|
"eval_roc_auc": 0.869961249113055, |
|
"eval_runtime": 520.8317, |
|
"eval_samples_per_second": 5.585, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.284977655551736, |
|
"eval_f1_macro": 0.6938459582689339, |
|
"eval_f1_micro": 0.8065087538619978, |
|
"eval_loss": 0.12985946238040924, |
|
"eval_roc_auc": 0.8709245066726377, |
|
"eval_runtime": 520.349, |
|
"eval_samples_per_second": 5.59, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.20866894721984863, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1387, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.2853214162942592, |
|
"eval_f1_macro": 0.6917397436201066, |
|
"eval_f1_micro": 0.8031727379553465, |
|
"eval_loss": 0.12981055676937103, |
|
"eval_roc_auc": 0.8638141011256728, |
|
"eval_runtime": 515.7048, |
|
"eval_samples_per_second": 5.641, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.2839463733241664, |
|
"eval_f1_macro": 0.6980761423122126, |
|
"eval_f1_micro": 0.8081048867699644, |
|
"eval_loss": 0.1301460713148117, |
|
"eval_roc_auc": 0.8724715693697419, |
|
"eval_runtime": 519.0148, |
|
"eval_samples_per_second": 5.605, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.16819314658641815, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1391, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2829150910965968, |
|
"eval_f1_macro": 0.6968263757426811, |
|
"eval_f1_micro": 0.8056895691232739, |
|
"eval_loss": 0.1294524371623993, |
|
"eval_roc_auc": 0.8677359534045406, |
|
"eval_runtime": 516.1382, |
|
"eval_samples_per_second": 5.636, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2846338948092128, |
|
"eval_f1_macro": 0.6981227572539419, |
|
"eval_f1_micro": 0.8078541374474054, |
|
"eval_loss": 0.12989668548107147, |
|
"eval_roc_auc": 0.8712741682837021, |
|
"eval_runtime": 519.4542, |
|
"eval_samples_per_second": 5.6, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.2057354748249054, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1374, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.284977655551736, |
|
"eval_f1_macro": 0.7032059573412642, |
|
"eval_f1_micro": 0.809621541745341, |
|
"eval_loss": 0.13097986578941345, |
|
"eval_roc_auc": 0.8736802406111642, |
|
"eval_runtime": 517.4492, |
|
"eval_samples_per_second": 5.622, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.288415262976968, |
|
"eval_f1_macro": 0.6952081515364695, |
|
"eval_f1_micro": 0.8082875892525485, |
|
"eval_loss": 0.12910524010658264, |
|
"eval_roc_auc": 0.8713581964654615, |
|
"eval_runtime": 515.3537, |
|
"eval_samples_per_second": 5.645, |
|
"eval_steps_per_second": 0.177, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.19681566953659058, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1367, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.2860089377793056, |
|
"eval_f1_macro": 0.6914506394370794, |
|
"eval_f1_micro": 0.8055729885778838, |
|
"eval_loss": 0.1276824176311493, |
|
"eval_roc_auc": 0.8659785304346413, |
|
"eval_runtime": 517.0408, |
|
"eval_samples_per_second": 5.626, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.21626819670200348, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1364, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.28979030594706084, |
|
"eval_f1_macro": 0.7051415507931676, |
|
"eval_f1_micro": 0.8091508143727464, |
|
"eval_loss": 0.12751279771327972, |
|
"eval_roc_auc": 0.8706409740591669, |
|
"eval_runtime": 516.7044, |
|
"eval_samples_per_second": 5.63, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.2911653489171537, |
|
"eval_f1_macro": 0.6990943862949641, |
|
"eval_f1_micro": 0.8077718065316246, |
|
"eval_loss": 0.12798655033111572, |
|
"eval_roc_auc": 0.8706103127849166, |
|
"eval_runtime": 520.6769, |
|
"eval_samples_per_second": 5.587, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.2569683790206909, |
|
"learning_rate": 0.0001, |
|
"loss": 0.135, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.29150910965967686, |
|
"eval_f1_macro": 0.7001268142729874, |
|
"eval_f1_micro": 0.8107930240210597, |
|
"eval_loss": 0.1279618740081787, |
|
"eval_roc_auc": 0.8737307073185808, |
|
"eval_runtime": 522.0888, |
|
"eval_samples_per_second": 5.572, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.290134066689584, |
|
"eval_f1_macro": 0.7039327958876614, |
|
"eval_f1_micro": 0.8108946874106743, |
|
"eval_loss": 0.1280883550643921, |
|
"eval_roc_auc": 0.8759047076910806, |
|
"eval_runtime": 524.5592, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.209602490067482, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1345, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.2873839807493984, |
|
"eval_f1_macro": 0.699653006099352, |
|
"eval_f1_micro": 0.8071845383437488, |
|
"eval_loss": 0.1287168562412262, |
|
"eval_roc_auc": 0.8699666770335968, |
|
"eval_runtime": 523.8253, |
|
"eval_samples_per_second": 5.553, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.28875902371949125, |
|
"eval_f1_macro": 0.7042073996338176, |
|
"eval_f1_micro": 0.8103491168421926, |
|
"eval_loss": 0.1270500272512436, |
|
"eval_roc_auc": 0.8711666058400855, |
|
"eval_runtime": 535.2866, |
|
"eval_samples_per_second": 5.434, |
|
"eval_steps_per_second": 0.17, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.2388932704925537, |
|
"learning_rate": 0.0001, |
|
"loss": 0.134, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.28944654520453766, |
|
"eval_f1_macro": 0.6994480698947442, |
|
"eval_f1_micro": 0.8072888368788399, |
|
"eval_loss": 0.1269637793302536, |
|
"eval_roc_auc": 0.8672195202075096, |
|
"eval_runtime": 529.882, |
|
"eval_samples_per_second": 5.49, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.28979030594706084, |
|
"eval_f1_macro": 0.7105518005302388, |
|
"eval_f1_micro": 0.8124407826982492, |
|
"eval_loss": 0.12639474868774414, |
|
"eval_roc_auc": 0.8741971299571496, |
|
"eval_runtime": 525.1851, |
|
"eval_samples_per_second": 5.539, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.2082633525133133, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1331, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.2918528704022001, |
|
"eval_f1_macro": 0.7042257858113937, |
|
"eval_f1_micro": 0.8093336660843524, |
|
"eval_loss": 0.12643341720104218, |
|
"eval_roc_auc": 0.8697033641675914, |
|
"eval_runtime": 525.5019, |
|
"eval_samples_per_second": 5.536, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.2918528704022001, |
|
"eval_f1_macro": 0.7054117610081568, |
|
"eval_f1_micro": 0.8119739624362535, |
|
"eval_loss": 0.12570597231388092, |
|
"eval_roc_auc": 0.8720692041572721, |
|
"eval_runtime": 522.3213, |
|
"eval_samples_per_second": 5.569, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.25887489318847656, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1327, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.29322791337229287, |
|
"eval_f1_macro": 0.7040599127700347, |
|
"eval_f1_micro": 0.8103770839396333, |
|
"eval_loss": 0.12599390745162964, |
|
"eval_roc_auc": 0.8704049661183646, |
|
"eval_runtime": 523.7812, |
|
"eval_samples_per_second": 5.554, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.2609516382217407, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1319, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.29769680302509455, |
|
"eval_f1_macro": 0.7083351143800681, |
|
"eval_f1_micro": 0.8141795311606633, |
|
"eval_loss": 0.12674611806869507, |
|
"eval_roc_auc": 0.8776967697557255, |
|
"eval_runtime": 525.4493, |
|
"eval_samples_per_second": 5.536, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.28979030594706084, |
|
"eval_f1_macro": 0.6998024530144022, |
|
"eval_f1_micro": 0.8090950582963362, |
|
"eval_loss": 0.12676431238651276, |
|
"eval_roc_auc": 0.8702765206211787, |
|
"eval_runtime": 528.6841, |
|
"eval_samples_per_second": 5.502, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 0.0001, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.31347450613975525, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1319, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.2928841526297697, |
|
"eval_f1_macro": 0.7034736625177254, |
|
"eval_f1_micro": 0.8127327032445482, |
|
"eval_loss": 0.12638631463050842, |
|
"eval_roc_auc": 0.8763513248964829, |
|
"eval_runtime": 524.2249, |
|
"eval_samples_per_second": 5.549, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.2952904778274321, |
|
"eval_f1_macro": 0.7078892431331377, |
|
"eval_f1_micro": 0.8131967584022379, |
|
"eval_loss": 0.12608103454113007, |
|
"eval_roc_auc": 0.8750227852703141, |
|
"eval_runtime": 527.0116, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 0.0001, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.24166111648082733, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1308, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.29150910965967686, |
|
"eval_f1_macro": 0.7081157868651535, |
|
"eval_f1_micro": 0.8136722606120435, |
|
"eval_loss": 0.12582050263881683, |
|
"eval_roc_auc": 0.875321565859663, |
|
"eval_runtime": 523.4787, |
|
"eval_samples_per_second": 5.557, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 0.0001, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.2918528704022001, |
|
"eval_f1_macro": 0.7044517956080781, |
|
"eval_f1_micro": 0.8123295595405339, |
|
"eval_loss": 0.12533149123191833, |
|
"eval_roc_auc": 0.8732599343798731, |
|
"eval_runtime": 528.0987, |
|
"eval_samples_per_second": 5.508, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.2781914174556732, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1294, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.2966655207975249, |
|
"eval_f1_macro": 0.7099295458861072, |
|
"eval_f1_micro": 0.8159506713723581, |
|
"eval_loss": 0.1258901059627533, |
|
"eval_roc_auc": 0.8806755842224759, |
|
"eval_runtime": 528.1564, |
|
"eval_samples_per_second": 5.508, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.2949467170849089, |
|
"eval_f1_macro": 0.7116557450872655, |
|
"eval_f1_micro": 0.8159496670343587, |
|
"eval_loss": 0.12526649236679077, |
|
"eval_roc_auc": 0.8785730989334561, |
|
"eval_runtime": 529.1223, |
|
"eval_samples_per_second": 5.498, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.278796911239624, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1287, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.29769680302509455, |
|
"eval_f1_macro": 0.7159515864206437, |
|
"eval_f1_micro": 0.8156100747030249, |
|
"eval_loss": 0.12490212172269821, |
|
"eval_roc_auc": 0.8786256372123677, |
|
"eval_runtime": 527.6257, |
|
"eval_samples_per_second": 5.513, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.2966655207975249, |
|
"eval_f1_macro": 0.7082306828309269, |
|
"eval_f1_micro": 0.8135426082669078, |
|
"eval_loss": 0.12504002451896667, |
|
"eval_roc_auc": 0.8755625874067082, |
|
"eval_runtime": 534.3244, |
|
"eval_samples_per_second": 5.444, |
|
"eval_steps_per_second": 0.17, |
|
"learning_rate": 1e-05, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.27821090817451477, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1282, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.2966655207975249, |
|
"eval_f1_macro": 0.6998917153140419, |
|
"eval_f1_micro": 0.8099675513769865, |
|
"eval_loss": 0.12634462118148804, |
|
"eval_roc_auc": 0.8700019375997908, |
|
"eval_runtime": 530.2495, |
|
"eval_samples_per_second": 5.486, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.2594936192035675, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1285, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.2966655207975249, |
|
"eval_f1_macro": 0.7104044870773909, |
|
"eval_f1_micro": 0.8142915811088296, |
|
"eval_loss": 0.1249643936753273, |
|
"eval_roc_auc": 0.8761213663476708, |
|
"eval_runtime": 520.8557, |
|
"eval_samples_per_second": 5.585, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.2939154348573393, |
|
"eval_f1_macro": 0.7076718539561497, |
|
"eval_f1_micro": 0.812339968613199, |
|
"eval_loss": 0.12509745359420776, |
|
"eval_roc_auc": 0.8734096354102718, |
|
"eval_runtime": 523.1264, |
|
"eval_samples_per_second": 5.561, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 1e-05, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.24959908425807953, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1281, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.29838432451014096, |
|
"eval_f1_macro": 0.7097766100728804, |
|
"eval_f1_micro": 0.8147326016360423, |
|
"eval_loss": 0.12465520948171616, |
|
"eval_roc_auc": 0.876088219187627, |
|
"eval_runtime": 536.4997, |
|
"eval_samples_per_second": 5.422, |
|
"eval_steps_per_second": 0.17, |
|
"learning_rate": 1e-05, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.2990718459951873, |
|
"eval_f1_macro": 0.7133791911991404, |
|
"eval_f1_micro": 0.8166140393490405, |
|
"eval_loss": 0.12526248395442963, |
|
"eval_roc_auc": 0.8816640751357672, |
|
"eval_runtime": 530.0362, |
|
"eval_samples_per_second": 5.488, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.2447408139705658, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1281, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.2952904778274321, |
|
"eval_f1_macro": 0.705898272950067, |
|
"eval_f1_micro": 0.8121923983622152, |
|
"eval_loss": 0.12510864436626434, |
|
"eval_roc_auc": 0.8729086155116128, |
|
"eval_runtime": 529.107, |
|
"eval_samples_per_second": 5.498, |
|
"eval_steps_per_second": 0.172, |
|
"learning_rate": 1e-05, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.29975936748023374, |
|
"eval_f1_macro": 0.7095032932540235, |
|
"eval_f1_micro": 0.8150326797385622, |
|
"eval_loss": 0.12532733380794525, |
|
"eval_roc_auc": 0.8780991768028508, |
|
"eval_runtime": 520.4428, |
|
"eval_samples_per_second": 5.589, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.29986944794654846, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1269, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.29597799931247853, |
|
"eval_f1_macro": 0.7124383950303705, |
|
"eval_f1_micro": 0.815855206584497, |
|
"eval_loss": 0.12474868446588516, |
|
"eval_roc_auc": 0.87784419205748, |
|
"eval_runtime": 521.522, |
|
"eval_samples_per_second": 5.578, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 1e-05, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.3007906497078034, |
|
"eval_f1_macro": 0.7138847615465347, |
|
"eval_f1_micro": 0.8175330467926365, |
|
"eval_loss": 0.12511858344078064, |
|
"eval_roc_auc": 0.8816953578814657, |
|
"eval_runtime": 527.0352, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 1e-05, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.23814542591571808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1267, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.2966655207975249, |
|
"eval_f1_macro": 0.7054571621251418, |
|
"eval_f1_micro": 0.8132141082960754, |
|
"eval_loss": 0.12457013875246048, |
|
"eval_roc_auc": 0.8731936400597855, |
|
"eval_runtime": 570.3692, |
|
"eval_samples_per_second": 5.1, |
|
"eval_steps_per_second": 0.16, |
|
"learning_rate": 1e-05, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.2946029563423857, |
|
"eval_f1_macro": 0.7142702846379808, |
|
"eval_f1_micro": 0.8143732269868025, |
|
"eval_loss": 0.1251869946718216, |
|
"eval_roc_auc": 0.875841355405741, |
|
"eval_runtime": 518.8454, |
|
"eval_samples_per_second": 5.607, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.30401352047920227, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1274, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.2935716741148161, |
|
"eval_f1_macro": 0.7081357577756824, |
|
"eval_f1_micro": 0.8135328455150868, |
|
"eval_loss": 0.12492978572845459, |
|
"eval_roc_auc": 0.8754128863763097, |
|
"eval_runtime": 516.882, |
|
"eval_samples_per_second": 5.628, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.25764307379722595, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1263, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.2990718459951873, |
|
"eval_f1_macro": 0.7099379006276698, |
|
"eval_f1_micro": 0.815831263487927, |
|
"eval_loss": 0.12513719499111176, |
|
"eval_roc_auc": 0.8794975742553038, |
|
"eval_runtime": 519.5458, |
|
"eval_samples_per_second": 5.599, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.2963217600550017, |
|
"eval_f1_macro": 0.7092910188720426, |
|
"eval_f1_micro": 0.8143914473684211, |
|
"eval_loss": 0.12514576315879822, |
|
"eval_roc_auc": 0.8758152934234348, |
|
"eval_runtime": 518.776, |
|
"eval_samples_per_second": 5.607, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.25485533475875854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1272, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.2942591955998625, |
|
"eval_f1_macro": 0.7121664381657501, |
|
"eval_f1_micro": 0.8134516195584898, |
|
"eval_loss": 0.1244530975818634, |
|
"eval_roc_auc": 0.8742738658040219, |
|
"eval_runtime": 522.4184, |
|
"eval_samples_per_second": 5.568, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 1e-05, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.2990718459951873, |
|
"eval_f1_macro": 0.7106178930468596, |
|
"eval_f1_micro": 0.8153902768123646, |
|
"eval_loss": 0.12501013278961182, |
|
"eval_roc_auc": 0.8780205765416332, |
|
"eval_runtime": 521.4209, |
|
"eval_samples_per_second": 5.579, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.57509157509158, |
|
"grad_norm": 0.24377481639385223, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1275, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.2973530422825713, |
|
"eval_f1_macro": 0.7140173113811211, |
|
"eval_f1_micro": 0.8163049232398094, |
|
"eval_loss": 0.12525025010108948, |
|
"eval_roc_auc": 0.8797377671737511, |
|
"eval_runtime": 518.7634, |
|
"eval_samples_per_second": 5.608, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.29872808525266414, |
|
"eval_f1_macro": 0.7129019083206937, |
|
"eval_f1_micro": 0.8148661314641998, |
|
"eval_loss": 0.12471849471330643, |
|
"eval_roc_auc": 0.8787177863576121, |
|
"eval_runtime": 519.6644, |
|
"eval_samples_per_second": 5.598, |
|
"eval_steps_per_second": 0.175, |
|
"learning_rate": 1e-05, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 93.4065934065934, |
|
"grad_norm": 0.2000974863767624, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1257, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.2980405637676177, |
|
"eval_f1_macro": 0.7053935701419592, |
|
"eval_f1_micro": 0.8141884924726748, |
|
"eval_loss": 0.12515641748905182, |
|
"eval_roc_auc": 0.8748001190563892, |
|
"eval_runtime": 516.6712, |
|
"eval_samples_per_second": 5.63, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 25662 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.30147817119284975, |
|
"eval_f1_macro": 0.7134995447430972, |
|
"eval_f1_micro": 0.8165906870726147, |
|
"eval_loss": 0.12481416761875153, |
|
"eval_roc_auc": 0.8800302912139601, |
|
"eval_runtime": 518.3317, |
|
"eval_samples_per_second": 5.612, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 25935 |
|
}, |
|
{ |
|
"epoch": 95.23809523809524, |
|
"grad_norm": 0.284708708524704, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1271, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.2980405637676177, |
|
"eval_f1_macro": 0.7110442004495683, |
|
"eval_f1_micro": 0.8160666176830762, |
|
"eval_loss": 0.12492986023426056, |
|
"eval_roc_auc": 0.8788918921131211, |
|
"eval_runtime": 518.2372, |
|
"eval_samples_per_second": 5.613, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1e-05, |
|
"step": 26208 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.30147817119284975, |
|
"eval_f1_macro": 0.7158597011246477, |
|
"eval_f1_micro": 0.8168590473093806, |
|
"eval_loss": 0.12459924072027206, |
|
"eval_roc_auc": 0.8806157536086776, |
|
"eval_runtime": 516.8314, |
|
"eval_samples_per_second": 5.629, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 26481 |
|
}, |
|
{ |
|
"epoch": 97.06959706959707, |
|
"grad_norm": 0.2898052930831909, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1272, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.29975936748023374, |
|
"eval_f1_macro": 0.707122866121441, |
|
"eval_f1_micro": 0.8149457415323906, |
|
"eval_loss": 0.12447398155927658, |
|
"eval_roc_auc": 0.8762181147848592, |
|
"eval_runtime": 516.1798, |
|
"eval_samples_per_second": 5.636, |
|
"eval_steps_per_second": 0.176, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 26754 |
|
}, |
|
{ |
|
"epoch": 98.9010989010989, |
|
"grad_norm": 0.2835540473461151, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.126, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.30216569267789617, |
|
"eval_f1_macro": 0.7182970295785608, |
|
"eval_f1_micro": 0.8165748111859562, |
|
"eval_loss": 0.12462905794382095, |
|
"eval_roc_auc": 0.8791385487378576, |
|
"eval_runtime": 525.6968, |
|
"eval_samples_per_second": 5.534, |
|
"eval_steps_per_second": 0.173, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 27027 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.30147817119284975, |
|
"eval_f1_macro": 0.7136275002413193, |
|
"eval_f1_micro": 0.8161644284310514, |
|
"eval_loss": 0.12463195621967316, |
|
"eval_roc_auc": 0.8780520664444814, |
|
"eval_runtime": 523.9869, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 0.174, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 27300, |
|
"total_flos": 1.2912305794345248e+21, |
|
"train_loss": 0.1443542043106023, |
|
"train_runtime": 217230.0758, |
|
"train_samples_per_second": 6.013, |
|
"train_steps_per_second": 0.189 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2912305794345248e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|