|
{ |
|
"best_metric": 0.23673731088638306, |
|
"best_model_checkpoint": "/mnt/disk_victorlebos/data/datarmor/models/dinov2-large-2024_05_23-drone_batch-size512_epochs50_freeze/checkpoint-1400", |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 1400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.012405938580435224, |
|
"eval_f1_macro": 0.40667627783698285, |
|
"eval_f1_micro": 0.5738973203699311, |
|
"eval_loss": 0.5951732397079468, |
|
"eval_roc_auc": 0.752833094536879, |
|
"eval_runtime": 32.6366, |
|
"eval_samples_per_second": 150.659, |
|
"eval_steps_per_second": 0.306, |
|
"learning_rate": 0.001, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.06975798250966037, |
|
"eval_f1_macro": 0.4367882492755507, |
|
"eval_f1_micro": 0.7307120964254151, |
|
"eval_loss": 0.4730209410190582, |
|
"eval_roc_auc": 0.8401493620198748, |
|
"eval_runtime": 32.801, |
|
"eval_samples_per_second": 149.904, |
|
"eval_steps_per_second": 0.305, |
|
"learning_rate": 0.001, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.10738255033557047, |
|
"eval_f1_macro": 0.37702459211637257, |
|
"eval_f1_micro": 0.7498981835953409, |
|
"eval_loss": 0.3240152895450592, |
|
"eval_roc_auc": 0.8377639463906806, |
|
"eval_runtime": 33.9297, |
|
"eval_samples_per_second": 144.917, |
|
"eval_steps_per_second": 0.295, |
|
"learning_rate": 0.001, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.11795810453528574, |
|
"eval_f1_macro": 0.3710481900670742, |
|
"eval_f1_micro": 0.7521195160095482, |
|
"eval_loss": 0.2770342230796814, |
|
"eval_roc_auc": 0.8371968879000634, |
|
"eval_runtime": 33.1578, |
|
"eval_samples_per_second": 148.291, |
|
"eval_steps_per_second": 0.302, |
|
"learning_rate": 0.001, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.11958511287370348, |
|
"eval_f1_macro": 0.3714736793659693, |
|
"eval_f1_micro": 0.7507292550220328, |
|
"eval_loss": 0.25879302620887756, |
|
"eval_roc_auc": 0.8353003094576528, |
|
"eval_runtime": 33.1064, |
|
"eval_samples_per_second": 148.521, |
|
"eval_steps_per_second": 0.302, |
|
"learning_rate": 0.001, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.12182224933902787, |
|
"eval_f1_macro": 0.36304822534346387, |
|
"eval_f1_micro": 0.7520252586099456, |
|
"eval_loss": 0.25328728556632996, |
|
"eval_roc_auc": 0.8353620986749458, |
|
"eval_runtime": 33.4017, |
|
"eval_samples_per_second": 147.208, |
|
"eval_steps_per_second": 0.299, |
|
"learning_rate": 0.001, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.11531421598535692, |
|
"eval_f1_macro": 0.3646331511607325, |
|
"eval_f1_micro": 0.7517183920016662, |
|
"eval_loss": 0.25132349133491516, |
|
"eval_roc_auc": 0.834652371152471, |
|
"eval_runtime": 33.338, |
|
"eval_samples_per_second": 147.489, |
|
"eval_steps_per_second": 0.3, |
|
"learning_rate": 0.001, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.12283912955053895, |
|
"eval_f1_macro": 0.38940077262215617, |
|
"eval_f1_micro": 0.7576399892988702, |
|
"eval_loss": 0.2507544159889221, |
|
"eval_roc_auc": 0.8407135898239845, |
|
"eval_runtime": 33.4587, |
|
"eval_samples_per_second": 146.957, |
|
"eval_steps_per_second": 0.299, |
|
"learning_rate": 0.001, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.12751677852348994, |
|
"eval_f1_macro": 0.38290945223752887, |
|
"eval_f1_micro": 0.7549859932265752, |
|
"eval_loss": 0.24785615503787994, |
|
"eval_roc_auc": 0.8360210323976333, |
|
"eval_runtime": 34.4389, |
|
"eval_samples_per_second": 142.775, |
|
"eval_steps_per_second": 0.29, |
|
"learning_rate": 0.001, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.12649989831197886, |
|
"eval_f1_macro": 0.37973264961121395, |
|
"eval_f1_micro": 0.7583163191651716, |
|
"eval_loss": 0.2480766475200653, |
|
"eval_roc_auc": 0.8407307602236042, |
|
"eval_runtime": 34.0205, |
|
"eval_samples_per_second": 144.531, |
|
"eval_steps_per_second": 0.294, |
|
"learning_rate": 0.001, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.12426276184665447, |
|
"eval_f1_macro": 0.3964288145693209, |
|
"eval_f1_micro": 0.7600958878849345, |
|
"eval_loss": 0.24667006731033325, |
|
"eval_roc_auc": 0.8431424613547966, |
|
"eval_runtime": 34.2677, |
|
"eval_samples_per_second": 143.488, |
|
"eval_steps_per_second": 0.292, |
|
"learning_rate": 0.001, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.12507626601586333, |
|
"eval_f1_macro": 0.3733203958034908, |
|
"eval_f1_micro": 0.7564640698455339, |
|
"eval_loss": 0.2459569126367569, |
|
"eval_roc_auc": 0.836213120741293, |
|
"eval_runtime": 34.8153, |
|
"eval_samples_per_second": 141.231, |
|
"eval_steps_per_second": 0.287, |
|
"learning_rate": 0.001, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.1297539149888143, |
|
"eval_f1_macro": 0.38618999344086086, |
|
"eval_f1_micro": 0.7581923944769908, |
|
"eval_loss": 0.245611771941185, |
|
"eval_roc_auc": 0.839925579714772, |
|
"eval_runtime": 34.1156, |
|
"eval_samples_per_second": 144.128, |
|
"eval_steps_per_second": 0.293, |
|
"learning_rate": 0.001, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.13707545251169412, |
|
"eval_f1_macro": 0.37084554766098704, |
|
"eval_f1_micro": 0.7526021832952525, |
|
"eval_loss": 0.24649737775325775, |
|
"eval_roc_auc": 0.8323046017083424, |
|
"eval_runtime": 34.0576, |
|
"eval_samples_per_second": 144.373, |
|
"eval_steps_per_second": 0.294, |
|
"learning_rate": 0.001, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.1271100264388855, |
|
"eval_f1_macro": 0.37953234953900117, |
|
"eval_f1_micro": 0.7540528606572888, |
|
"eval_loss": 0.24523988366127014, |
|
"eval_roc_auc": 0.8343517118764396, |
|
"eval_runtime": 34.2408, |
|
"eval_samples_per_second": 143.601, |
|
"eval_steps_per_second": 0.292, |
|
"learning_rate": 0.001, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.1293471629042099, |
|
"eval_f1_macro": 0.39042586476441543, |
|
"eval_f1_micro": 0.7597242635642867, |
|
"eval_loss": 0.24370642006397247, |
|
"eval_roc_auc": 0.8408989171069663, |
|
"eval_runtime": 34.1492, |
|
"eval_samples_per_second": 143.986, |
|
"eval_steps_per_second": 0.293, |
|
"learning_rate": 0.001, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.13158429936953428, |
|
"eval_f1_macro": 0.38542350135117487, |
|
"eval_f1_micro": 0.7525727259224682, |
|
"eval_loss": 0.24466517567634583, |
|
"eval_roc_auc": 0.8317419799054098, |
|
"eval_runtime": 33.8536, |
|
"eval_samples_per_second": 145.243, |
|
"eval_steps_per_second": 0.295, |
|
"learning_rate": 0.001, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 17.857142857142858, |
|
"grad_norm": 0.09434398263692856, |
|
"learning_rate": 0.001, |
|
"loss": 0.3126, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.133211307707952, |
|
"eval_f1_macro": 0.35783734462173733, |
|
"eval_f1_micro": 0.7534316217590239, |
|
"eval_loss": 0.24544650316238403, |
|
"eval_roc_auc": 0.8325846543647923, |
|
"eval_runtime": 33.8618, |
|
"eval_samples_per_second": 145.208, |
|
"eval_steps_per_second": 0.295, |
|
"learning_rate": 0.001, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.13239780353874314, |
|
"eval_f1_macro": 0.3694145346248099, |
|
"eval_f1_micro": 0.7568417082268136, |
|
"eval_loss": 0.2440878450870514, |
|
"eval_roc_auc": 0.8367437261179794, |
|
"eval_runtime": 33.9612, |
|
"eval_samples_per_second": 144.783, |
|
"eval_steps_per_second": 0.294, |
|
"learning_rate": 0.001, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.13605857230018303, |
|
"eval_f1_macro": 0.3768127127776539, |
|
"eval_f1_micro": 0.750895096799091, |
|
"eval_loss": 0.2453632354736328, |
|
"eval_roc_auc": 0.8288096279784166, |
|
"eval_runtime": 33.6713, |
|
"eval_samples_per_second": 146.03, |
|
"eval_steps_per_second": 0.297, |
|
"learning_rate": 0.001, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.12487288997356112, |
|
"eval_f1_macro": 0.38961590782494593, |
|
"eval_f1_micro": 0.760243826841616, |
|
"eval_loss": 0.24377579987049103, |
|
"eval_roc_auc": 0.8415825201166579, |
|
"eval_runtime": 33.7544, |
|
"eval_samples_per_second": 145.67, |
|
"eval_steps_per_second": 0.296, |
|
"learning_rate": 0.001, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.13016066707341875, |
|
"eval_f1_macro": 0.3715634230883189, |
|
"eval_f1_micro": 0.7576183975637929, |
|
"eval_loss": 0.24192409217357635, |
|
"eval_roc_auc": 0.8368051515288276, |
|
"eval_runtime": 33.7095, |
|
"eval_samples_per_second": 145.864, |
|
"eval_steps_per_second": 0.297, |
|
"learning_rate": 0.001, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.12649989831197886, |
|
"eval_f1_macro": 0.3880375815747224, |
|
"eval_f1_micro": 0.7628996647313762, |
|
"eval_loss": 0.24348826706409454, |
|
"eval_roc_auc": 0.845384034822261, |
|
"eval_runtime": 33.9782, |
|
"eval_samples_per_second": 144.711, |
|
"eval_steps_per_second": 0.294, |
|
"learning_rate": 0.001, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.1342281879194631, |
|
"eval_f1_macro": 0.3896884130115941, |
|
"eval_f1_micro": 0.7561114991428027, |
|
"eval_loss": 0.2413305789232254, |
|
"eval_roc_auc": 0.8344032230752988, |
|
"eval_runtime": 34.4343, |
|
"eval_samples_per_second": 142.794, |
|
"eval_steps_per_second": 0.29, |
|
"learning_rate": 0.001, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.1297539149888143, |
|
"eval_f1_macro": 0.38267978517684004, |
|
"eval_f1_micro": 0.7599182173024102, |
|
"eval_loss": 0.24189460277557373, |
|
"eval_roc_auc": 0.8414795784567333, |
|
"eval_runtime": 34.3901, |
|
"eval_samples_per_second": 142.977, |
|
"eval_steps_per_second": 0.291, |
|
"learning_rate": 0.001, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.12670327435428105, |
|
"eval_f1_macro": 0.3971421437602147, |
|
"eval_f1_micro": 0.7593076827294236, |
|
"eval_loss": 0.2437727451324463, |
|
"eval_roc_auc": 0.8401496844846781, |
|
"eval_runtime": 34.2926, |
|
"eval_samples_per_second": 143.384, |
|
"eval_steps_per_second": 0.292, |
|
"learning_rate": 0.001, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.1309741712426276, |
|
"eval_f1_macro": 0.38383597863653807, |
|
"eval_f1_micro": 0.761437908496732, |
|
"eval_loss": 0.24182096123695374, |
|
"eval_roc_auc": 0.8421530853512137, |
|
"eval_runtime": 34.6404, |
|
"eval_samples_per_second": 141.944, |
|
"eval_steps_per_second": 0.289, |
|
"learning_rate": 0.001, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.13341468375025423, |
|
"eval_f1_macro": 0.3792682503180625, |
|
"eval_f1_micro": 0.7498440155769273, |
|
"eval_loss": 0.24316559731960297, |
|
"eval_roc_auc": 0.8275284660741716, |
|
"eval_runtime": 33.9704, |
|
"eval_samples_per_second": 144.743, |
|
"eval_steps_per_second": 0.294, |
|
"learning_rate": 0.001, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.1366687004270897, |
|
"eval_f1_macro": 0.39596794972011545, |
|
"eval_f1_micro": 0.7621594930458399, |
|
"eval_loss": 0.24201267957687378, |
|
"eval_roc_auc": 0.8436467097327637, |
|
"eval_runtime": 34.1508, |
|
"eval_samples_per_second": 143.979, |
|
"eval_steps_per_second": 0.293, |
|
"learning_rate": 0.001, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.14236322961155176, |
|
"eval_f1_macro": 0.38596411111358153, |
|
"eval_f1_micro": 0.7619565217391304, |
|
"eval_loss": 0.2406790852546692, |
|
"eval_roc_auc": 0.840430919512073, |
|
"eval_runtime": 34.2312, |
|
"eval_samples_per_second": 143.641, |
|
"eval_steps_per_second": 0.292, |
|
"learning_rate": 0.001, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.13280455562334756, |
|
"eval_f1_macro": 0.3928781445591724, |
|
"eval_f1_micro": 0.7611869607298037, |
|
"eval_loss": 0.24222084879875183, |
|
"eval_roc_auc": 0.8428517162907542, |
|
"eval_runtime": 34.0721, |
|
"eval_samples_per_second": 144.311, |
|
"eval_steps_per_second": 0.293, |
|
"learning_rate": 0.001, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.13117754728492984, |
|
"eval_f1_macro": 0.3912203123758987, |
|
"eval_f1_micro": 0.7516135926480015, |
|
"eval_loss": 0.24304261803627014, |
|
"eval_roc_auc": 0.8298242087648536, |
|
"eval_runtime": 33.969, |
|
"eval_samples_per_second": 144.75, |
|
"eval_steps_per_second": 0.294, |
|
"learning_rate": 0.001, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.13016066707341875, |
|
"eval_f1_macro": 0.38844227395152936, |
|
"eval_f1_micro": 0.758885526453094, |
|
"eval_loss": 0.24139608442783356, |
|
"eval_roc_auc": 0.838798415911255, |
|
"eval_runtime": 34.3634, |
|
"eval_samples_per_second": 143.088, |
|
"eval_steps_per_second": 0.291, |
|
"learning_rate": 0.001, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.1354484441732764, |
|
"eval_f1_macro": 0.4037409737349212, |
|
"eval_f1_micro": 0.7624706542289075, |
|
"eval_loss": 0.24039919674396515, |
|
"eval_roc_auc": 0.8419399356299166, |
|
"eval_runtime": 34.2464, |
|
"eval_samples_per_second": 143.577, |
|
"eval_steps_per_second": 0.292, |
|
"learning_rate": 0.001, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.12995729103111653, |
|
"eval_f1_macro": 0.3973020120442106, |
|
"eval_f1_micro": 0.7601615858737297, |
|
"eval_loss": 0.24134761095046997, |
|
"eval_roc_auc": 0.8400403455705201, |
|
"eval_runtime": 34.3251, |
|
"eval_samples_per_second": 143.248, |
|
"eval_steps_per_second": 0.291, |
|
"learning_rate": 0.001, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 35.714285714285715, |
|
"grad_norm": 0.08046075701713562, |
|
"learning_rate": 0.001, |
|
"loss": 0.2465, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.13565182021557862, |
|
"eval_f1_macro": 0.38761085480429286, |
|
"eval_f1_micro": 0.7622066694112803, |
|
"eval_loss": 0.24192169308662415, |
|
"eval_roc_auc": 0.8435798471665079, |
|
"eval_runtime": 34.1038, |
|
"eval_samples_per_second": 144.177, |
|
"eval_steps_per_second": 0.293, |
|
"learning_rate": 0.001, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.1342281879194631, |
|
"eval_f1_macro": 0.3992187594370792, |
|
"eval_f1_micro": 0.7598352387357096, |
|
"eval_loss": 0.2399486005306244, |
|
"eval_roc_auc": 0.8380691834023499, |
|
"eval_runtime": 34.6394, |
|
"eval_samples_per_second": 141.948, |
|
"eval_steps_per_second": 0.289, |
|
"learning_rate": 0.001, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.13300793166564978, |
|
"eval_f1_macro": 0.3932700433432016, |
|
"eval_f1_micro": 0.7607364527387098, |
|
"eval_loss": 0.24004822969436646, |
|
"eval_roc_auc": 0.8397074475961233, |
|
"eval_runtime": 34.2918, |
|
"eval_samples_per_second": 143.387, |
|
"eval_steps_per_second": 0.292, |
|
"learning_rate": 0.001, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.13890583689241406, |
|
"eval_f1_macro": 0.4007929579258356, |
|
"eval_f1_micro": 0.7619087275149901, |
|
"eval_loss": 0.24091550707817078, |
|
"eval_roc_auc": 0.8411762713701608, |
|
"eval_runtime": 34.4221, |
|
"eval_samples_per_second": 142.844, |
|
"eval_steps_per_second": 0.291, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.1354484441732764, |
|
"eval_f1_macro": 0.39250375468507387, |
|
"eval_f1_micro": 0.76, |
|
"eval_loss": 0.23991511762142181, |
|
"eval_roc_auc": 0.8378351644204551, |
|
"eval_runtime": 35.4899, |
|
"eval_samples_per_second": 138.546, |
|
"eval_steps_per_second": 0.282, |
|
"learning_rate": 0.001, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.12487288997356112, |
|
"eval_f1_macro": 0.40608061408264917, |
|
"eval_f1_micro": 0.7639710985018574, |
|
"eval_loss": 0.2422637641429901, |
|
"eval_roc_auc": 0.8463905306427496, |
|
"eval_runtime": 35.0605, |
|
"eval_samples_per_second": 140.243, |
|
"eval_steps_per_second": 0.285, |
|
"learning_rate": 0.001, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.1309741712426276, |
|
"eval_f1_macro": 0.4005098857996497, |
|
"eval_f1_micro": 0.7568840806286871, |
|
"eval_loss": 0.24256455898284912, |
|
"eval_roc_auc": 0.8377674429208019, |
|
"eval_runtime": 35.1895, |
|
"eval_samples_per_second": 139.729, |
|
"eval_steps_per_second": 0.284, |
|
"learning_rate": 0.001, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.13361805979255645, |
|
"eval_f1_macro": 0.4007981173529554, |
|
"eval_f1_micro": 0.7594289817122102, |
|
"eval_loss": 0.23922023177146912, |
|
"eval_roc_auc": 0.8368935200169594, |
|
"eval_runtime": 34.9621, |
|
"eval_samples_per_second": 140.638, |
|
"eval_steps_per_second": 0.286, |
|
"learning_rate": 0.001, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.13036404311572097, |
|
"eval_f1_macro": 0.40641694858015515, |
|
"eval_f1_micro": 0.7576905272903253, |
|
"eval_loss": 0.24184103310108185, |
|
"eval_roc_auc": 0.8364958212082838, |
|
"eval_runtime": 35.0243, |
|
"eval_samples_per_second": 140.388, |
|
"eval_steps_per_second": 0.286, |
|
"learning_rate": 0.001, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.13788895668090298, |
|
"eval_f1_macro": 0.39055068831340933, |
|
"eval_f1_micro": 0.7591085068536151, |
|
"eval_loss": 0.24105145037174225, |
|
"eval_roc_auc": 0.8384187060666997, |
|
"eval_runtime": 34.7759, |
|
"eval_samples_per_second": 141.391, |
|
"eval_steps_per_second": 0.288, |
|
"learning_rate": 0.001, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.13626194834248526, |
|
"eval_f1_macro": 0.4106196361694743, |
|
"eval_f1_micro": 0.7653508320819534, |
|
"eval_loss": 0.23963303864002228, |
|
"eval_roc_auc": 0.8457236836435186, |
|
"eval_runtime": 34.9314, |
|
"eval_samples_per_second": 140.762, |
|
"eval_steps_per_second": 0.286, |
|
"learning_rate": 0.001, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.13260117958104536, |
|
"eval_f1_macro": 0.3967990889217657, |
|
"eval_f1_micro": 0.7575076348829317, |
|
"eval_loss": 0.23957742750644684, |
|
"eval_roc_auc": 0.8348926760093537, |
|
"eval_runtime": 35.1971, |
|
"eval_samples_per_second": 139.699, |
|
"eval_steps_per_second": 0.284, |
|
"learning_rate": 0.001, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.12873703477730322, |
|
"eval_f1_macro": 0.38777990454974365, |
|
"eval_f1_micro": 0.7563947423325684, |
|
"eval_loss": 0.24231907725334167, |
|
"eval_roc_auc": 0.8372881264708554, |
|
"eval_runtime": 106.544, |
|
"eval_samples_per_second": 46.15, |
|
"eval_steps_per_second": 0.094, |
|
"learning_rate": 0.001, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.13300793166564978, |
|
"eval_f1_macro": 0.40266317126303974, |
|
"eval_f1_micro": 0.7608376348147216, |
|
"eval_loss": 0.23978127539157867, |
|
"eval_roc_auc": 0.8389943850191152, |
|
"eval_runtime": 103.5302, |
|
"eval_samples_per_second": 47.493, |
|
"eval_steps_per_second": 0.097, |
|
"learning_rate": 0.001, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.14236322961155176, |
|
"eval_f1_macro": 0.4087415721658059, |
|
"eval_f1_micro": 0.7652267908369019, |
|
"eval_loss": 0.23673731088638306, |
|
"eval_roc_auc": 0.8435746322832197, |
|
"eval_runtime": 102.3756, |
|
"eval_samples_per_second": 48.029, |
|
"eval_steps_per_second": 0.098, |
|
"learning_rate": 0.0001, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0001, |
|
"step": 1400, |
|
"total_flos": 9.998216165993483e+19, |
|
"train_loss": 0.2689315250941685, |
|
"train_runtime": 8372.8151, |
|
"train_samples_per_second": 84.762, |
|
"train_steps_per_second": 0.167 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.998216165993483e+19, |
|
"train_batch_size": 512, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|