|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9885057471264368, |
|
"eval_steps": 1, |
|
"global_step": 43, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022988505747126436, |
|
"eval_logits/chosen": -0.285895973443985, |
|
"eval_logits/rejected": -0.2622124254703522, |
|
"eval_logps/chosen": -265.5323791503906, |
|
"eval_logps/rejected": -265.8489074707031, |
|
"eval_loss": 2.506535530090332, |
|
"eval_nll_loss": 0.7676451206207275, |
|
"eval_rewards/accuracies": 0.5173913240432739, |
|
"eval_rewards/chosen": -26.55323600769043, |
|
"eval_rewards/margins": 0.03165607899427414, |
|
"eval_rewards/rejected": -26.58489227294922, |
|
"eval_runtime": 73.2699, |
|
"eval_samples_per_second": 24.922, |
|
"eval_steps_per_second": 1.57, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04597701149425287, |
|
"eval_logits/chosen": -0.28692948818206787, |
|
"eval_logits/rejected": -0.2633576989173889, |
|
"eval_logps/chosen": -265.1800537109375, |
|
"eval_logps/rejected": -265.50384521484375, |
|
"eval_loss": 2.505967855453491, |
|
"eval_nll_loss": 0.766638994216919, |
|
"eval_rewards/accuracies": 0.52173912525177, |
|
"eval_rewards/chosen": -26.51800537109375, |
|
"eval_rewards/margins": 0.03237998113036156, |
|
"eval_rewards/rejected": -26.550386428833008, |
|
"eval_runtime": 73.1616, |
|
"eval_samples_per_second": 24.958, |
|
"eval_steps_per_second": 1.572, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06896551724137931, |
|
"eval_logits/chosen": -0.28926920890808105, |
|
"eval_logits/rejected": -0.2657304108142853, |
|
"eval_logps/chosen": -265.0088195800781, |
|
"eval_logps/rejected": -265.2777404785156, |
|
"eval_loss": 2.505052328109741, |
|
"eval_nll_loss": 0.7661022543907166, |
|
"eval_rewards/accuracies": 0.52173912525177, |
|
"eval_rewards/chosen": -26.500883102416992, |
|
"eval_rewards/margins": 0.02689189836382866, |
|
"eval_rewards/rejected": -26.527772903442383, |
|
"eval_runtime": 73.4564, |
|
"eval_samples_per_second": 24.858, |
|
"eval_steps_per_second": 1.566, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.09195402298850575, |
|
"eval_logits/chosen": -0.29259422421455383, |
|
"eval_logits/rejected": -0.26898470520973206, |
|
"eval_logps/chosen": -263.95184326171875, |
|
"eval_logps/rejected": -264.3146667480469, |
|
"eval_loss": 2.498246669769287, |
|
"eval_nll_loss": 0.7631996870040894, |
|
"eval_rewards/accuracies": 0.5239130258560181, |
|
"eval_rewards/chosen": -26.395187377929688, |
|
"eval_rewards/margins": 0.03628147765994072, |
|
"eval_rewards/rejected": -26.43147087097168, |
|
"eval_runtime": 73.5575, |
|
"eval_samples_per_second": 24.824, |
|
"eval_steps_per_second": 1.563, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11494252873563218, |
|
"eval_logits/chosen": -0.29848381876945496, |
|
"eval_logits/rejected": -0.27501967549324036, |
|
"eval_logps/chosen": -262.6512145996094, |
|
"eval_logps/rejected": -263.0111999511719, |
|
"eval_loss": 2.489372968673706, |
|
"eval_nll_loss": 0.7594311237335205, |
|
"eval_rewards/accuracies": 0.52173912525177, |
|
"eval_rewards/chosen": -26.26512336730957, |
|
"eval_rewards/margins": 0.035997405648231506, |
|
"eval_rewards/rejected": -26.30112075805664, |
|
"eval_runtime": 73.7594, |
|
"eval_samples_per_second": 24.756, |
|
"eval_steps_per_second": 1.559, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.13793103448275862, |
|
"eval_logits/chosen": -0.30859696865081787, |
|
"eval_logits/rejected": -0.2858428359031677, |
|
"eval_logps/chosen": -259.449951171875, |
|
"eval_logps/rejected": -259.8551330566406, |
|
"eval_loss": 2.4688832759857178, |
|
"eval_nll_loss": 0.7501848340034485, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -25.94499397277832, |
|
"eval_rewards/margins": 0.040517814457416534, |
|
"eval_rewards/rejected": -25.98551368713379, |
|
"eval_runtime": 73.5054, |
|
"eval_samples_per_second": 24.842, |
|
"eval_steps_per_second": 1.565, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.16091954022988506, |
|
"eval_logits/chosen": -0.3201945424079895, |
|
"eval_logits/rejected": -0.297221302986145, |
|
"eval_logps/chosen": -257.0843200683594, |
|
"eval_logps/rejected": -257.527099609375, |
|
"eval_loss": 2.4511067867279053, |
|
"eval_nll_loss": 0.7433211207389832, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -25.708433151245117, |
|
"eval_rewards/margins": 0.04427630454301834, |
|
"eval_rewards/rejected": -25.752708435058594, |
|
"eval_runtime": 73.716, |
|
"eval_samples_per_second": 24.771, |
|
"eval_steps_per_second": 1.56, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1839080459770115, |
|
"eval_logits/chosen": -0.348645955324173, |
|
"eval_logits/rejected": -0.3254188001155853, |
|
"eval_logps/chosen": -252.2147216796875, |
|
"eval_logps/rejected": -252.7242431640625, |
|
"eval_loss": 2.4179530143737793, |
|
"eval_nll_loss": 0.7291316390037537, |
|
"eval_rewards/accuracies": 0.532608687877655, |
|
"eval_rewards/chosen": -25.22147560119629, |
|
"eval_rewards/margins": 0.05095084756612778, |
|
"eval_rewards/rejected": -25.27242660522461, |
|
"eval_runtime": 73.8275, |
|
"eval_samples_per_second": 24.733, |
|
"eval_steps_per_second": 1.558, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.20689655172413793, |
|
"eval_logits/chosen": -0.37005820870399475, |
|
"eval_logits/rejected": -0.3462548851966858, |
|
"eval_logps/chosen": -248.8451385498047, |
|
"eval_logps/rejected": -249.3928985595703, |
|
"eval_loss": 2.3951992988586426, |
|
"eval_nll_loss": 0.7191779017448425, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -24.8845157623291, |
|
"eval_rewards/margins": 0.054776255041360855, |
|
"eval_rewards/rejected": -24.939287185668945, |
|
"eval_runtime": 73.7047, |
|
"eval_samples_per_second": 24.775, |
|
"eval_steps_per_second": 1.56, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.22988505747126436, |
|
"grad_norm": 55.518348693847656, |
|
"learning_rate": 8.684210526315789e-07, |
|
"logits/chosen": -0.35856884717941284, |
|
"logits/rejected": -0.3261299431324005, |
|
"logps/chosen": -264.810302734375, |
|
"logps/rejected": -258.8919982910156, |
|
"loss": 2.6865, |
|
"nll_loss": 0.7651573419570923, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -26.481029510498047, |
|
"rewards/margins": -0.5918328166007996, |
|
"rewards/rejected": -25.889196395874023, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22988505747126436, |
|
"eval_logits/chosen": -0.384502112865448, |
|
"eval_logits/rejected": -0.3603852689266205, |
|
"eval_logps/chosen": -246.21482849121094, |
|
"eval_logps/rejected": -246.78208923339844, |
|
"eval_loss": 2.376126766204834, |
|
"eval_nll_loss": 0.7115476727485657, |
|
"eval_rewards/accuracies": 0.5347825884819031, |
|
"eval_rewards/chosen": -24.621484756469727, |
|
"eval_rewards/margins": 0.05672362819314003, |
|
"eval_rewards/rejected": -24.678205490112305, |
|
"eval_runtime": 73.7798, |
|
"eval_samples_per_second": 24.749, |
|
"eval_steps_per_second": 1.559, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25287356321839083, |
|
"eval_logits/chosen": -0.397601842880249, |
|
"eval_logits/rejected": -0.3731386959552765, |
|
"eval_logps/chosen": -244.02699279785156, |
|
"eval_logps/rejected": -244.7050323486328, |
|
"eval_loss": 2.3608767986297607, |
|
"eval_nll_loss": 0.7050958275794983, |
|
"eval_rewards/accuracies": 0.539130449295044, |
|
"eval_rewards/chosen": -24.402700424194336, |
|
"eval_rewards/margins": 0.06780331581830978, |
|
"eval_rewards/rejected": -24.470500946044922, |
|
"eval_runtime": 73.0824, |
|
"eval_samples_per_second": 24.985, |
|
"eval_steps_per_second": 1.574, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.27586206896551724, |
|
"eval_logits/chosen": -0.4218127429485321, |
|
"eval_logits/rejected": -0.3970121443271637, |
|
"eval_logps/chosen": -240.5603790283203, |
|
"eval_logps/rejected": -241.30628967285156, |
|
"eval_loss": 2.3367197513580322, |
|
"eval_nll_loss": 0.6951096057891846, |
|
"eval_rewards/accuracies": 0.5347825884819031, |
|
"eval_rewards/chosen": -24.05603790283203, |
|
"eval_rewards/margins": 0.074591264128685, |
|
"eval_rewards/rejected": -24.130634307861328, |
|
"eval_runtime": 73.2125, |
|
"eval_samples_per_second": 24.941, |
|
"eval_steps_per_second": 1.571, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2988505747126437, |
|
"eval_logits/chosen": -0.4434413015842438, |
|
"eval_logits/rejected": -0.4179251492023468, |
|
"eval_logps/chosen": -236.7858123779297, |
|
"eval_logps/rejected": -237.64541625976562, |
|
"eval_loss": 2.310944080352783, |
|
"eval_nll_loss": 0.6840075850486755, |
|
"eval_rewards/accuracies": 0.530434787273407, |
|
"eval_rewards/chosen": -23.6785831451416, |
|
"eval_rewards/margins": 0.08595678210258484, |
|
"eval_rewards/rejected": -23.764541625976562, |
|
"eval_runtime": 73.2236, |
|
"eval_samples_per_second": 24.937, |
|
"eval_steps_per_second": 1.571, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.3218390804597701, |
|
"eval_logits/chosen": -0.4679478406906128, |
|
"eval_logits/rejected": -0.4422786235809326, |
|
"eval_logps/chosen": -233.17481994628906, |
|
"eval_logps/rejected": -234.0310821533203, |
|
"eval_loss": 2.290565252304077, |
|
"eval_nll_loss": 0.6733331680297852, |
|
"eval_rewards/accuracies": 0.5347825884819031, |
|
"eval_rewards/chosen": -23.317480087280273, |
|
"eval_rewards/margins": 0.08562804758548737, |
|
"eval_rewards/rejected": -23.40311050415039, |
|
"eval_runtime": 73.3905, |
|
"eval_samples_per_second": 24.881, |
|
"eval_steps_per_second": 1.567, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"eval_logits/chosen": -0.49170100688934326, |
|
"eval_logits/rejected": -0.4659886956214905, |
|
"eval_logps/chosen": -229.94561767578125, |
|
"eval_logps/rejected": -230.9332275390625, |
|
"eval_loss": 2.272915840148926, |
|
"eval_nll_loss": 0.663709819316864, |
|
"eval_rewards/accuracies": 0.5347825884819031, |
|
"eval_rewards/chosen": -22.99456024169922, |
|
"eval_rewards/margins": 0.09876058995723724, |
|
"eval_rewards/rejected": -23.093320846557617, |
|
"eval_runtime": 73.5456, |
|
"eval_samples_per_second": 24.828, |
|
"eval_steps_per_second": 1.564, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.367816091954023, |
|
"eval_logits/chosen": -0.5142260789871216, |
|
"eval_logits/rejected": -0.4886496365070343, |
|
"eval_logps/chosen": -227.06649780273438, |
|
"eval_logps/rejected": -228.05648803710938, |
|
"eval_loss": 2.257603406906128, |
|
"eval_nll_loss": 0.6548909544944763, |
|
"eval_rewards/accuracies": 0.5369565486907959, |
|
"eval_rewards/chosen": -22.70665168762207, |
|
"eval_rewards/margins": 0.09899646788835526, |
|
"eval_rewards/rejected": -22.805648803710938, |
|
"eval_runtime": 73.494, |
|
"eval_samples_per_second": 24.846, |
|
"eval_steps_per_second": 1.565, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.39080459770114945, |
|
"eval_logits/chosen": -0.5408182144165039, |
|
"eval_logits/rejected": -0.5151581764221191, |
|
"eval_logps/chosen": -224.1295928955078, |
|
"eval_logps/rejected": -225.16580200195312, |
|
"eval_loss": 2.241145133972168, |
|
"eval_nll_loss": 0.6459768414497375, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -22.4129581451416, |
|
"eval_rewards/margins": 0.10362222790718079, |
|
"eval_rewards/rejected": -22.516578674316406, |
|
"eval_runtime": 73.7057, |
|
"eval_samples_per_second": 24.774, |
|
"eval_steps_per_second": 1.56, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.41379310344827586, |
|
"eval_logits/chosen": -0.5656267404556274, |
|
"eval_logits/rejected": -0.5400449633598328, |
|
"eval_logps/chosen": -221.59368896484375, |
|
"eval_logps/rejected": -222.6521759033203, |
|
"eval_loss": 2.230027198791504, |
|
"eval_nll_loss": 0.6381992697715759, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -22.15936851501465, |
|
"eval_rewards/margins": 0.10584992170333862, |
|
"eval_rewards/rejected": -22.265216827392578, |
|
"eval_runtime": 73.8674, |
|
"eval_samples_per_second": 24.72, |
|
"eval_steps_per_second": 1.557, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.4367816091954023, |
|
"eval_logits/chosen": -0.5914514064788818, |
|
"eval_logits/rejected": -0.565658688545227, |
|
"eval_logps/chosen": -219.20506286621094, |
|
"eval_logps/rejected": -220.354736328125, |
|
"eval_loss": 2.2169623374938965, |
|
"eval_nll_loss": 0.6308388113975525, |
|
"eval_rewards/accuracies": 0.530434787273407, |
|
"eval_rewards/chosen": -21.92050552368164, |
|
"eval_rewards/margins": 0.11496546864509583, |
|
"eval_rewards/rejected": -22.035470962524414, |
|
"eval_runtime": 73.7719, |
|
"eval_samples_per_second": 24.752, |
|
"eval_steps_per_second": 1.559, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.45977011494252873, |
|
"grad_norm": 51.48088455200195, |
|
"learning_rate": 6.052631578947368e-07, |
|
"logits/chosen": -0.48232191801071167, |
|
"logits/rejected": -0.4643561840057373, |
|
"logps/chosen": -226.7048797607422, |
|
"logps/rejected": -228.0491943359375, |
|
"loss": 2.3904, |
|
"nll_loss": 0.6598069667816162, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -22.670488357543945, |
|
"rewards/margins": 0.13443148136138916, |
|
"rewards/rejected": -22.804920196533203, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.45977011494252873, |
|
"eval_logits/chosen": -0.617470383644104, |
|
"eval_logits/rejected": -0.5920071601867676, |
|
"eval_logps/chosen": -217.05372619628906, |
|
"eval_logps/rejected": -218.20924377441406, |
|
"eval_loss": 2.20650315284729, |
|
"eval_nll_loss": 0.624081552028656, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -21.705373764038086, |
|
"eval_rewards/margins": 0.11555319279432297, |
|
"eval_rewards/rejected": -21.8209285736084, |
|
"eval_runtime": 73.6034, |
|
"eval_samples_per_second": 24.809, |
|
"eval_steps_per_second": 1.562, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4827586206896552, |
|
"eval_logits/chosen": -0.6441444754600525, |
|
"eval_logits/rejected": -0.6189336180686951, |
|
"eval_logps/chosen": -214.8709716796875, |
|
"eval_logps/rejected": -216.1072235107422, |
|
"eval_loss": 2.193157911300659, |
|
"eval_nll_loss": 0.6171812415122986, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -21.487096786499023, |
|
"eval_rewards/margins": 0.12362580001354218, |
|
"eval_rewards/rejected": -21.6107234954834, |
|
"eval_runtime": 73.1268, |
|
"eval_samples_per_second": 24.97, |
|
"eval_steps_per_second": 1.573, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.5057471264367817, |
|
"eval_logits/chosen": -0.6693909168243408, |
|
"eval_logits/rejected": -0.6444550156593323, |
|
"eval_logps/chosen": -212.89871215820312, |
|
"eval_logps/rejected": -214.12872314453125, |
|
"eval_loss": 2.1838579177856445, |
|
"eval_nll_loss": 0.6109142899513245, |
|
"eval_rewards/accuracies": 0.519565224647522, |
|
"eval_rewards/chosen": -21.289873123168945, |
|
"eval_rewards/margins": 0.1229993924498558, |
|
"eval_rewards/rejected": -21.412874221801758, |
|
"eval_runtime": 73.3336, |
|
"eval_samples_per_second": 24.9, |
|
"eval_steps_per_second": 1.568, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5287356321839081, |
|
"eval_logits/chosen": -0.6940123438835144, |
|
"eval_logits/rejected": -0.6688118577003479, |
|
"eval_logps/chosen": -210.87289428710938, |
|
"eval_logps/rejected": -212.1172332763672, |
|
"eval_loss": 2.17464280128479, |
|
"eval_nll_loss": 0.6044757962226868, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -21.087289810180664, |
|
"eval_rewards/margins": 0.12443248927593231, |
|
"eval_rewards/rejected": -21.21172332763672, |
|
"eval_runtime": 73.7107, |
|
"eval_samples_per_second": 24.773, |
|
"eval_steps_per_second": 1.56, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"eval_logits/chosen": -0.7184363603591919, |
|
"eval_logits/rejected": -0.6937569379806519, |
|
"eval_logps/chosen": -209.13641357421875, |
|
"eval_logps/rejected": -210.39794921875, |
|
"eval_loss": 2.1655774116516113, |
|
"eval_nll_loss": 0.5988763570785522, |
|
"eval_rewards/accuracies": 0.5239130258560181, |
|
"eval_rewards/chosen": -20.91364097595215, |
|
"eval_rewards/margins": 0.12615376710891724, |
|
"eval_rewards/rejected": -21.039793014526367, |
|
"eval_runtime": 73.2196, |
|
"eval_samples_per_second": 24.939, |
|
"eval_steps_per_second": 1.571, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5747126436781609, |
|
"eval_logits/chosen": -0.7364875078201294, |
|
"eval_logits/rejected": -0.711971640586853, |
|
"eval_logps/chosen": -207.19107055664062, |
|
"eval_logps/rejected": -208.48138427734375, |
|
"eval_loss": 2.155548572540283, |
|
"eval_nll_loss": 0.5926215052604675, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -20.719106674194336, |
|
"eval_rewards/margins": 0.12903204560279846, |
|
"eval_rewards/rejected": -20.8481388092041, |
|
"eval_runtime": 73.1876, |
|
"eval_samples_per_second": 24.95, |
|
"eval_steps_per_second": 1.571, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5977011494252874, |
|
"eval_logits/chosen": -0.7545364499092102, |
|
"eval_logits/rejected": -0.730129599571228, |
|
"eval_logps/chosen": -205.48521423339844, |
|
"eval_logps/rejected": -206.7897186279297, |
|
"eval_loss": 2.1465742588043213, |
|
"eval_nll_loss": 0.5872200727462769, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -20.548521041870117, |
|
"eval_rewards/margins": 0.13045117259025574, |
|
"eval_rewards/rejected": -20.678974151611328, |
|
"eval_runtime": 73.5262, |
|
"eval_samples_per_second": 24.835, |
|
"eval_steps_per_second": 1.564, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.6206896551724138, |
|
"eval_logits/chosen": -0.7720378041267395, |
|
"eval_logits/rejected": -0.7476205825805664, |
|
"eval_logps/chosen": -203.7217559814453, |
|
"eval_logps/rejected": -205.04006958007812, |
|
"eval_loss": 2.139249801635742, |
|
"eval_nll_loss": 0.5815550684928894, |
|
"eval_rewards/accuracies": 0.5369565486907959, |
|
"eval_rewards/chosen": -20.37217903137207, |
|
"eval_rewards/margins": 0.13182921707630157, |
|
"eval_rewards/rejected": -20.504005432128906, |
|
"eval_runtime": 73.5829, |
|
"eval_samples_per_second": 24.816, |
|
"eval_steps_per_second": 1.563, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.6436781609195402, |
|
"eval_logits/chosen": -0.781804621219635, |
|
"eval_logits/rejected": -0.7575309872627258, |
|
"eval_logps/chosen": -201.85330200195312, |
|
"eval_logps/rejected": -203.2164306640625, |
|
"eval_loss": 2.1307995319366455, |
|
"eval_nll_loss": 0.5756080150604248, |
|
"eval_rewards/accuracies": 0.532608687877655, |
|
"eval_rewards/chosen": -20.18532943725586, |
|
"eval_rewards/margins": 0.13631057739257812, |
|
"eval_rewards/rejected": -20.321643829345703, |
|
"eval_runtime": 73.6844, |
|
"eval_samples_per_second": 24.781, |
|
"eval_steps_per_second": 1.561, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"eval_logits/chosen": -0.7925211787223816, |
|
"eval_logits/rejected": -0.768252432346344, |
|
"eval_logps/chosen": -199.9458770751953, |
|
"eval_logps/rejected": -201.3154754638672, |
|
"eval_loss": 2.1228978633880615, |
|
"eval_nll_loss": 0.5694720149040222, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -19.994586944580078, |
|
"eval_rewards/margins": 0.13696083426475525, |
|
"eval_rewards/rejected": -20.131547927856445, |
|
"eval_runtime": 73.7355, |
|
"eval_samples_per_second": 24.764, |
|
"eval_steps_per_second": 1.56, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 55.80259323120117, |
|
"learning_rate": 3.4210526315789473e-07, |
|
"logits/chosen": -0.6812049150466919, |
|
"logits/rejected": -0.6623071432113647, |
|
"logps/chosen": -199.8437042236328, |
|
"logps/rejected": -201.27694702148438, |
|
"loss": 2.3172, |
|
"nll_loss": 0.5909140706062317, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -19.984371185302734, |
|
"rewards/margins": 0.14332275092601776, |
|
"rewards/rejected": -20.127695083618164, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"eval_logits/chosen": -0.7975767254829407, |
|
"eval_logits/rejected": -0.7734904885292053, |
|
"eval_logps/chosen": -197.8929901123047, |
|
"eval_logps/rejected": -199.30410766601562, |
|
"eval_loss": 2.113354206085205, |
|
"eval_nll_loss": 0.5630350708961487, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -19.78929901123047, |
|
"eval_rewards/margins": 0.1411115825176239, |
|
"eval_rewards/rejected": -19.93041229248047, |
|
"eval_runtime": 73.6357, |
|
"eval_samples_per_second": 24.798, |
|
"eval_steps_per_second": 1.562, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7126436781609196, |
|
"eval_logits/chosen": -0.7977136969566345, |
|
"eval_logits/rejected": -0.7735068202018738, |
|
"eval_logps/chosen": -195.95989990234375, |
|
"eval_logps/rejected": -197.4013214111328, |
|
"eval_loss": 2.1055009365081787, |
|
"eval_nll_loss": 0.5569384098052979, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -19.595989227294922, |
|
"eval_rewards/margins": 0.1441420167684555, |
|
"eval_rewards/rejected": -19.74013328552246, |
|
"eval_runtime": 73.0556, |
|
"eval_samples_per_second": 24.995, |
|
"eval_steps_per_second": 1.574, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.735632183908046, |
|
"eval_logits/chosen": -0.80599045753479, |
|
"eval_logits/rejected": -0.7817136645317078, |
|
"eval_logps/chosen": -194.0162811279297, |
|
"eval_logps/rejected": -195.46153259277344, |
|
"eval_loss": 2.0985281467437744, |
|
"eval_nll_loss": 0.5507530570030212, |
|
"eval_rewards/accuracies": 0.52173912525177, |
|
"eval_rewards/chosen": -19.401628494262695, |
|
"eval_rewards/margins": 0.14452561736106873, |
|
"eval_rewards/rejected": -19.546154022216797, |
|
"eval_runtime": 73.1881, |
|
"eval_samples_per_second": 24.949, |
|
"eval_steps_per_second": 1.571, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.7586206896551724, |
|
"eval_logits/chosen": -0.8030232787132263, |
|
"eval_logits/rejected": -0.7785286903381348, |
|
"eval_logps/chosen": -192.11659240722656, |
|
"eval_logps/rejected": -193.61715698242188, |
|
"eval_loss": 2.0903804302215576, |
|
"eval_nll_loss": 0.5446676015853882, |
|
"eval_rewards/accuracies": 0.5239130258560181, |
|
"eval_rewards/chosen": -19.211658477783203, |
|
"eval_rewards/margins": 0.1500559002161026, |
|
"eval_rewards/rejected": -19.36171531677246, |
|
"eval_runtime": 73.4088, |
|
"eval_samples_per_second": 24.874, |
|
"eval_steps_per_second": 1.567, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7816091954022989, |
|
"eval_logits/chosen": -0.8003183603286743, |
|
"eval_logits/rejected": -0.7758002281188965, |
|
"eval_logps/chosen": -190.38067626953125, |
|
"eval_logps/rejected": -191.8131561279297, |
|
"eval_loss": 2.08504056930542, |
|
"eval_nll_loss": 0.539174497127533, |
|
"eval_rewards/accuracies": 0.5239130258560181, |
|
"eval_rewards/chosen": -19.038066864013672, |
|
"eval_rewards/margins": 0.1432473063468933, |
|
"eval_rewards/rejected": -19.18131446838379, |
|
"eval_runtime": 73.4902, |
|
"eval_samples_per_second": 24.847, |
|
"eval_steps_per_second": 1.565, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.8045977011494253, |
|
"eval_logits/chosen": -0.796375036239624, |
|
"eval_logits/rejected": -0.7714610695838928, |
|
"eval_logps/chosen": -188.9884033203125, |
|
"eval_logps/rejected": -190.43736267089844, |
|
"eval_loss": 2.0792500972747803, |
|
"eval_nll_loss": 0.5345708131790161, |
|
"eval_rewards/accuracies": 0.5173913240432739, |
|
"eval_rewards/chosen": -18.898839950561523, |
|
"eval_rewards/margins": 0.1448965221643448, |
|
"eval_rewards/rejected": -19.043737411499023, |
|
"eval_runtime": 73.2997, |
|
"eval_samples_per_second": 24.911, |
|
"eval_steps_per_second": 1.569, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"eval_logits/chosen": -0.7951973080635071, |
|
"eval_logits/rejected": -0.7701032757759094, |
|
"eval_logps/chosen": -187.54518127441406, |
|
"eval_logps/rejected": -188.98013305664062, |
|
"eval_loss": 2.0720129013061523, |
|
"eval_nll_loss": 0.5298618078231812, |
|
"eval_rewards/accuracies": 0.519565224647522, |
|
"eval_rewards/chosen": -18.754518508911133, |
|
"eval_rewards/margins": 0.14349476993083954, |
|
"eval_rewards/rejected": -18.898012161254883, |
|
"eval_runtime": 73.4171, |
|
"eval_samples_per_second": 24.872, |
|
"eval_steps_per_second": 1.566, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8505747126436781, |
|
"eval_logits/chosen": -0.7926805019378662, |
|
"eval_logits/rejected": -0.7679208517074585, |
|
"eval_logps/chosen": -186.56715393066406, |
|
"eval_logps/rejected": -188.0532684326172, |
|
"eval_loss": 2.0663270950317383, |
|
"eval_nll_loss": 0.526580810546875, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -18.656715393066406, |
|
"eval_rewards/margins": 0.14861242473125458, |
|
"eval_rewards/rejected": -18.805326461791992, |
|
"eval_runtime": 73.5278, |
|
"eval_samples_per_second": 24.834, |
|
"eval_steps_per_second": 1.564, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.8735632183908046, |
|
"eval_logits/chosen": -0.7882456183433533, |
|
"eval_logits/rejected": -0.7631468176841736, |
|
"eval_logps/chosen": -185.62677001953125, |
|
"eval_logps/rejected": -187.13912963867188, |
|
"eval_loss": 2.0643482208251953, |
|
"eval_nll_loss": 0.5234898924827576, |
|
"eval_rewards/accuracies": 0.5239130258560181, |
|
"eval_rewards/chosen": -18.56267738342285, |
|
"eval_rewards/margins": 0.15123440325260162, |
|
"eval_rewards/rejected": -18.713911056518555, |
|
"eval_runtime": 73.4858, |
|
"eval_samples_per_second": 24.848, |
|
"eval_steps_per_second": 1.565, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.896551724137931, |
|
"eval_logits/chosen": -0.7857053279876709, |
|
"eval_logits/rejected": -0.7608606815338135, |
|
"eval_logps/chosen": -185.09970092773438, |
|
"eval_logps/rejected": -186.60646057128906, |
|
"eval_loss": 2.0600922107696533, |
|
"eval_nll_loss": 0.5217379927635193, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -18.509971618652344, |
|
"eval_rewards/margins": 0.15067508816719055, |
|
"eval_rewards/rejected": -18.66064453125, |
|
"eval_runtime": 73.7485, |
|
"eval_samples_per_second": 24.76, |
|
"eval_steps_per_second": 1.559, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"grad_norm": 50.088340759277344, |
|
"learning_rate": 7.894736842105262e-08, |
|
"logits/chosen": -0.8007175326347351, |
|
"logits/rejected": -0.7798112630844116, |
|
"logps/chosen": -190.50381469726562, |
|
"logps/rejected": -193.3760223388672, |
|
"loss": 2.1039, |
|
"nll_loss": 0.5438653230667114, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -19.05038070678711, |
|
"rewards/margins": 0.2872214913368225, |
|
"rewards/rejected": -19.337600708007812, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"eval_logits/chosen": -0.785999596118927, |
|
"eval_logits/rejected": -0.7610748410224915, |
|
"eval_logps/chosen": -184.6099090576172, |
|
"eval_logps/rejected": -186.1282958984375, |
|
"eval_loss": 2.0597591400146484, |
|
"eval_nll_loss": 0.5201125144958496, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -18.46099090576172, |
|
"eval_rewards/margins": 0.15183939039707184, |
|
"eval_rewards/rejected": -18.612829208374023, |
|
"eval_runtime": 73.6777, |
|
"eval_samples_per_second": 24.784, |
|
"eval_steps_per_second": 1.561, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9425287356321839, |
|
"eval_logits/chosen": -0.7789402604103088, |
|
"eval_logits/rejected": -0.754026472568512, |
|
"eval_logps/chosen": -184.23236083984375, |
|
"eval_logps/rejected": -185.80072021484375, |
|
"eval_loss": 2.0538711547851562, |
|
"eval_nll_loss": 0.5189568400382996, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -18.423233032226562, |
|
"eval_rewards/margins": 0.15683722496032715, |
|
"eval_rewards/rejected": -18.5800724029541, |
|
"eval_runtime": 73.0726, |
|
"eval_samples_per_second": 24.989, |
|
"eval_steps_per_second": 1.574, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.9655172413793104, |
|
"eval_logits/chosen": -0.777718722820282, |
|
"eval_logits/rejected": -0.7525457739830017, |
|
"eval_logps/chosen": -183.968994140625, |
|
"eval_logps/rejected": -185.52581787109375, |
|
"eval_loss": 2.054420232772827, |
|
"eval_nll_loss": 0.518138587474823, |
|
"eval_rewards/accuracies": 0.5282608866691589, |
|
"eval_rewards/chosen": -18.396900177001953, |
|
"eval_rewards/margins": 0.15568143129348755, |
|
"eval_rewards/rejected": -18.552579879760742, |
|
"eval_runtime": 73.2982, |
|
"eval_samples_per_second": 24.912, |
|
"eval_steps_per_second": 1.569, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9885057471264368, |
|
"eval_logits/chosen": -0.779742419719696, |
|
"eval_logits/rejected": -0.755063533782959, |
|
"eval_logps/chosen": -183.93116760253906, |
|
"eval_logps/rejected": -185.45208740234375, |
|
"eval_loss": 2.0544536113739014, |
|
"eval_nll_loss": 0.5179869532585144, |
|
"eval_rewards/accuracies": 0.5260869860649109, |
|
"eval_rewards/chosen": -18.393117904663086, |
|
"eval_rewards/margins": 0.15209028124809265, |
|
"eval_rewards/rejected": -18.54520606994629, |
|
"eval_runtime": 73.5834, |
|
"eval_samples_per_second": 24.815, |
|
"eval_steps_per_second": 1.563, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.9885057471264368, |
|
"step": 43, |
|
"total_flos": 0.0, |
|
"train_loss": 2.3523660704146985, |
|
"train_runtime": 5387.5537, |
|
"train_samples_per_second": 1.031, |
|
"train_steps_per_second": 0.008 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 43, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|