llama3.1-cpo-full-0919 / trainer_state.json
jbjeong91's picture
Model save
ecf66fe verified
raw
history blame
No virus
31.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9885057471264368,
"eval_steps": 1,
"global_step": 43,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022988505747126436,
"eval_logits/chosen": -0.285895973443985,
"eval_logits/rejected": -0.2622124254703522,
"eval_logps/chosen": -265.5323791503906,
"eval_logps/rejected": -265.8489074707031,
"eval_loss": 2.506535530090332,
"eval_nll_loss": 0.7676451206207275,
"eval_rewards/accuracies": 0.5173913240432739,
"eval_rewards/chosen": -26.55323600769043,
"eval_rewards/margins": 0.03165607899427414,
"eval_rewards/rejected": -26.58489227294922,
"eval_runtime": 73.2699,
"eval_samples_per_second": 24.922,
"eval_steps_per_second": 1.57,
"step": 1
},
{
"epoch": 0.04597701149425287,
"eval_logits/chosen": -0.28692948818206787,
"eval_logits/rejected": -0.2633576989173889,
"eval_logps/chosen": -265.1800537109375,
"eval_logps/rejected": -265.50384521484375,
"eval_loss": 2.505967855453491,
"eval_nll_loss": 0.766638994216919,
"eval_rewards/accuracies": 0.52173912525177,
"eval_rewards/chosen": -26.51800537109375,
"eval_rewards/margins": 0.03237998113036156,
"eval_rewards/rejected": -26.550386428833008,
"eval_runtime": 73.1616,
"eval_samples_per_second": 24.958,
"eval_steps_per_second": 1.572,
"step": 2
},
{
"epoch": 0.06896551724137931,
"eval_logits/chosen": -0.28926920890808105,
"eval_logits/rejected": -0.2657304108142853,
"eval_logps/chosen": -265.0088195800781,
"eval_logps/rejected": -265.2777404785156,
"eval_loss": 2.505052328109741,
"eval_nll_loss": 0.7661022543907166,
"eval_rewards/accuracies": 0.52173912525177,
"eval_rewards/chosen": -26.500883102416992,
"eval_rewards/margins": 0.02689189836382866,
"eval_rewards/rejected": -26.527772903442383,
"eval_runtime": 73.4564,
"eval_samples_per_second": 24.858,
"eval_steps_per_second": 1.566,
"step": 3
},
{
"epoch": 0.09195402298850575,
"eval_logits/chosen": -0.29259422421455383,
"eval_logits/rejected": -0.26898470520973206,
"eval_logps/chosen": -263.95184326171875,
"eval_logps/rejected": -264.3146667480469,
"eval_loss": 2.498246669769287,
"eval_nll_loss": 0.7631996870040894,
"eval_rewards/accuracies": 0.5239130258560181,
"eval_rewards/chosen": -26.395187377929688,
"eval_rewards/margins": 0.03628147765994072,
"eval_rewards/rejected": -26.43147087097168,
"eval_runtime": 73.5575,
"eval_samples_per_second": 24.824,
"eval_steps_per_second": 1.563,
"step": 4
},
{
"epoch": 0.11494252873563218,
"eval_logits/chosen": -0.29848381876945496,
"eval_logits/rejected": -0.27501967549324036,
"eval_logps/chosen": -262.6512145996094,
"eval_logps/rejected": -263.0111999511719,
"eval_loss": 2.489372968673706,
"eval_nll_loss": 0.7594311237335205,
"eval_rewards/accuracies": 0.52173912525177,
"eval_rewards/chosen": -26.26512336730957,
"eval_rewards/margins": 0.035997405648231506,
"eval_rewards/rejected": -26.30112075805664,
"eval_runtime": 73.7594,
"eval_samples_per_second": 24.756,
"eval_steps_per_second": 1.559,
"step": 5
},
{
"epoch": 0.13793103448275862,
"eval_logits/chosen": -0.30859696865081787,
"eval_logits/rejected": -0.2858428359031677,
"eval_logps/chosen": -259.449951171875,
"eval_logps/rejected": -259.8551330566406,
"eval_loss": 2.4688832759857178,
"eval_nll_loss": 0.7501848340034485,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -25.94499397277832,
"eval_rewards/margins": 0.040517814457416534,
"eval_rewards/rejected": -25.98551368713379,
"eval_runtime": 73.5054,
"eval_samples_per_second": 24.842,
"eval_steps_per_second": 1.565,
"step": 6
},
{
"epoch": 0.16091954022988506,
"eval_logits/chosen": -0.3201945424079895,
"eval_logits/rejected": -0.297221302986145,
"eval_logps/chosen": -257.0843200683594,
"eval_logps/rejected": -257.527099609375,
"eval_loss": 2.4511067867279053,
"eval_nll_loss": 0.7433211207389832,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -25.708433151245117,
"eval_rewards/margins": 0.04427630454301834,
"eval_rewards/rejected": -25.752708435058594,
"eval_runtime": 73.716,
"eval_samples_per_second": 24.771,
"eval_steps_per_second": 1.56,
"step": 7
},
{
"epoch": 0.1839080459770115,
"eval_logits/chosen": -0.348645955324173,
"eval_logits/rejected": -0.3254188001155853,
"eval_logps/chosen": -252.2147216796875,
"eval_logps/rejected": -252.7242431640625,
"eval_loss": 2.4179530143737793,
"eval_nll_loss": 0.7291316390037537,
"eval_rewards/accuracies": 0.532608687877655,
"eval_rewards/chosen": -25.22147560119629,
"eval_rewards/margins": 0.05095084756612778,
"eval_rewards/rejected": -25.27242660522461,
"eval_runtime": 73.8275,
"eval_samples_per_second": 24.733,
"eval_steps_per_second": 1.558,
"step": 8
},
{
"epoch": 0.20689655172413793,
"eval_logits/chosen": -0.37005820870399475,
"eval_logits/rejected": -0.3462548851966858,
"eval_logps/chosen": -248.8451385498047,
"eval_logps/rejected": -249.3928985595703,
"eval_loss": 2.3951992988586426,
"eval_nll_loss": 0.7191779017448425,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -24.8845157623291,
"eval_rewards/margins": 0.054776255041360855,
"eval_rewards/rejected": -24.939287185668945,
"eval_runtime": 73.7047,
"eval_samples_per_second": 24.775,
"eval_steps_per_second": 1.56,
"step": 9
},
{
"epoch": 0.22988505747126436,
"grad_norm": 55.518348693847656,
"learning_rate": 8.684210526315789e-07,
"logits/chosen": -0.35856884717941284,
"logits/rejected": -0.3261299431324005,
"logps/chosen": -264.810302734375,
"logps/rejected": -258.8919982910156,
"loss": 2.6865,
"nll_loss": 0.7651573419570923,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -26.481029510498047,
"rewards/margins": -0.5918328166007996,
"rewards/rejected": -25.889196395874023,
"step": 10
},
{
"epoch": 0.22988505747126436,
"eval_logits/chosen": -0.384502112865448,
"eval_logits/rejected": -0.3603852689266205,
"eval_logps/chosen": -246.21482849121094,
"eval_logps/rejected": -246.78208923339844,
"eval_loss": 2.376126766204834,
"eval_nll_loss": 0.7115476727485657,
"eval_rewards/accuracies": 0.5347825884819031,
"eval_rewards/chosen": -24.621484756469727,
"eval_rewards/margins": 0.05672362819314003,
"eval_rewards/rejected": -24.678205490112305,
"eval_runtime": 73.7798,
"eval_samples_per_second": 24.749,
"eval_steps_per_second": 1.559,
"step": 10
},
{
"epoch": 0.25287356321839083,
"eval_logits/chosen": -0.397601842880249,
"eval_logits/rejected": -0.3731386959552765,
"eval_logps/chosen": -244.02699279785156,
"eval_logps/rejected": -244.7050323486328,
"eval_loss": 2.3608767986297607,
"eval_nll_loss": 0.7050958275794983,
"eval_rewards/accuracies": 0.539130449295044,
"eval_rewards/chosen": -24.402700424194336,
"eval_rewards/margins": 0.06780331581830978,
"eval_rewards/rejected": -24.470500946044922,
"eval_runtime": 73.0824,
"eval_samples_per_second": 24.985,
"eval_steps_per_second": 1.574,
"step": 11
},
{
"epoch": 0.27586206896551724,
"eval_logits/chosen": -0.4218127429485321,
"eval_logits/rejected": -0.3970121443271637,
"eval_logps/chosen": -240.5603790283203,
"eval_logps/rejected": -241.30628967285156,
"eval_loss": 2.3367197513580322,
"eval_nll_loss": 0.6951096057891846,
"eval_rewards/accuracies": 0.5347825884819031,
"eval_rewards/chosen": -24.05603790283203,
"eval_rewards/margins": 0.074591264128685,
"eval_rewards/rejected": -24.130634307861328,
"eval_runtime": 73.2125,
"eval_samples_per_second": 24.941,
"eval_steps_per_second": 1.571,
"step": 12
},
{
"epoch": 0.2988505747126437,
"eval_logits/chosen": -0.4434413015842438,
"eval_logits/rejected": -0.4179251492023468,
"eval_logps/chosen": -236.7858123779297,
"eval_logps/rejected": -237.64541625976562,
"eval_loss": 2.310944080352783,
"eval_nll_loss": 0.6840075850486755,
"eval_rewards/accuracies": 0.530434787273407,
"eval_rewards/chosen": -23.6785831451416,
"eval_rewards/margins": 0.08595678210258484,
"eval_rewards/rejected": -23.764541625976562,
"eval_runtime": 73.2236,
"eval_samples_per_second": 24.937,
"eval_steps_per_second": 1.571,
"step": 13
},
{
"epoch": 0.3218390804597701,
"eval_logits/chosen": -0.4679478406906128,
"eval_logits/rejected": -0.4422786235809326,
"eval_logps/chosen": -233.17481994628906,
"eval_logps/rejected": -234.0310821533203,
"eval_loss": 2.290565252304077,
"eval_nll_loss": 0.6733331680297852,
"eval_rewards/accuracies": 0.5347825884819031,
"eval_rewards/chosen": -23.317480087280273,
"eval_rewards/margins": 0.08562804758548737,
"eval_rewards/rejected": -23.40311050415039,
"eval_runtime": 73.3905,
"eval_samples_per_second": 24.881,
"eval_steps_per_second": 1.567,
"step": 14
},
{
"epoch": 0.3448275862068966,
"eval_logits/chosen": -0.49170100688934326,
"eval_logits/rejected": -0.4659886956214905,
"eval_logps/chosen": -229.94561767578125,
"eval_logps/rejected": -230.9332275390625,
"eval_loss": 2.272915840148926,
"eval_nll_loss": 0.663709819316864,
"eval_rewards/accuracies": 0.5347825884819031,
"eval_rewards/chosen": -22.99456024169922,
"eval_rewards/margins": 0.09876058995723724,
"eval_rewards/rejected": -23.093320846557617,
"eval_runtime": 73.5456,
"eval_samples_per_second": 24.828,
"eval_steps_per_second": 1.564,
"step": 15
},
{
"epoch": 0.367816091954023,
"eval_logits/chosen": -0.5142260789871216,
"eval_logits/rejected": -0.4886496365070343,
"eval_logps/chosen": -227.06649780273438,
"eval_logps/rejected": -228.05648803710938,
"eval_loss": 2.257603406906128,
"eval_nll_loss": 0.6548909544944763,
"eval_rewards/accuracies": 0.5369565486907959,
"eval_rewards/chosen": -22.70665168762207,
"eval_rewards/margins": 0.09899646788835526,
"eval_rewards/rejected": -22.805648803710938,
"eval_runtime": 73.494,
"eval_samples_per_second": 24.846,
"eval_steps_per_second": 1.565,
"step": 16
},
{
"epoch": 0.39080459770114945,
"eval_logits/chosen": -0.5408182144165039,
"eval_logits/rejected": -0.5151581764221191,
"eval_logps/chosen": -224.1295928955078,
"eval_logps/rejected": -225.16580200195312,
"eval_loss": 2.241145133972168,
"eval_nll_loss": 0.6459768414497375,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -22.4129581451416,
"eval_rewards/margins": 0.10362222790718079,
"eval_rewards/rejected": -22.516578674316406,
"eval_runtime": 73.7057,
"eval_samples_per_second": 24.774,
"eval_steps_per_second": 1.56,
"step": 17
},
{
"epoch": 0.41379310344827586,
"eval_logits/chosen": -0.5656267404556274,
"eval_logits/rejected": -0.5400449633598328,
"eval_logps/chosen": -221.59368896484375,
"eval_logps/rejected": -222.6521759033203,
"eval_loss": 2.230027198791504,
"eval_nll_loss": 0.6381992697715759,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -22.15936851501465,
"eval_rewards/margins": 0.10584992170333862,
"eval_rewards/rejected": -22.265216827392578,
"eval_runtime": 73.8674,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 1.557,
"step": 18
},
{
"epoch": 0.4367816091954023,
"eval_logits/chosen": -0.5914514064788818,
"eval_logits/rejected": -0.565658688545227,
"eval_logps/chosen": -219.20506286621094,
"eval_logps/rejected": -220.354736328125,
"eval_loss": 2.2169623374938965,
"eval_nll_loss": 0.6308388113975525,
"eval_rewards/accuracies": 0.530434787273407,
"eval_rewards/chosen": -21.92050552368164,
"eval_rewards/margins": 0.11496546864509583,
"eval_rewards/rejected": -22.035470962524414,
"eval_runtime": 73.7719,
"eval_samples_per_second": 24.752,
"eval_steps_per_second": 1.559,
"step": 19
},
{
"epoch": 0.45977011494252873,
"grad_norm": 51.48088455200195,
"learning_rate": 6.052631578947368e-07,
"logits/chosen": -0.48232191801071167,
"logits/rejected": -0.4643561840057373,
"logps/chosen": -226.7048797607422,
"logps/rejected": -228.0491943359375,
"loss": 2.3904,
"nll_loss": 0.6598069667816162,
"rewards/accuracies": 0.546875,
"rewards/chosen": -22.670488357543945,
"rewards/margins": 0.13443148136138916,
"rewards/rejected": -22.804920196533203,
"step": 20
},
{
"epoch": 0.45977011494252873,
"eval_logits/chosen": -0.617470383644104,
"eval_logits/rejected": -0.5920071601867676,
"eval_logps/chosen": -217.05372619628906,
"eval_logps/rejected": -218.20924377441406,
"eval_loss": 2.20650315284729,
"eval_nll_loss": 0.624081552028656,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -21.705373764038086,
"eval_rewards/margins": 0.11555319279432297,
"eval_rewards/rejected": -21.8209285736084,
"eval_runtime": 73.6034,
"eval_samples_per_second": 24.809,
"eval_steps_per_second": 1.562,
"step": 20
},
{
"epoch": 0.4827586206896552,
"eval_logits/chosen": -0.6441444754600525,
"eval_logits/rejected": -0.6189336180686951,
"eval_logps/chosen": -214.8709716796875,
"eval_logps/rejected": -216.1072235107422,
"eval_loss": 2.193157911300659,
"eval_nll_loss": 0.6171812415122986,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -21.487096786499023,
"eval_rewards/margins": 0.12362580001354218,
"eval_rewards/rejected": -21.6107234954834,
"eval_runtime": 73.1268,
"eval_samples_per_second": 24.97,
"eval_steps_per_second": 1.573,
"step": 21
},
{
"epoch": 0.5057471264367817,
"eval_logits/chosen": -0.6693909168243408,
"eval_logits/rejected": -0.6444550156593323,
"eval_logps/chosen": -212.89871215820312,
"eval_logps/rejected": -214.12872314453125,
"eval_loss": 2.1838579177856445,
"eval_nll_loss": 0.6109142899513245,
"eval_rewards/accuracies": 0.519565224647522,
"eval_rewards/chosen": -21.289873123168945,
"eval_rewards/margins": 0.1229993924498558,
"eval_rewards/rejected": -21.412874221801758,
"eval_runtime": 73.3336,
"eval_samples_per_second": 24.9,
"eval_steps_per_second": 1.568,
"step": 22
},
{
"epoch": 0.5287356321839081,
"eval_logits/chosen": -0.6940123438835144,
"eval_logits/rejected": -0.6688118577003479,
"eval_logps/chosen": -210.87289428710938,
"eval_logps/rejected": -212.1172332763672,
"eval_loss": 2.17464280128479,
"eval_nll_loss": 0.6044757962226868,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -21.087289810180664,
"eval_rewards/margins": 0.12443248927593231,
"eval_rewards/rejected": -21.21172332763672,
"eval_runtime": 73.7107,
"eval_samples_per_second": 24.773,
"eval_steps_per_second": 1.56,
"step": 23
},
{
"epoch": 0.5517241379310345,
"eval_logits/chosen": -0.7184363603591919,
"eval_logits/rejected": -0.6937569379806519,
"eval_logps/chosen": -209.13641357421875,
"eval_logps/rejected": -210.39794921875,
"eval_loss": 2.1655774116516113,
"eval_nll_loss": 0.5988763570785522,
"eval_rewards/accuracies": 0.5239130258560181,
"eval_rewards/chosen": -20.91364097595215,
"eval_rewards/margins": 0.12615376710891724,
"eval_rewards/rejected": -21.039793014526367,
"eval_runtime": 73.2196,
"eval_samples_per_second": 24.939,
"eval_steps_per_second": 1.571,
"step": 24
},
{
"epoch": 0.5747126436781609,
"eval_logits/chosen": -0.7364875078201294,
"eval_logits/rejected": -0.711971640586853,
"eval_logps/chosen": -207.19107055664062,
"eval_logps/rejected": -208.48138427734375,
"eval_loss": 2.155548572540283,
"eval_nll_loss": 0.5926215052604675,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -20.719106674194336,
"eval_rewards/margins": 0.12903204560279846,
"eval_rewards/rejected": -20.8481388092041,
"eval_runtime": 73.1876,
"eval_samples_per_second": 24.95,
"eval_steps_per_second": 1.571,
"step": 25
},
{
"epoch": 0.5977011494252874,
"eval_logits/chosen": -0.7545364499092102,
"eval_logits/rejected": -0.730129599571228,
"eval_logps/chosen": -205.48521423339844,
"eval_logps/rejected": -206.7897186279297,
"eval_loss": 2.1465742588043213,
"eval_nll_loss": 0.5872200727462769,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -20.548521041870117,
"eval_rewards/margins": 0.13045117259025574,
"eval_rewards/rejected": -20.678974151611328,
"eval_runtime": 73.5262,
"eval_samples_per_second": 24.835,
"eval_steps_per_second": 1.564,
"step": 26
},
{
"epoch": 0.6206896551724138,
"eval_logits/chosen": -0.7720378041267395,
"eval_logits/rejected": -0.7476205825805664,
"eval_logps/chosen": -203.7217559814453,
"eval_logps/rejected": -205.04006958007812,
"eval_loss": 2.139249801635742,
"eval_nll_loss": 0.5815550684928894,
"eval_rewards/accuracies": 0.5369565486907959,
"eval_rewards/chosen": -20.37217903137207,
"eval_rewards/margins": 0.13182921707630157,
"eval_rewards/rejected": -20.504005432128906,
"eval_runtime": 73.5829,
"eval_samples_per_second": 24.816,
"eval_steps_per_second": 1.563,
"step": 27
},
{
"epoch": 0.6436781609195402,
"eval_logits/chosen": -0.781804621219635,
"eval_logits/rejected": -0.7575309872627258,
"eval_logps/chosen": -201.85330200195312,
"eval_logps/rejected": -203.2164306640625,
"eval_loss": 2.1307995319366455,
"eval_nll_loss": 0.5756080150604248,
"eval_rewards/accuracies": 0.532608687877655,
"eval_rewards/chosen": -20.18532943725586,
"eval_rewards/margins": 0.13631057739257812,
"eval_rewards/rejected": -20.321643829345703,
"eval_runtime": 73.6844,
"eval_samples_per_second": 24.781,
"eval_steps_per_second": 1.561,
"step": 28
},
{
"epoch": 0.6666666666666666,
"eval_logits/chosen": -0.7925211787223816,
"eval_logits/rejected": -0.768252432346344,
"eval_logps/chosen": -199.9458770751953,
"eval_logps/rejected": -201.3154754638672,
"eval_loss": 2.1228978633880615,
"eval_nll_loss": 0.5694720149040222,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -19.994586944580078,
"eval_rewards/margins": 0.13696083426475525,
"eval_rewards/rejected": -20.131547927856445,
"eval_runtime": 73.7355,
"eval_samples_per_second": 24.764,
"eval_steps_per_second": 1.56,
"step": 29
},
{
"epoch": 0.6896551724137931,
"grad_norm": 55.80259323120117,
"learning_rate": 3.4210526315789473e-07,
"logits/chosen": -0.6812049150466919,
"logits/rejected": -0.6623071432113647,
"logps/chosen": -199.8437042236328,
"logps/rejected": -201.27694702148438,
"loss": 2.3172,
"nll_loss": 0.5909140706062317,
"rewards/accuracies": 0.53125,
"rewards/chosen": -19.984371185302734,
"rewards/margins": 0.14332275092601776,
"rewards/rejected": -20.127695083618164,
"step": 30
},
{
"epoch": 0.6896551724137931,
"eval_logits/chosen": -0.7975767254829407,
"eval_logits/rejected": -0.7734904885292053,
"eval_logps/chosen": -197.8929901123047,
"eval_logps/rejected": -199.30410766601562,
"eval_loss": 2.113354206085205,
"eval_nll_loss": 0.5630350708961487,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -19.78929901123047,
"eval_rewards/margins": 0.1411115825176239,
"eval_rewards/rejected": -19.93041229248047,
"eval_runtime": 73.6357,
"eval_samples_per_second": 24.798,
"eval_steps_per_second": 1.562,
"step": 30
},
{
"epoch": 0.7126436781609196,
"eval_logits/chosen": -0.7977136969566345,
"eval_logits/rejected": -0.7735068202018738,
"eval_logps/chosen": -195.95989990234375,
"eval_logps/rejected": -197.4013214111328,
"eval_loss": 2.1055009365081787,
"eval_nll_loss": 0.5569384098052979,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -19.595989227294922,
"eval_rewards/margins": 0.1441420167684555,
"eval_rewards/rejected": -19.74013328552246,
"eval_runtime": 73.0556,
"eval_samples_per_second": 24.995,
"eval_steps_per_second": 1.574,
"step": 31
},
{
"epoch": 0.735632183908046,
"eval_logits/chosen": -0.80599045753479,
"eval_logits/rejected": -0.7817136645317078,
"eval_logps/chosen": -194.0162811279297,
"eval_logps/rejected": -195.46153259277344,
"eval_loss": 2.0985281467437744,
"eval_nll_loss": 0.5507530570030212,
"eval_rewards/accuracies": 0.52173912525177,
"eval_rewards/chosen": -19.401628494262695,
"eval_rewards/margins": 0.14452561736106873,
"eval_rewards/rejected": -19.546154022216797,
"eval_runtime": 73.1881,
"eval_samples_per_second": 24.949,
"eval_steps_per_second": 1.571,
"step": 32
},
{
"epoch": 0.7586206896551724,
"eval_logits/chosen": -0.8030232787132263,
"eval_logits/rejected": -0.7785286903381348,
"eval_logps/chosen": -192.11659240722656,
"eval_logps/rejected": -193.61715698242188,
"eval_loss": 2.0903804302215576,
"eval_nll_loss": 0.5446676015853882,
"eval_rewards/accuracies": 0.5239130258560181,
"eval_rewards/chosen": -19.211658477783203,
"eval_rewards/margins": 0.1500559002161026,
"eval_rewards/rejected": -19.36171531677246,
"eval_runtime": 73.4088,
"eval_samples_per_second": 24.874,
"eval_steps_per_second": 1.567,
"step": 33
},
{
"epoch": 0.7816091954022989,
"eval_logits/chosen": -0.8003183603286743,
"eval_logits/rejected": -0.7758002281188965,
"eval_logps/chosen": -190.38067626953125,
"eval_logps/rejected": -191.8131561279297,
"eval_loss": 2.08504056930542,
"eval_nll_loss": 0.539174497127533,
"eval_rewards/accuracies": 0.5239130258560181,
"eval_rewards/chosen": -19.038066864013672,
"eval_rewards/margins": 0.1432473063468933,
"eval_rewards/rejected": -19.18131446838379,
"eval_runtime": 73.4902,
"eval_samples_per_second": 24.847,
"eval_steps_per_second": 1.565,
"step": 34
},
{
"epoch": 0.8045977011494253,
"eval_logits/chosen": -0.796375036239624,
"eval_logits/rejected": -0.7714610695838928,
"eval_logps/chosen": -188.9884033203125,
"eval_logps/rejected": -190.43736267089844,
"eval_loss": 2.0792500972747803,
"eval_nll_loss": 0.5345708131790161,
"eval_rewards/accuracies": 0.5173913240432739,
"eval_rewards/chosen": -18.898839950561523,
"eval_rewards/margins": 0.1448965221643448,
"eval_rewards/rejected": -19.043737411499023,
"eval_runtime": 73.2997,
"eval_samples_per_second": 24.911,
"eval_steps_per_second": 1.569,
"step": 35
},
{
"epoch": 0.8275862068965517,
"eval_logits/chosen": -0.7951973080635071,
"eval_logits/rejected": -0.7701032757759094,
"eval_logps/chosen": -187.54518127441406,
"eval_logps/rejected": -188.98013305664062,
"eval_loss": 2.0720129013061523,
"eval_nll_loss": 0.5298618078231812,
"eval_rewards/accuracies": 0.519565224647522,
"eval_rewards/chosen": -18.754518508911133,
"eval_rewards/margins": 0.14349476993083954,
"eval_rewards/rejected": -18.898012161254883,
"eval_runtime": 73.4171,
"eval_samples_per_second": 24.872,
"eval_steps_per_second": 1.566,
"step": 36
},
{
"epoch": 0.8505747126436781,
"eval_logits/chosen": -0.7926805019378662,
"eval_logits/rejected": -0.7679208517074585,
"eval_logps/chosen": -186.56715393066406,
"eval_logps/rejected": -188.0532684326172,
"eval_loss": 2.0663270950317383,
"eval_nll_loss": 0.526580810546875,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -18.656715393066406,
"eval_rewards/margins": 0.14861242473125458,
"eval_rewards/rejected": -18.805326461791992,
"eval_runtime": 73.5278,
"eval_samples_per_second": 24.834,
"eval_steps_per_second": 1.564,
"step": 37
},
{
"epoch": 0.8735632183908046,
"eval_logits/chosen": -0.7882456183433533,
"eval_logits/rejected": -0.7631468176841736,
"eval_logps/chosen": -185.62677001953125,
"eval_logps/rejected": -187.13912963867188,
"eval_loss": 2.0643482208251953,
"eval_nll_loss": 0.5234898924827576,
"eval_rewards/accuracies": 0.5239130258560181,
"eval_rewards/chosen": -18.56267738342285,
"eval_rewards/margins": 0.15123440325260162,
"eval_rewards/rejected": -18.713911056518555,
"eval_runtime": 73.4858,
"eval_samples_per_second": 24.848,
"eval_steps_per_second": 1.565,
"step": 38
},
{
"epoch": 0.896551724137931,
"eval_logits/chosen": -0.7857053279876709,
"eval_logits/rejected": -0.7608606815338135,
"eval_logps/chosen": -185.09970092773438,
"eval_logps/rejected": -186.60646057128906,
"eval_loss": 2.0600922107696533,
"eval_nll_loss": 0.5217379927635193,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -18.509971618652344,
"eval_rewards/margins": 0.15067508816719055,
"eval_rewards/rejected": -18.66064453125,
"eval_runtime": 73.7485,
"eval_samples_per_second": 24.76,
"eval_steps_per_second": 1.559,
"step": 39
},
{
"epoch": 0.9195402298850575,
"grad_norm": 50.088340759277344,
"learning_rate": 7.894736842105262e-08,
"logits/chosen": -0.8007175326347351,
"logits/rejected": -0.7798112630844116,
"logps/chosen": -190.50381469726562,
"logps/rejected": -193.3760223388672,
"loss": 2.1039,
"nll_loss": 0.5438653230667114,
"rewards/accuracies": 0.546875,
"rewards/chosen": -19.05038070678711,
"rewards/margins": 0.2872214913368225,
"rewards/rejected": -19.337600708007812,
"step": 40
},
{
"epoch": 0.9195402298850575,
"eval_logits/chosen": -0.785999596118927,
"eval_logits/rejected": -0.7610748410224915,
"eval_logps/chosen": -184.6099090576172,
"eval_logps/rejected": -186.1282958984375,
"eval_loss": 2.0597591400146484,
"eval_nll_loss": 0.5201125144958496,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -18.46099090576172,
"eval_rewards/margins": 0.15183939039707184,
"eval_rewards/rejected": -18.612829208374023,
"eval_runtime": 73.6777,
"eval_samples_per_second": 24.784,
"eval_steps_per_second": 1.561,
"step": 40
},
{
"epoch": 0.9425287356321839,
"eval_logits/chosen": -0.7789402604103088,
"eval_logits/rejected": -0.754026472568512,
"eval_logps/chosen": -184.23236083984375,
"eval_logps/rejected": -185.80072021484375,
"eval_loss": 2.0538711547851562,
"eval_nll_loss": 0.5189568400382996,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -18.423233032226562,
"eval_rewards/margins": 0.15683722496032715,
"eval_rewards/rejected": -18.5800724029541,
"eval_runtime": 73.0726,
"eval_samples_per_second": 24.989,
"eval_steps_per_second": 1.574,
"step": 41
},
{
"epoch": 0.9655172413793104,
"eval_logits/chosen": -0.777718722820282,
"eval_logits/rejected": -0.7525457739830017,
"eval_logps/chosen": -183.968994140625,
"eval_logps/rejected": -185.52581787109375,
"eval_loss": 2.054420232772827,
"eval_nll_loss": 0.518138587474823,
"eval_rewards/accuracies": 0.5282608866691589,
"eval_rewards/chosen": -18.396900177001953,
"eval_rewards/margins": 0.15568143129348755,
"eval_rewards/rejected": -18.552579879760742,
"eval_runtime": 73.2982,
"eval_samples_per_second": 24.912,
"eval_steps_per_second": 1.569,
"step": 42
},
{
"epoch": 0.9885057471264368,
"eval_logits/chosen": -0.779742419719696,
"eval_logits/rejected": -0.755063533782959,
"eval_logps/chosen": -183.93116760253906,
"eval_logps/rejected": -185.45208740234375,
"eval_loss": 2.0544536113739014,
"eval_nll_loss": 0.5179869532585144,
"eval_rewards/accuracies": 0.5260869860649109,
"eval_rewards/chosen": -18.393117904663086,
"eval_rewards/margins": 0.15209028124809265,
"eval_rewards/rejected": -18.54520606994629,
"eval_runtime": 73.5834,
"eval_samples_per_second": 24.815,
"eval_steps_per_second": 1.563,
"step": 43
},
{
"epoch": 0.9885057471264368,
"step": 43,
"total_flos": 0.0,
"train_loss": 2.3523660704146985,
"train_runtime": 5387.5537,
"train_samples_per_second": 1.031,
"train_steps_per_second": 0.008
}
],
"logging_steps": 10,
"max_steps": 43,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}