aoyunhui_genQA_personas_2000 / trainer_state.json
hvgg1ngface's picture
End of training
7104ca9 verified
raw
history blame contribute delete
No virus
4.49 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.988679245283019,
"eval_steps": 100,
"global_step": 198,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1509433962264151,
"grad_norm": 1.5483485460281372,
"learning_rate": 4e-06,
"loss": 1.1432,
"step": 10
},
{
"epoch": 0.3018867924528302,
"grad_norm": 0.7621527314186096,
"learning_rate": 8e-06,
"loss": 0.9841,
"step": 20
},
{
"epoch": 0.4528301886792453,
"grad_norm": 0.5265641212463379,
"learning_rate": 7.937861311446333e-06,
"loss": 0.8367,
"step": 30
},
{
"epoch": 0.6037735849056604,
"grad_norm": 0.4887830317020416,
"learning_rate": 7.753375854092917e-06,
"loss": 0.7718,
"step": 40
},
{
"epoch": 0.7547169811320755,
"grad_norm": 0.3349996507167816,
"learning_rate": 7.4522754701283365e-06,
"loss": 0.745,
"step": 50
},
{
"epoch": 0.9056603773584906,
"grad_norm": 0.4875039756298065,
"learning_rate": 7.04391515104387e-06,
"loss": 0.6574,
"step": 60
},
{
"epoch": 1.0566037735849056,
"grad_norm": 0.46130555868148804,
"learning_rate": 6.540982384182154e-06,
"loss": 0.6706,
"step": 70
},
{
"epoch": 1.2075471698113207,
"grad_norm": 0.45948392152786255,
"learning_rate": 5.9591029608249135e-06,
"loss": 0.6149,
"step": 80
},
{
"epoch": 1.3584905660377358,
"grad_norm": 0.4454590380191803,
"learning_rate": 5.31635549310404e-06,
"loss": 0.5374,
"step": 90
},
{
"epoch": 1.509433962264151,
"grad_norm": 0.42349863052368164,
"learning_rate": 4.6327097233772155e-06,
"loss": 0.5595,
"step": 100
},
{
"epoch": 1.509433962264151,
"eval_loss": 0.5344268083572388,
"eval_runtime": 2.3441,
"eval_samples_per_second": 47.779,
"eval_steps_per_second": 2.986,
"step": 100
},
{
"epoch": 1.6603773584905661,
"grad_norm": 0.4312027096748352,
"learning_rate": 3.929406077427486e-06,
"loss": 0.5048,
"step": 110
},
{
"epoch": 1.8113207547169812,
"grad_norm": 0.5000870227813721,
"learning_rate": 3.2282957383620122e-06,
"loss": 0.5256,
"step": 120
},
{
"epoch": 1.9622641509433962,
"grad_norm": 0.46476179361343384,
"learning_rate": 2.5511617446812707e-06,
"loss": 0.5328,
"step": 130
},
{
"epoch": 2.1132075471698113,
"grad_norm": 0.36232122778892517,
"learning_rate": 1.9190422055564713e-06,
"loss": 0.4995,
"step": 140
},
{
"epoch": 2.2641509433962264,
"grad_norm": 0.3772554397583008,
"learning_rate": 1.3515766605727983e-06,
"loss": 0.4822,
"step": 150
},
{
"epoch": 2.4150943396226414,
"grad_norm": 0.4370742738246918,
"learning_rate": 8.663958921125897e-07,
"loss": 0.4785,
"step": 160
},
{
"epoch": 2.5660377358490565,
"grad_norm": 0.46978744864463806,
"learning_rate": 4.785741485076355e-07,
"loss": 0.4779,
"step": 170
},
{
"epoch": 2.7169811320754715,
"grad_norm": 0.4040665030479431,
"learning_rate": 2.0016079702803678e-07,
"loss": 0.5026,
"step": 180
},
{
"epoch": 2.867924528301887,
"grad_norm": 0.3927009701728821,
"learning_rate": 3.9805957942182774e-08,
"loss": 0.4935,
"step": 190
},
{
"epoch": 2.988679245283019,
"step": 198,
"total_flos": 7.194515931031142e+16,
"train_loss": 0.6248175543968124,
"train_runtime": 424.9337,
"train_samples_per_second": 14.953,
"train_steps_per_second": 0.466
}
],
"logging_steps": 10,
"max_steps": 198,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.194515931031142e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}