{ "best_metric": 0.03475998714566231, "best_model_checkpoint": "t5/checkpoint-785", "epoch": 8.0, "eval_steps": 500, "global_step": 1256, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.17618757486343384, "learning_rate": 1.9e-05, "loss": 0.0618, "step": 157 }, { "epoch": 1.0, "eval_accuracy": 0.6860511590727418, "eval_loss": 0.041351497173309326, "eval_runtime": 60.885, "eval_samples_per_second": 82.188, "eval_steps_per_second": 0.328, "step": 157 }, { "epoch": 2.0, "grad_norm": 0.18060798943042755, "learning_rate": 1.8e-05, "loss": 0.0423, "step": 314 }, { "epoch": 2.0, "eval_accuracy": 0.707234212629896, "eval_loss": 0.03738662600517273, "eval_runtime": 60.1479, "eval_samples_per_second": 83.195, "eval_steps_per_second": 0.333, "step": 314 }, { "epoch": 3.0, "grad_norm": 0.20145682990550995, "learning_rate": 1.7e-05, "loss": 0.0368, "step": 471 }, { "epoch": 3.0, "eval_accuracy": 0.7208233413269385, "eval_loss": 0.03572960942983627, "eval_runtime": 59.3756, "eval_samples_per_second": 84.277, "eval_steps_per_second": 0.337, "step": 471 }, { "epoch": 4.0, "grad_norm": 0.20047585666179657, "learning_rate": 1.6000000000000003e-05, "loss": 0.0326, "step": 628 }, { "epoch": 4.0, "eval_accuracy": 0.7262190247801759, "eval_loss": 0.03503531962633133, "eval_runtime": 59.034, "eval_samples_per_second": 84.765, "eval_steps_per_second": 0.339, "step": 628 }, { "epoch": 5.0, "grad_norm": 0.1549680233001709, "learning_rate": 1.5000000000000002e-05, "loss": 0.0293, "step": 785 }, { "epoch": 5.0, "eval_accuracy": 0.7294164668265388, "eval_loss": 0.03475998714566231, "eval_runtime": 59.2862, "eval_samples_per_second": 84.404, "eval_steps_per_second": 0.337, "step": 785 }, { "epoch": 6.0, "grad_norm": 0.19214752316474915, "learning_rate": 1.4e-05, "loss": 0.0266, "step": 942 }, { "epoch": 6.0, "eval_accuracy": 0.7352118305355716, "eval_loss": 0.0347641222178936, "eval_runtime": 59.4378, "eval_samples_per_second": 84.189, "eval_steps_per_second": 0.336, "step": 942 }, { "epoch": 7.0, "grad_norm": 0.1793193817138672, "learning_rate": 1.3000000000000001e-05, "loss": 0.0243, "step": 1099 }, { "epoch": 7.0, "eval_accuracy": 0.7328137490007993, "eval_loss": 0.03488365188241005, "eval_runtime": 59.8565, "eval_samples_per_second": 83.6, "eval_steps_per_second": 0.334, "step": 1099 }, { "epoch": 8.0, "grad_norm": 0.19961176812648773, "learning_rate": 1.2e-05, "loss": 0.0223, "step": 1256 }, { "epoch": 8.0, "eval_accuracy": 0.7356115107913669, "eval_loss": 0.03529239445924759, "eval_runtime": 58.7625, "eval_samples_per_second": 85.156, "eval_steps_per_second": 0.34, "step": 1256 } ], "logging_steps": 500, "max_steps": 3140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4.333072619596493e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }