{ "best_metric": 0.09626218676567078, "best_model_checkpoint": "checkpoint/checkpoint-300000", "epoch": 0.8551077008149176, "global_step": 300000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.942992819945672e-05, "loss": 0.0499, "step": 10000 }, { "epoch": 0.06, "learning_rate": 1.8859856398913445e-05, "loss": 0.0274, "step": 20000 }, { "epoch": 0.09, "learning_rate": 1.8289784598370164e-05, "loss": 0.0244, "step": 30000 }, { "epoch": 0.11, "learning_rate": 1.7719712797826888e-05, "loss": 0.0218, "step": 40000 }, { "epoch": 0.14, "learning_rate": 1.714964099728361e-05, "loss": 0.0208, "step": 50000 }, { "epoch": 0.14, "eval_accuracy": 0.9703630265572595, "eval_f1": 0.9697362965196742, "eval_loss": 0.19255024194717407, "eval_precision": 0.9906862367555316, "eval_recall": 0.949654059527074, "eval_runtime": 7138.3308, "eval_samples_per_second": 53.13, "eval_steps_per_second": 6.641, "step": 50000 }, { "epoch": 0.17, "learning_rate": 1.657956919674033e-05, "loss": 0.018, "step": 60000 }, { "epoch": 0.2, "learning_rate": 1.600949739619705e-05, "loss": 0.0173, "step": 70000 }, { "epoch": 0.23, "learning_rate": 1.5439425595653774e-05, "loss": 0.0182, "step": 80000 }, { "epoch": 0.26, "learning_rate": 1.4869353795110495e-05, "loss": 0.0168, "step": 90000 }, { "epoch": 0.29, "learning_rate": 1.4299281994567218e-05, "loss": 0.0152, "step": 100000 }, { "epoch": 0.29, "eval_accuracy": 0.9694269833568883, "eval_f1": 0.9686356296001752, "eval_loss": 0.26416775584220886, "eval_precision": 0.9943740662782754, "eval_recall": 0.9441960048094163, "eval_runtime": 7120.7469, "eval_samples_per_second": 53.261, "eval_steps_per_second": 6.658, "step": 100000 }, { "epoch": 0.31, "learning_rate": 1.3729210194023938e-05, "loss": 0.0167, "step": 110000 }, { "epoch": 0.34, "learning_rate": 1.315913839348066e-05, "loss": 0.0143, "step": 120000 }, { "epoch": 0.37, "learning_rate": 1.2589066592937383e-05, "loss": 0.0151, "step": 130000 }, { "epoch": 0.4, "learning_rate": 1.2018994792394103e-05, "loss": 0.0127, "step": 140000 }, { "epoch": 0.43, "learning_rate": 1.1448922991850824e-05, "loss": 0.0129, "step": 150000 }, { "epoch": 0.43, "eval_accuracy": 0.9814399772185542, "eval_f1": 0.9811897565811801, "eval_loss": 0.14654095470905304, "eval_precision": 0.9945986358440376, "eval_recall": 0.9681376168076444, "eval_runtime": 7098.6821, "eval_samples_per_second": 53.426, "eval_steps_per_second": 6.678, "step": 150000 }, { "epoch": 0.46, "learning_rate": 1.0878851191307547e-05, "loss": 0.0166, "step": 160000 }, { "epoch": 0.48, "learning_rate": 1.0308779390764267e-05, "loss": 0.0133, "step": 170000 }, { "epoch": 0.51, "learning_rate": 9.738707590220989e-06, "loss": 0.0117, "step": 180000 }, { "epoch": 0.54, "learning_rate": 9.168635789677712e-06, "loss": 0.0104, "step": 190000 }, { "epoch": 0.57, "learning_rate": 8.598563989134432e-06, "loss": 0.0111, "step": 200000 }, { "epoch": 0.57, "eval_accuracy": 0.9705159575590103, "eval_f1": 0.9696535478373199, "eval_loss": 0.31930962204933167, "eval_precision": 0.9988705619233995, "eval_recall": 0.9420971586474571, "eval_runtime": 7105.0574, "eval_samples_per_second": 53.378, "eval_steps_per_second": 6.672, "step": 200000 }, { "epoch": 0.6, "learning_rate": 8.028492188591153e-06, "loss": 0.0103, "step": 210000 }, { "epoch": 0.63, "learning_rate": 7.4584203880478754e-06, "loss": 0.0113, "step": 220000 }, { "epoch": 0.66, "learning_rate": 6.888348587504596e-06, "loss": 0.0083, "step": 230000 }, { "epoch": 0.68, "learning_rate": 6.3182767869613184e-06, "loss": 0.008, "step": 240000 }, { "epoch": 0.71, "learning_rate": 5.74820498641804e-06, "loss": 0.008, "step": 250000 }, { "epoch": 0.71, "eval_accuracy": 0.976308878435674, "eval_f1": 0.9758237027270649, "eval_loss": 0.20410259068012238, "eval_precision": 0.996225627276573, "eval_recall": 0.9562406395679963, "eval_runtime": 7119.7261, "eval_samples_per_second": 53.268, "eval_steps_per_second": 6.659, "step": 250000 }, { "epoch": 0.74, "learning_rate": 5.178133185874762e-06, "loss": 0.0086, "step": 260000 }, { "epoch": 0.77, "learning_rate": 4.608061385331483e-06, "loss": 0.0067, "step": 270000 }, { "epoch": 0.8, "learning_rate": 4.0379895847882044e-06, "loss": 0.007, "step": 280000 }, { "epoch": 0.83, "learning_rate": 3.467917784244926e-06, "loss": 0.0059, "step": 290000 }, { "epoch": 0.86, "learning_rate": 2.8978459837016474e-06, "loss": 0.0061, "step": 300000 }, { "epoch": 0.86, "eval_accuracy": 0.9880529246735714, "eval_f1": 0.9879724036876487, "eval_loss": 0.09626218676567078, "eval_precision": 0.9946763305164922, "eval_recall": 0.9813582382348598, "eval_runtime": 7136.9095, "eval_samples_per_second": 53.14, "eval_steps_per_second": 6.643, "step": 300000 } ], "max_steps": 350833, "num_train_epochs": 1, "total_flos": 2.7489826686811325e+17, "trial_name": null, "trial_params": null }