{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 750000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-05, "loss": 3.5986, "step": 1 }, { "epoch": 0.05, "learning_rate": 9.83344888888889e-06, "loss": 0.685, "step": 18750 }, { "epoch": 0.05, "eval_accuracy": 0.5626, "eval_loss": 0.676932692527771, "eval_runtime": 23.4325, "eval_samples_per_second": 426.757, "eval_steps_per_second": 6.7, "step": 18750 }, { "epoch": 0.1, "learning_rate": 9.666853333333333e-06, "loss": 0.6788, "step": 37500 }, { "epoch": 0.1, "eval_accuracy": 0.5642, "eval_loss": 0.6758846640586853, "eval_runtime": 23.4496, "eval_samples_per_second": 426.447, "eval_steps_per_second": 6.695, "step": 37500 }, { "epoch": 0.15, "learning_rate": 9.500266666666668e-06, "loss": 0.6774, "step": 56250 }, { "epoch": 0.15, "eval_accuracy": 0.5681, "eval_loss": 0.6747780442237854, "eval_runtime": 23.6291, "eval_samples_per_second": 423.207, "eval_steps_per_second": 6.644, "step": 56250 }, { "epoch": 0.2, "learning_rate": 9.333671111111113e-06, "loss": 0.6771, "step": 75000 }, { "epoch": 0.2, "eval_accuracy": 0.5748, "eval_loss": 0.6741061806678772, "eval_runtime": 23.3134, "eval_samples_per_second": 428.938, "eval_steps_per_second": 6.734, "step": 75000 }, { "epoch": 0.25, "learning_rate": 9.167075555555556e-06, "loss": 0.6762, "step": 93750 }, { "epoch": 0.25, "eval_accuracy": 0.5738, "eval_loss": 0.6736128926277161, "eval_runtime": 23.6397, "eval_samples_per_second": 423.017, "eval_steps_per_second": 6.641, "step": 93750 }, { "epoch": 0.3, "learning_rate": 9.00048e-06, "loss": 0.6755, "step": 112500 }, { "epoch": 0.3, "eval_accuracy": 0.5736, "eval_loss": 0.6737410426139832, "eval_runtime": 23.3251, "eval_samples_per_second": 428.723, "eval_steps_per_second": 6.731, "step": 112500 }, { "epoch": 0.35, "learning_rate": 8.833866666666667e-06, "loss": 0.6751, "step": 131250 }, { "epoch": 0.35, "eval_accuracy": 0.5731, "eval_loss": 0.6734435558319092, "eval_runtime": 23.3345, "eval_samples_per_second": 428.549, "eval_steps_per_second": 6.728, "step": 131250 }, { "epoch": 0.4, "learning_rate": 8.667271111111112e-06, "loss": 0.6749, "step": 150000 }, { "epoch": 0.4, "eval_accuracy": 0.5714, "eval_loss": 0.6720886826515198, "eval_runtime": 23.4003, "eval_samples_per_second": 427.345, "eval_steps_per_second": 6.709, "step": 150000 }, { "epoch": 0.45, "learning_rate": 8.500666666666666e-06, "loss": 0.6746, "step": 168750 }, { "epoch": 0.45, "eval_accuracy": 0.5767, "eval_loss": 0.6722772717475891, "eval_runtime": 23.5061, "eval_samples_per_second": 425.422, "eval_steps_per_second": 6.679, "step": 168750 }, { "epoch": 0.5, "learning_rate": 8.334071111111112e-06, "loss": 0.6745, "step": 187500 }, { "epoch": 0.5, "eval_accuracy": 0.5756, "eval_loss": 0.6716436147689819, "eval_runtime": 23.4294, "eval_samples_per_second": 426.814, "eval_steps_per_second": 6.701, "step": 187500 }, { "epoch": 0.55, "learning_rate": 8.167475555555557e-06, "loss": 0.6745, "step": 206250 }, { "epoch": 0.55, "eval_accuracy": 0.5785, "eval_loss": 0.6716175675392151, "eval_runtime": 23.3298, "eval_samples_per_second": 428.636, "eval_steps_per_second": 6.73, "step": 206250 }, { "epoch": 0.6, "learning_rate": 8.00088e-06, "loss": 0.6744, "step": 225000 }, { "epoch": 0.6, "eval_accuracy": 0.5721, "eval_loss": 0.6720548272132874, "eval_runtime": 23.2772, "eval_samples_per_second": 429.606, "eval_steps_per_second": 6.745, "step": 225000 }, { "epoch": 0.65, "learning_rate": 7.834284444444445e-06, "loss": 0.674, "step": 243750 }, { "epoch": 0.65, "eval_accuracy": 0.5742, "eval_loss": 0.6714185476303101, "eval_runtime": 23.567, "eval_samples_per_second": 424.321, "eval_steps_per_second": 6.662, "step": 243750 }, { "epoch": 0.7, "learning_rate": 7.66768888888889e-06, "loss": 0.6743, "step": 262500 }, { "epoch": 0.7, "eval_accuracy": 0.5699, "eval_loss": 0.6723877191543579, "eval_runtime": 23.1824, "eval_samples_per_second": 431.361, "eval_steps_per_second": 6.772, "step": 262500 }, { "epoch": 0.75, "learning_rate": 7.501084444444445e-06, "loss": 0.6737, "step": 281250 }, { "epoch": 0.75, "eval_accuracy": 0.5775, "eval_loss": 0.6708235144615173, "eval_runtime": 23.3586, "eval_samples_per_second": 428.108, "eval_steps_per_second": 6.721, "step": 281250 }, { "epoch": 0.8, "learning_rate": 7.33448888888889e-06, "loss": 0.6736, "step": 300000 }, { "epoch": 0.8, "eval_accuracy": 0.5749, "eval_loss": 0.6718671917915344, "eval_runtime": 23.412, "eval_samples_per_second": 427.132, "eval_steps_per_second": 6.706, "step": 300000 }, { "epoch": 0.85, "learning_rate": 7.167893333333334e-06, "loss": 0.6737, "step": 318750 }, { "epoch": 0.85, "eval_accuracy": 0.5756, "eval_loss": 0.6709334254264832, "eval_runtime": 23.5575, "eval_samples_per_second": 424.493, "eval_steps_per_second": 6.665, "step": 318750 }, { "epoch": 0.9, "learning_rate": 7.001297777777778e-06, "loss": 0.6736, "step": 337500 }, { "epoch": 0.9, "eval_accuracy": 0.5725, "eval_loss": 0.6705034375190735, "eval_runtime": 23.4245, "eval_samples_per_second": 426.903, "eval_steps_per_second": 6.702, "step": 337500 }, { "epoch": 0.95, "learning_rate": 6.834711111111112e-06, "loss": 0.6736, "step": 356250 }, { "epoch": 0.95, "eval_accuracy": 0.5734, "eval_loss": 0.6707730293273926, "eval_runtime": 23.4532, "eval_samples_per_second": 426.381, "eval_steps_per_second": 6.694, "step": 356250 }, { "epoch": 1.0, "learning_rate": 6.6681155555555566e-06, "loss": 0.6731, "step": 375000 }, { "epoch": 1.0, "eval_accuracy": 0.5765, "eval_loss": 0.6712862253189087, "eval_runtime": 23.3551, "eval_samples_per_second": 428.173, "eval_steps_per_second": 6.722, "step": 375000 }, { "epoch": 1.05, "learning_rate": 6.50152e-06, "loss": 0.6704, "step": 393750 }, { "epoch": 1.05, "eval_accuracy": 0.5772, "eval_loss": 0.6697613000869751, "eval_runtime": 23.5543, "eval_samples_per_second": 424.551, "eval_steps_per_second": 6.665, "step": 393750 }, { "epoch": 1.1, "learning_rate": 6.334924444444444e-06, "loss": 0.6703, "step": 412500 }, { "epoch": 1.1, "eval_accuracy": 0.5747, "eval_loss": 0.6700881719589233, "eval_runtime": 23.557, "eval_samples_per_second": 424.502, "eval_steps_per_second": 6.665, "step": 412500 }, { "epoch": 1.15, "learning_rate": 6.1683288888888896e-06, "loss": 0.6703, "step": 431250 }, { "epoch": 1.15, "eval_accuracy": 0.578, "eval_loss": 0.669475257396698, "eval_runtime": 23.4497, "eval_samples_per_second": 426.445, "eval_steps_per_second": 6.695, "step": 431250 }, { "epoch": 1.2, "learning_rate": 6.001733333333334e-06, "loss": 0.6705, "step": 450000 }, { "epoch": 1.2, "eval_accuracy": 0.577, "eval_loss": 0.6688240170478821, "eval_runtime": 23.5829, "eval_samples_per_second": 424.035, "eval_steps_per_second": 6.657, "step": 450000 }, { "epoch": 1.25, "learning_rate": 5.8351466666666665e-06, "loss": 0.6703, "step": 468750 }, { "epoch": 1.25, "eval_accuracy": 0.5795, "eval_loss": 0.6692911982536316, "eval_runtime": 23.4227, "eval_samples_per_second": 426.936, "eval_steps_per_second": 6.703, "step": 468750 }, { "epoch": 1.3, "learning_rate": 5.668551111111112e-06, "loss": 0.6704, "step": 487500 }, { "epoch": 1.3, "eval_accuracy": 0.5783, "eval_loss": 0.6689985394477844, "eval_runtime": 23.436, "eval_samples_per_second": 426.694, "eval_steps_per_second": 6.699, "step": 487500 }, { "epoch": 1.35, "learning_rate": 5.501955555555556e-06, "loss": 0.6703, "step": 506250 }, { "epoch": 1.35, "eval_accuracy": 0.5783, "eval_loss": 0.6690422296524048, "eval_runtime": 23.5352, "eval_samples_per_second": 424.896, "eval_steps_per_second": 6.671, "step": 506250 }, { "epoch": 1.4, "learning_rate": 5.33536e-06, "loss": 0.6705, "step": 525000 }, { "epoch": 1.4, "eval_accuracy": 0.5785, "eval_loss": 0.6693896055221558, "eval_runtime": 23.5799, "eval_samples_per_second": 424.09, "eval_steps_per_second": 6.658, "step": 525000 }, { "epoch": 1.45, "learning_rate": 5.168782222222223e-06, "loss": 0.6705, "step": 543750 }, { "epoch": 1.45, "eval_accuracy": 0.5806, "eval_loss": 0.6683958768844604, "eval_runtime": 23.4942, "eval_samples_per_second": 425.637, "eval_steps_per_second": 6.683, "step": 543750 }, { "epoch": 1.5, "learning_rate": 5.002186666666667e-06, "loss": 0.6704, "step": 562500 }, { "epoch": 1.5, "eval_accuracy": 0.5792, "eval_loss": 0.6692180633544922, "eval_runtime": 23.4522, "eval_samples_per_second": 426.4, "eval_steps_per_second": 6.694, "step": 562500 }, { "epoch": 1.55, "learning_rate": 4.8356e-06, "loss": 0.6706, "step": 581250 }, { "epoch": 1.55, "eval_accuracy": 0.5787, "eval_loss": 0.6688229441642761, "eval_runtime": 23.3586, "eval_samples_per_second": 428.109, "eval_steps_per_second": 6.721, "step": 581250 }, { "epoch": 1.6, "learning_rate": 4.669004444444445e-06, "loss": 0.6706, "step": 600000 }, { "epoch": 1.6, "eval_accuracy": 0.5814, "eval_loss": 0.6699367165565491, "eval_runtime": 23.5906, "eval_samples_per_second": 423.898, "eval_steps_per_second": 6.655, "step": 600000 }, { "epoch": 1.65, "learning_rate": 4.5024177777777786e-06, "loss": 0.6702, "step": 618750 }, { "epoch": 1.65, "eval_accuracy": 0.5766, "eval_loss": 0.669127881526947, "eval_runtime": 23.4357, "eval_samples_per_second": 426.7, "eval_steps_per_second": 6.699, "step": 618750 }, { "epoch": 1.7, "learning_rate": 4.335804444444445e-06, "loss": 0.6702, "step": 637500 }, { "epoch": 1.7, "eval_accuracy": 0.5782, "eval_loss": 0.6684728264808655, "eval_runtime": 23.4934, "eval_samples_per_second": 425.651, "eval_steps_per_second": 6.683, "step": 637500 }, { "epoch": 1.75, "learning_rate": 4.169217777777778e-06, "loss": 0.67, "step": 656250 }, { "epoch": 1.75, "eval_accuracy": 0.5817, "eval_loss": 0.6683481931686401, "eval_runtime": 23.6396, "eval_samples_per_second": 423.02, "eval_steps_per_second": 6.641, "step": 656250 }, { "epoch": 1.8, "learning_rate": 4.002613333333334e-06, "loss": 0.6697, "step": 675000 }, { "epoch": 1.8, "eval_accuracy": 0.582, "eval_loss": 0.667320966720581, "eval_runtime": 23.7429, "eval_samples_per_second": 421.178, "eval_steps_per_second": 6.612, "step": 675000 }, { "epoch": 1.85, "learning_rate": 3.836026666666667e-06, "loss": 0.6703, "step": 693750 }, { "epoch": 1.85, "eval_accuracy": 0.581, "eval_loss": 0.6678736209869385, "eval_runtime": 23.3426, "eval_samples_per_second": 428.401, "eval_steps_per_second": 6.726, "step": 693750 }, { "epoch": 1.9, "learning_rate": 3.6694400000000002e-06, "loss": 0.6697, "step": 712500 }, { "epoch": 1.9, "eval_accuracy": 0.5854, "eval_loss": 0.6670705080032349, "eval_runtime": 23.4267, "eval_samples_per_second": 426.862, "eval_steps_per_second": 6.702, "step": 712500 }, { "epoch": 1.95, "learning_rate": 3.502835555555556e-06, "loss": 0.6697, "step": 731250 }, { "epoch": 1.95, "eval_accuracy": 0.5837, "eval_loss": 0.6675453186035156, "eval_runtime": 23.4269, "eval_samples_per_second": 426.859, "eval_steps_per_second": 6.702, "step": 731250 }, { "epoch": 2.0, "learning_rate": 3.33624e-06, "loss": 0.6702, "step": 750000 }, { "epoch": 2.0, "eval_accuracy": 0.5857, "eval_loss": 0.6678363084793091, "eval_runtime": 23.4622, "eval_samples_per_second": 426.218, "eval_steps_per_second": 6.692, "step": 750000 } ], "max_steps": 1125000, "num_train_epochs": 3, "total_flos": 4.4577934226915066e+19, "trial_name": null, "trial_params": null }