|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.054945054945054944, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001098901098901099, |
|
"grad_norm": 4.387043476104736, |
|
"learning_rate": 5.4945054945054946e-08, |
|
"loss": 0.8831, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002197802197802198, |
|
"grad_norm": 3.8907365798950195, |
|
"learning_rate": 1.0989010989010989e-07, |
|
"loss": 0.8866, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0032967032967032967, |
|
"grad_norm": 3.2492220401763916, |
|
"learning_rate": 1.6483516483516484e-07, |
|
"loss": 0.8454, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004395604395604396, |
|
"grad_norm": 3.0804762840270996, |
|
"learning_rate": 2.1978021978021978e-07, |
|
"loss": 0.8267, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005494505494505495, |
|
"grad_norm": 2.037411689758301, |
|
"learning_rate": 2.7472527472527475e-07, |
|
"loss": 0.7371, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.006593406593406593, |
|
"grad_norm": 1.5473365783691406, |
|
"learning_rate": 3.296703296703297e-07, |
|
"loss": 0.6765, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.007692307692307693, |
|
"grad_norm": 1.1062999963760376, |
|
"learning_rate": 3.846153846153847e-07, |
|
"loss": 0.5946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.008791208791208791, |
|
"grad_norm": 1.363224744796753, |
|
"learning_rate": 4.3956043956043957e-07, |
|
"loss": 0.5402, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.00989010989010989, |
|
"grad_norm": 0.9122905731201172, |
|
"learning_rate": 4.945054945054946e-07, |
|
"loss": 0.464, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01098901098901099, |
|
"grad_norm": 0.676691472530365, |
|
"learning_rate": 5.494505494505495e-07, |
|
"loss": 0.4041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.012087912087912088, |
|
"grad_norm": 0.5926629900932312, |
|
"learning_rate": 6.043956043956044e-07, |
|
"loss": 0.3727, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.013186813186813187, |
|
"grad_norm": 0.635013997554779, |
|
"learning_rate": 6.593406593406594e-07, |
|
"loss": 0.3609, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.014285714285714285, |
|
"grad_norm": 0.5836207270622253, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.3331, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.015384615384615385, |
|
"grad_norm": 0.548773467540741, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 0.3201, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.016483516483516484, |
|
"grad_norm": 0.5962342023849487, |
|
"learning_rate": 8.241758241758242e-07, |
|
"loss": 0.2993, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.017582417582417582, |
|
"grad_norm": 0.5346819162368774, |
|
"learning_rate": 8.791208791208791e-07, |
|
"loss": 0.2792, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.01868131868131868, |
|
"grad_norm": 0.569210946559906, |
|
"learning_rate": 9.340659340659341e-07, |
|
"loss": 0.273, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.01978021978021978, |
|
"grad_norm": 0.5142342448234558, |
|
"learning_rate": 9.890109890109891e-07, |
|
"loss": 0.2621, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.020879120879120878, |
|
"grad_norm": 0.5067290663719177, |
|
"learning_rate": 1.043956043956044e-06, |
|
"loss": 0.2641, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02197802197802198, |
|
"grad_norm": 0.44699764251708984, |
|
"learning_rate": 1.098901098901099e-06, |
|
"loss": 0.2553, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.023076923076923078, |
|
"grad_norm": 0.5279501080513, |
|
"learning_rate": 1.153846153846154e-06, |
|
"loss": 0.2488, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.024175824175824177, |
|
"grad_norm": 0.5009133219718933, |
|
"learning_rate": 1.2087912087912089e-06, |
|
"loss": 0.2499, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.025274725274725275, |
|
"grad_norm": 0.5484806895256042, |
|
"learning_rate": 1.263736263736264e-06, |
|
"loss": 0.2485, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.026373626373626374, |
|
"grad_norm": 0.44114941358566284, |
|
"learning_rate": 1.3186813186813187e-06, |
|
"loss": 0.2499, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.027472527472527472, |
|
"grad_norm": 0.5557438731193542, |
|
"learning_rate": 1.3736263736263736e-06, |
|
"loss": 0.2455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02857142857142857, |
|
"grad_norm": 0.5493007898330688, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.2396, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.02967032967032967, |
|
"grad_norm": 0.5284810662269592, |
|
"learning_rate": 1.4835164835164835e-06, |
|
"loss": 0.2383, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"grad_norm": 0.5073139667510986, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 0.2389, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.031868131868131866, |
|
"grad_norm": 0.5186975002288818, |
|
"learning_rate": 1.5934065934065933e-06, |
|
"loss": 0.2372, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.03296703296703297, |
|
"grad_norm": 0.5739095211029053, |
|
"learning_rate": 1.6483516483516484e-06, |
|
"loss": 0.2358, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03406593406593406, |
|
"grad_norm": 0.5144438147544861, |
|
"learning_rate": 1.7032967032967032e-06, |
|
"loss": 0.2319, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.035164835164835165, |
|
"grad_norm": 0.4886190593242645, |
|
"learning_rate": 1.7582417582417583e-06, |
|
"loss": 0.2297, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03626373626373627, |
|
"grad_norm": 0.6088211536407471, |
|
"learning_rate": 1.8131868131868135e-06, |
|
"loss": 0.2356, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.03736263736263736, |
|
"grad_norm": 0.4712292551994324, |
|
"learning_rate": 1.8681318681318681e-06, |
|
"loss": 0.2334, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 0.5712177157402039, |
|
"learning_rate": 1.9230769230769234e-06, |
|
"loss": 0.2302, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03956043956043956, |
|
"grad_norm": 0.5427699089050293, |
|
"learning_rate": 1.9780219780219782e-06, |
|
"loss": 0.2248, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.04065934065934066, |
|
"grad_norm": 0.6642568707466125, |
|
"learning_rate": 2.032967032967033e-06, |
|
"loss": 0.2291, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.041758241758241756, |
|
"grad_norm": 0.5859007239341736, |
|
"learning_rate": 2.087912087912088e-06, |
|
"loss": 0.227, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.04285714285714286, |
|
"grad_norm": 0.6507712602615356, |
|
"learning_rate": 2.142857142857143e-06, |
|
"loss": 0.227, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.04395604395604396, |
|
"grad_norm": 0.5675429105758667, |
|
"learning_rate": 2.197802197802198e-06, |
|
"loss": 0.2259, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.045054945054945054, |
|
"grad_norm": 0.6223055124282837, |
|
"learning_rate": 2.252747252747253e-06, |
|
"loss": 0.2283, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.046153846153846156, |
|
"grad_norm": 0.5504657030105591, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 0.2246, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.04725274725274725, |
|
"grad_norm": 0.48020097613334656, |
|
"learning_rate": 2.3626373626373625e-06, |
|
"loss": 0.225, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.04835164835164835, |
|
"grad_norm": 0.4979713261127472, |
|
"learning_rate": 2.4175824175824177e-06, |
|
"loss": 0.2212, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.04945054945054945, |
|
"grad_norm": 0.49497634172439575, |
|
"learning_rate": 2.4725274725274726e-06, |
|
"loss": 0.2234, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05054945054945055, |
|
"grad_norm": 0.6207996010780334, |
|
"learning_rate": 2.527472527472528e-06, |
|
"loss": 0.2256, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.051648351648351645, |
|
"grad_norm": 0.530981719493866, |
|
"learning_rate": 2.5824175824175822e-06, |
|
"loss": 0.2231, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.05274725274725275, |
|
"grad_norm": 0.5495067834854126, |
|
"learning_rate": 2.6373626373626375e-06, |
|
"loss": 0.223, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05384615384615385, |
|
"grad_norm": 0.5651763081550598, |
|
"learning_rate": 2.6923076923076928e-06, |
|
"loss": 0.2213, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.054945054945054944, |
|
"grad_norm": 0.553247332572937, |
|
"learning_rate": 2.747252747252747e-06, |
|
"loss": 0.2219, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 91000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.336356548608e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|