dlkp_test / trainer_state.json
dmahata's picture
Upload trainer_state.json
7e5f2d9
raw
history blame
No virus
12.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4,
"learning_rate": 2.88e-05,
"loss": 0.3202,
"step": 100
},
{
"epoch": 0.4,
"eval_accuracy": 0.8813009961455675,
"eval_f1": 0.16261325703385787,
"eval_loss": 0.24328218400478363,
"eval_precision": 0.2519394163280384,
"eval_recall": 0.12004928709734201,
"eval_runtime": 4.054,
"eval_samples_per_second": 123.334,
"eval_steps_per_second": 30.833,
"step": 100
},
{
"epoch": 0.8,
"learning_rate": 2.7600000000000003e-05,
"loss": 0.23,
"step": 200
},
{
"epoch": 0.8,
"eval_accuracy": 0.9034639835811182,
"eval_f1": 0.49651100375738055,
"eval_loss": 0.21028906106948853,
"eval_precision": 0.5048208113516464,
"eval_recall": 0.48847033972892095,
"eval_runtime": 4.2572,
"eval_samples_per_second": 117.449,
"eval_steps_per_second": 29.362,
"step": 200
},
{
"epoch": 1.2,
"learning_rate": 2.64e-05,
"loss": 0.2013,
"step": 300
},
{
"epoch": 1.2,
"eval_accuracy": 0.8792611503228713,
"eval_f1": 0.5627305035874741,
"eval_loss": 0.2621181607246399,
"eval_precision": 0.4545060658578856,
"eval_recall": 0.7386023587396585,
"eval_runtime": 4.1357,
"eval_samples_per_second": 120.897,
"eval_steps_per_second": 30.224,
"step": 300
},
{
"epoch": 1.6,
"learning_rate": 2.52e-05,
"loss": 0.1874,
"step": 400
},
{
"epoch": 1.6,
"eval_accuracy": 0.893452470340892,
"eval_f1": 0.5619149696320114,
"eval_loss": 0.2326020449399948,
"eval_precision": 0.47293721433726243,
"eval_recall": 0.6921316669600422,
"eval_runtime": 4.241,
"eval_samples_per_second": 117.897,
"eval_steps_per_second": 29.474,
"step": 400
},
{
"epoch": 2.0,
"learning_rate": 2.4e-05,
"loss": 0.1847,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9060920058066777,
"eval_f1": 0.5619039721369932,
"eval_loss": 0.20794397592544556,
"eval_precision": 0.5312010034493572,
"eval_recall": 0.5963738778384088,
"eval_runtime": 4.124,
"eval_samples_per_second": 121.24,
"eval_steps_per_second": 30.31,
"step": 500
},
{
"epoch": 2.4,
"learning_rate": 2.2800000000000002e-05,
"loss": 0.1567,
"step": 600
},
{
"epoch": 2.4,
"eval_accuracy": 0.9071932722631025,
"eval_f1": 0.503008186211658,
"eval_loss": 0.23015139997005463,
"eval_precision": 0.5720053835800808,
"eval_recall": 0.4488646365076571,
"eval_runtime": 4.1201,
"eval_samples_per_second": 121.355,
"eval_steps_per_second": 30.339,
"step": 600
},
{
"epoch": 2.8,
"learning_rate": 2.16e-05,
"loss": 0.1484,
"step": 700
},
{
"epoch": 2.8,
"eval_accuracy": 0.9038269009360765,
"eval_f1": 0.5580642412882338,
"eval_loss": 0.22997109591960907,
"eval_precision": 0.540785997357992,
"eval_recall": 0.5764830135539518,
"eval_runtime": 4.134,
"eval_samples_per_second": 120.948,
"eval_steps_per_second": 30.237,
"step": 700
},
{
"epoch": 3.2,
"learning_rate": 2.04e-05,
"loss": 0.1388,
"step": 800
},
{
"epoch": 3.2,
"eval_accuracy": 0.9075561896180607,
"eval_f1": 0.5364304509572634,
"eval_loss": 0.2365296632051468,
"eval_precision": 0.5535580524344569,
"eval_recall": 0.5203309276535821,
"eval_runtime": 4.1587,
"eval_samples_per_second": 120.229,
"eval_steps_per_second": 30.057,
"step": 800
},
{
"epoch": 3.6,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.1191,
"step": 900
},
{
"epoch": 3.6,
"eval_accuracy": 0.9067302397757421,
"eval_f1": 0.574726200505476,
"eval_loss": 0.26086461544036865,
"eval_precision": 0.5511391177896268,
"eval_recall": 0.6004224608343601,
"eval_runtime": 4.1544,
"eval_samples_per_second": 120.354,
"eval_steps_per_second": 30.088,
"step": 900
},
{
"epoch": 4.0,
"learning_rate": 1.8e-05,
"loss": 0.1193,
"step": 1000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9059543474996246,
"eval_f1": 0.5809305373525557,
"eval_loss": 0.25283825397491455,
"eval_precision": 0.543281752719473,
"eval_recall": 0.6241858827671185,
"eval_runtime": 4.1474,
"eval_samples_per_second": 120.557,
"eval_steps_per_second": 30.139,
"step": 1000
},
{
"epoch": 4.4,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.088,
"step": 1100
},
{
"epoch": 4.4,
"eval_accuracy": 0.9037142714121239,
"eval_f1": 0.5845009103142563,
"eval_loss": 0.2839806079864502,
"eval_precision": 0.5310701956271576,
"eval_recall": 0.6498855835240275,
"eval_runtime": 4.1556,
"eval_samples_per_second": 120.318,
"eval_steps_per_second": 30.08,
"step": 1100
},
{
"epoch": 4.8,
"learning_rate": 1.56e-05,
"loss": 0.0924,
"step": 1200
},
{
"epoch": 4.8,
"eval_accuracy": 0.9085197977674325,
"eval_f1": 0.5776627856834843,
"eval_loss": 0.27629220485687256,
"eval_precision": 0.5662833953331079,
"eval_recall": 0.5895088892800563,
"eval_runtime": 4.1675,
"eval_samples_per_second": 119.975,
"eval_steps_per_second": 29.994,
"step": 1200
},
{
"epoch": 5.2,
"learning_rate": 1.44e-05,
"loss": 0.0834,
"step": 1300
},
{
"epoch": 5.2,
"eval_accuracy": 0.9037267858036743,
"eval_f1": 0.5866475003992974,
"eval_loss": 0.332010954618454,
"eval_precision": 0.5369098085075281,
"eval_recall": 0.6465411019186763,
"eval_runtime": 4.1738,
"eval_samples_per_second": 119.795,
"eval_steps_per_second": 29.949,
"step": 1300
},
{
"epoch": 5.6,
"learning_rate": 1.32e-05,
"loss": 0.0654,
"step": 1400
},
{
"epoch": 5.6,
"eval_accuracy": 0.9057416028432698,
"eval_f1": 0.574710687542546,
"eval_loss": 0.32423922419548035,
"eval_precision": 0.5562510294844342,
"eval_recall": 0.5944375990142581,
"eval_runtime": 4.1539,
"eval_samples_per_second": 120.368,
"eval_steps_per_second": 30.092,
"step": 1400
},
{
"epoch": 6.0,
"learning_rate": 1.2e-05,
"loss": 0.0689,
"step": 1500
},
{
"epoch": 6.0,
"eval_accuracy": 0.9046403363868448,
"eval_f1": 0.5581112750629285,
"eval_loss": 0.31789475679397583,
"eval_precision": 0.550513698630137,
"eval_recall": 0.5659214926949481,
"eval_runtime": 4.1716,
"eval_samples_per_second": 119.859,
"eval_steps_per_second": 29.965,
"step": 1500
},
{
"epoch": 6.4,
"learning_rate": 1.08e-05,
"loss": 0.0498,
"step": 1600
},
{
"epoch": 6.4,
"eval_accuracy": 0.9053661710967613,
"eval_f1": 0.5820808768579258,
"eval_loss": 0.38915345072746277,
"eval_precision": 0.5509273813266269,
"eval_recall": 0.6169688435134659,
"eval_runtime": 4.1814,
"eval_samples_per_second": 119.577,
"eval_steps_per_second": 29.894,
"step": 1600
},
{
"epoch": 6.8,
"learning_rate": 9.600000000000001e-06,
"loss": 0.0528,
"step": 1700
},
{
"epoch": 6.8,
"eval_accuracy": 0.9048155378685488,
"eval_f1": 0.5776866283839212,
"eval_loss": 0.3601633608341217,
"eval_precision": 0.5409433092640958,
"eval_recall": 0.619785249075867,
"eval_runtime": 4.1473,
"eval_samples_per_second": 120.56,
"eval_steps_per_second": 30.14,
"step": 1700
},
{
"epoch": 7.2,
"learning_rate": 8.400000000000001e-06,
"loss": 0.0474,
"step": 1800
},
{
"epoch": 7.2,
"eval_accuracy": 0.9040396455924313,
"eval_f1": 0.5793253173012691,
"eval_loss": 0.39758625626564026,
"eval_precision": 0.5510722795869738,
"eval_recall": 0.6106319309980637,
"eval_runtime": 4.1737,
"eval_samples_per_second": 119.798,
"eval_steps_per_second": 29.949,
"step": 1800
},
{
"epoch": 7.6,
"learning_rate": 7.2e-06,
"loss": 0.039,
"step": 1900
},
{
"epoch": 7.6,
"eval_accuracy": 0.9035766131050709,
"eval_f1": 0.5778368499750789,
"eval_loss": 0.4138449728488922,
"eval_precision": 0.5471134182790625,
"eval_recall": 0.6122161591269143,
"eval_runtime": 4.1525,
"eval_samples_per_second": 120.408,
"eval_steps_per_second": 30.102,
"step": 1900
},
{
"epoch": 8.0,
"learning_rate": 6e-06,
"loss": 0.0446,
"step": 2000
},
{
"epoch": 8.0,
"eval_accuracy": 0.9039520448515793,
"eval_f1": 0.5882447535579319,
"eval_loss": 0.408151775598526,
"eval_precision": 0.5414446417998816,
"eval_recall": 0.6439007217039253,
"eval_runtime": 4.1562,
"eval_samples_per_second": 120.303,
"eval_steps_per_second": 30.076,
"step": 2000
},
{
"epoch": 8.4,
"learning_rate": 4.800000000000001e-06,
"loss": 0.0333,
"step": 2100
},
{
"epoch": 8.4,
"eval_accuracy": 0.9046528507783952,
"eval_f1": 0.5720617062984743,
"eval_loss": 0.4318484365940094,
"eval_precision": 0.5545274289491078,
"eval_recall": 0.5907410667136067,
"eval_runtime": 4.1724,
"eval_samples_per_second": 119.834,
"eval_steps_per_second": 29.959,
"step": 2100
},
{
"epoch": 8.8,
"learning_rate": 3.6e-06,
"loss": 0.0327,
"step": 2200
},
{
"epoch": 8.8,
"eval_accuracy": 0.9054913150122641,
"eval_f1": 0.5734657499363381,
"eval_loss": 0.4232546091079712,
"eval_precision": 0.5537704918032786,
"eval_recall": 0.5946136243619081,
"eval_runtime": 4.1536,
"eval_samples_per_second": 120.378,
"eval_steps_per_second": 30.095,
"step": 2200
},
{
"epoch": 9.2,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.03,
"step": 2300
},
{
"epoch": 9.2,
"eval_accuracy": 0.9049782249587025,
"eval_f1": 0.5769523005487548,
"eval_loss": 0.44003215432167053,
"eval_precision": 0.5543478260869565,
"eval_recall": 0.6014786129202605,
"eval_runtime": 4.2605,
"eval_samples_per_second": 117.358,
"eval_steps_per_second": 29.339,
"step": 2300
},
{
"epoch": 9.6,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.0286,
"step": 2400
},
{
"epoch": 9.6,
"eval_accuracy": 0.9048280522600991,
"eval_f1": 0.5807528586929305,
"eval_loss": 0.4442707598209381,
"eval_precision": 0.5522222222222222,
"eval_recall": 0.6123921844745643,
"eval_runtime": 4.3652,
"eval_samples_per_second": 114.542,
"eval_steps_per_second": 28.636,
"step": 2400
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.0261,
"step": 2500
},
{
"epoch": 10.0,
"eval_accuracy": 0.9050407969164539,
"eval_f1": 0.5811535881958416,
"eval_loss": 0.4490407407283783,
"eval_precision": 0.5548263166319833,
"eval_recall": 0.6101038549551135,
"eval_runtime": 4.2364,
"eval_samples_per_second": 118.025,
"eval_steps_per_second": 29.506,
"step": 2500
},
{
"epoch": 10.0,
"step": 2500,
"total_flos": 2612991191040000.0,
"train_loss": 0.10352115373611451,
"train_runtime": 364.2147,
"train_samples_per_second": 27.456,
"train_steps_per_second": 6.864
}
],
"max_steps": 2500,
"num_train_epochs": 10,
"total_flos": 2612991191040000.0,
"trial_name": null,
"trial_params": null
}