{ "best_metric": 0.8683206106870229, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-590", "epoch": 9.898305084745763, "eval_steps": 500, "global_step": 730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13559322033898305, "grad_norm": 12.995793342590332, "learning_rate": 0.004945205479452055, "loss": 0.7148, "step": 10 }, { "epoch": 0.2711864406779661, "grad_norm": 1.2968316078186035, "learning_rate": 0.004876712328767123, "loss": 0.612, "step": 20 }, { "epoch": 0.4067796610169492, "grad_norm": 1.6618480682373047, "learning_rate": 0.004808219178082192, "loss": 0.5877, "step": 30 }, { "epoch": 0.5423728813559322, "grad_norm": 5.746555328369141, "learning_rate": 0.00473972602739726, "loss": 0.5768, "step": 40 }, { "epoch": 0.6779661016949152, "grad_norm": 1.3945930004119873, "learning_rate": 0.0046712328767123295, "loss": 0.5698, "step": 50 }, { "epoch": 0.8135593220338984, "grad_norm": 6.072067737579346, "learning_rate": 0.004602739726027398, "loss": 0.5479, "step": 60 }, { "epoch": 0.9491525423728814, "grad_norm": 0.36280161142349243, "learning_rate": 0.004534246575342466, "loss": 0.5947, "step": 70 }, { "epoch": 0.9898305084745763, "eval_accuracy": 0.7423664122137404, "eval_f1": 0.42606790799561883, "eval_loss": 0.5164713263511658, "eval_precision": 0.3711832061068702, "eval_recall": 0.5, "eval_runtime": 2.9766, "eval_samples_per_second": 176.038, "eval_steps_per_second": 11.086, "step": 73 }, { "epoch": 1.0847457627118644, "grad_norm": 3.3023428916931152, "learning_rate": 0.004465753424657534, "loss": 0.5075, "step": 80 }, { "epoch": 1.2203389830508475, "grad_norm": 4.128161430358887, "learning_rate": 0.004397260273972603, "loss": 0.641, "step": 90 }, { "epoch": 1.3559322033898304, "grad_norm": 7.110351085662842, "learning_rate": 0.004328767123287672, "loss": 0.7467, "step": 100 }, { "epoch": 1.4915254237288136, "grad_norm": 0.7752661108970642, "learning_rate": 0.00426027397260274, "loss": 0.5226, "step": 110 }, { "epoch": 1.6271186440677967, "grad_norm": 0.3319437801837921, "learning_rate": 0.004191780821917808, "loss": 0.5337, "step": 120 }, { "epoch": 1.7627118644067796, "grad_norm": 1.7883654832839966, "learning_rate": 0.004123287671232877, "loss": 0.4819, "step": 130 }, { "epoch": 1.8983050847457628, "grad_norm": 0.5220211148262024, "learning_rate": 0.0040547945205479455, "loss": 0.4888, "step": 140 }, { "epoch": 1.993220338983051, "eval_accuracy": 0.8568702290076335, "eval_f1": 0.8151240738562859, "eval_loss": 0.3449835777282715, "eval_precision": 0.8115699404761905, "eval_recall": 0.8189564886222984, "eval_runtime": 3.0609, "eval_samples_per_second": 171.191, "eval_steps_per_second": 10.781, "step": 147 }, { "epoch": 2.0338983050847457, "grad_norm": 5.3052544593811035, "learning_rate": 0.003986301369863014, "loss": 0.4278, "step": 150 }, { "epoch": 2.169491525423729, "grad_norm": 4.525509357452393, "learning_rate": 0.003917808219178082, "loss": 0.4448, "step": 160 }, { "epoch": 2.305084745762712, "grad_norm": 6.519794940948486, "learning_rate": 0.0038493150684931507, "loss": 0.4817, "step": 170 }, { "epoch": 2.440677966101695, "grad_norm": 1.3314294815063477, "learning_rate": 0.0037808219178082194, "loss": 0.4393, "step": 180 }, { "epoch": 2.576271186440678, "grad_norm": 0.6930696368217468, "learning_rate": 0.0037123287671232876, "loss": 0.3842, "step": 190 }, { "epoch": 2.711864406779661, "grad_norm": 3.826169729232788, "learning_rate": 0.0036438356164383563, "loss": 0.4206, "step": 200 }, { "epoch": 2.847457627118644, "grad_norm": 0.5641322135925293, "learning_rate": 0.0035753424657534246, "loss": 0.3684, "step": 210 }, { "epoch": 2.983050847457627, "grad_norm": 3.910162925720215, "learning_rate": 0.0035068493150684932, "loss": 0.4022, "step": 220 }, { "epoch": 2.9966101694915253, "eval_accuracy": 0.833969465648855, "eval_f1": 0.8079106043560151, "eval_loss": 0.42247942090034485, "eval_precision": 0.7914074166251867, "eval_recall": 0.8567361706179186, "eval_runtime": 3.2139, "eval_samples_per_second": 163.043, "eval_steps_per_second": 10.268, "step": 221 }, { "epoch": 3.1186440677966103, "grad_norm": 1.5272403955459595, "learning_rate": 0.0034383561643835615, "loss": 0.4376, "step": 230 }, { "epoch": 3.2542372881355934, "grad_norm": 0.9074549674987793, "learning_rate": 0.00336986301369863, "loss": 0.403, "step": 240 }, { "epoch": 3.389830508474576, "grad_norm": 0.9038354158401489, "learning_rate": 0.0033013698630136984, "loss": 0.3898, "step": 250 }, { "epoch": 3.5254237288135593, "grad_norm": 0.5530111193656921, "learning_rate": 0.003232876712328767, "loss": 0.3958, "step": 260 }, { "epoch": 3.6610169491525424, "grad_norm": 3.4515538215637207, "learning_rate": 0.0031643835616438354, "loss": 0.4354, "step": 270 }, { "epoch": 3.7966101694915255, "grad_norm": 5.728641986846924, "learning_rate": 0.003095890410958904, "loss": 0.4601, "step": 280 }, { "epoch": 3.9322033898305087, "grad_norm": 0.6854091882705688, "learning_rate": 0.0030273972602739723, "loss": 0.4319, "step": 290 }, { "epoch": 4.0, "eval_accuracy": 0.8587786259541985, "eval_f1": 0.8292197382097493, "eval_loss": 0.3599797189235687, "eval_precision": 0.8122691052465915, "eval_recall": 0.8589355422260307, "eval_runtime": 2.983, "eval_samples_per_second": 175.665, "eval_steps_per_second": 11.063, "step": 295 }, { "epoch": 4.067796610169491, "grad_norm": 4.648618221282959, "learning_rate": 0.002958904109589041, "loss": 0.3716, "step": 300 }, { "epoch": 4.203389830508475, "grad_norm": 2.3614566326141357, "learning_rate": 0.0028904109589041093, "loss": 0.423, "step": 310 }, { "epoch": 4.338983050847458, "grad_norm": 1.8763720989227295, "learning_rate": 0.0028219178082191784, "loss": 0.3737, "step": 320 }, { "epoch": 4.47457627118644, "grad_norm": 3.2308576107025146, "learning_rate": 0.002753424657534246, "loss": 0.4613, "step": 330 }, { "epoch": 4.610169491525424, "grad_norm": 1.1305418014526367, "learning_rate": 0.0026849315068493153, "loss": 0.377, "step": 340 }, { "epoch": 4.745762711864407, "grad_norm": 0.8450165390968323, "learning_rate": 0.0026164383561643836, "loss": 0.3981, "step": 350 }, { "epoch": 4.88135593220339, "grad_norm": 0.7082638144493103, "learning_rate": 0.0025479452054794523, "loss": 0.3836, "step": 360 }, { "epoch": 4.989830508474577, "eval_accuracy": 0.851145038167939, "eval_f1": 0.8233035899564225, "eval_loss": 0.36647942662239075, "eval_precision": 0.8054056666076059, "eval_recall": 0.8610492240312292, "eval_runtime": 3.005, "eval_samples_per_second": 174.375, "eval_steps_per_second": 10.982, "step": 368 }, { "epoch": 5.016949152542373, "grad_norm": 0.8164511322975159, "learning_rate": 0.0024794520547945205, "loss": 0.3491, "step": 370 }, { "epoch": 5.1525423728813555, "grad_norm": 2.7462785243988037, "learning_rate": 0.002410958904109589, "loss": 0.4279, "step": 380 }, { "epoch": 5.288135593220339, "grad_norm": 0.1708032637834549, "learning_rate": 0.0023424657534246575, "loss": 0.3868, "step": 390 }, { "epoch": 5.423728813559322, "grad_norm": 0.4240102469921112, "learning_rate": 0.002273972602739726, "loss": 0.416, "step": 400 }, { "epoch": 5.559322033898305, "grad_norm": 0.3024919331073761, "learning_rate": 0.002205479452054795, "loss": 0.3745, "step": 410 }, { "epoch": 5.694915254237288, "grad_norm": 0.4930131733417511, "learning_rate": 0.002136986301369863, "loss": 0.4183, "step": 420 }, { "epoch": 5.830508474576272, "grad_norm": 2.3017737865448, "learning_rate": 0.002068493150684932, "loss": 0.3791, "step": 430 }, { "epoch": 5.966101694915254, "grad_norm": 1.3229153156280518, "learning_rate": 0.002, "loss": 0.3887, "step": 440 }, { "epoch": 5.9932203389830505, "eval_accuracy": 0.8645038167938931, "eval_f1": 0.8382821499206712, "eval_loss": 0.36667701601982117, "eval_precision": 0.8196564265766058, "eval_recall": 0.8748833666571456, "eval_runtime": 3.2223, "eval_samples_per_second": 162.616, "eval_steps_per_second": 10.241, "step": 442 }, { "epoch": 6.101694915254237, "grad_norm": 1.612464189529419, "learning_rate": 0.0019315068493150687, "loss": 0.3626, "step": 450 }, { "epoch": 6.237288135593221, "grad_norm": 0.30710288882255554, "learning_rate": 0.0018630136986301372, "loss": 0.4014, "step": 460 }, { "epoch": 6.372881355932203, "grad_norm": 1.5897902250289917, "learning_rate": 0.0017945205479452057, "loss": 0.3953, "step": 470 }, { "epoch": 6.508474576271187, "grad_norm": 1.6094199419021606, "learning_rate": 0.0017260273972602741, "loss": 0.4189, "step": 480 }, { "epoch": 6.6440677966101696, "grad_norm": 1.078356385231018, "learning_rate": 0.0016575342465753426, "loss": 0.4268, "step": 490 }, { "epoch": 6.779661016949152, "grad_norm": 2.4723339080810547, "learning_rate": 0.001589041095890411, "loss": 0.4025, "step": 500 }, { "epoch": 6.915254237288136, "grad_norm": 1.7719030380249023, "learning_rate": 0.0015205479452054796, "loss": 0.3947, "step": 510 }, { "epoch": 6.996610169491525, "eval_accuracy": 0.8530534351145038, "eval_f1": 0.8282881157569955, "eval_loss": 0.39514508843421936, "eval_precision": 0.8098487236054208, "eval_recall": 0.8744263543749404, "eval_runtime": 2.9797, "eval_samples_per_second": 175.856, "eval_steps_per_second": 11.075, "step": 516 }, { "epoch": 7.0508474576271185, "grad_norm": 0.9901585578918457, "learning_rate": 0.0014520547945205478, "loss": 0.364, "step": 520 }, { "epoch": 7.186440677966102, "grad_norm": 1.4664475917816162, "learning_rate": 0.0013835616438356163, "loss": 0.3738, "step": 530 }, { "epoch": 7.322033898305085, "grad_norm": 1.8693777322769165, "learning_rate": 0.0013150684931506848, "loss": 0.409, "step": 540 }, { "epoch": 7.4576271186440675, "grad_norm": 1.6105625629425049, "learning_rate": 0.0012465753424657534, "loss": 0.373, "step": 550 }, { "epoch": 7.593220338983051, "grad_norm": 1.700050950050354, "learning_rate": 0.001178082191780822, "loss": 0.3899, "step": 560 }, { "epoch": 7.728813559322034, "grad_norm": 0.6655128598213196, "learning_rate": 0.0011095890410958904, "loss": 0.3573, "step": 570 }, { "epoch": 7.864406779661017, "grad_norm": 2.1642274856567383, "learning_rate": 0.0010410958904109589, "loss": 0.3828, "step": 580 }, { "epoch": 8.0, "grad_norm": 1.8888564109802246, "learning_rate": 0.0009726027397260274, "loss": 0.3741, "step": 590 }, { "epoch": 8.0, "eval_accuracy": 0.8683206106870229, "eval_f1": 0.8398298890291713, "eval_loss": 0.34485381841659546, "eval_precision": 0.8229333868378812, "eval_recall": 0.8677806341045415, "eval_runtime": 2.988, "eval_samples_per_second": 175.367, "eval_steps_per_second": 11.044, "step": 590 }, { "epoch": 8.135593220338983, "grad_norm": 1.7316805124282837, "learning_rate": 0.0009041095890410959, "loss": 0.4155, "step": 600 }, { "epoch": 8.271186440677965, "grad_norm": 0.8091392517089844, "learning_rate": 0.0008356164383561645, "loss": 0.3834, "step": 610 }, { "epoch": 8.40677966101695, "grad_norm": 0.47697487473487854, "learning_rate": 0.000767123287671233, "loss": 0.3501, "step": 620 }, { "epoch": 8.542372881355933, "grad_norm": 0.419321209192276, "learning_rate": 0.0006986301369863014, "loss": 0.349, "step": 630 }, { "epoch": 8.677966101694915, "grad_norm": 2.6489877700805664, "learning_rate": 0.0006301369863013699, "loss": 0.3791, "step": 640 }, { "epoch": 8.813559322033898, "grad_norm": 0.7624053359031677, "learning_rate": 0.0005616438356164384, "loss": 0.3655, "step": 650 }, { "epoch": 8.94915254237288, "grad_norm": 0.44931384921073914, "learning_rate": 0.0004931506849315068, "loss": 0.3964, "step": 660 }, { "epoch": 8.989830508474576, "eval_accuracy": 0.8587786259541985, "eval_f1": 0.830509659935309, "eval_loss": 0.3624710738658905, "eval_precision": 0.8127875562832583, "eval_recall": 0.8637722555460345, "eval_runtime": 3.2987, "eval_samples_per_second": 158.852, "eval_steps_per_second": 10.004, "step": 663 }, { "epoch": 9.084745762711865, "grad_norm": 0.7742342948913574, "learning_rate": 0.00042465753424657536, "loss": 0.3609, "step": 670 }, { "epoch": 9.220338983050848, "grad_norm": 0.2361624389886856, "learning_rate": 0.00035616438356164383, "loss": 0.3231, "step": 680 }, { "epoch": 9.35593220338983, "grad_norm": 0.3743881583213806, "learning_rate": 0.00028767123287671236, "loss": 0.3835, "step": 690 }, { "epoch": 9.491525423728813, "grad_norm": 0.3935371935367584, "learning_rate": 0.00021917808219178083, "loss": 0.3503, "step": 700 }, { "epoch": 9.627118644067796, "grad_norm": 0.6624675393104553, "learning_rate": 0.00015068493150684933, "loss": 0.3023, "step": 710 }, { "epoch": 9.76271186440678, "grad_norm": 1.4423840045928955, "learning_rate": 8.21917808219178e-05, "loss": 0.404, "step": 720 }, { "epoch": 9.898305084745763, "grad_norm": 0.639860212802887, "learning_rate": 1.3698630136986302e-05, "loss": 0.3845, "step": 730 }, { "epoch": 9.898305084745763, "eval_accuracy": 0.8568702290076335, "eval_f1": 0.8291712851274695, "eval_loss": 0.3568853735923767, "eval_precision": 0.8111482756381113, "eval_recall": 0.8649052651623346, "eval_runtime": 3.0816, "eval_samples_per_second": 170.043, "eval_steps_per_second": 10.709, "step": 730 }, { "epoch": 9.898305084745763, "step": 730, "total_flos": 3.6354082184735293e+18, "train_loss": 0.42946596570210915, "train_runtime": 617.6458, "train_samples_per_second": 76.225, "train_steps_per_second": 1.182 } ], "logging_steps": 10, "max_steps": 730, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.6354082184735293e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }