{ "best_metric": 0.7726819541375872, "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-1090", "epoch": 9.954337899543379, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "grad_norm": 4.391805171966553, "learning_rate": 0.004954128440366973, "loss": 1.2554, "step": 10 }, { "epoch": 0.18, "grad_norm": 1.9513263702392578, "learning_rate": 0.004908256880733945, "loss": 1.0074, "step": 20 }, { "epoch": 0.27, "grad_norm": 3.8831961154937744, "learning_rate": 0.004862385321100918, "loss": 0.9068, "step": 30 }, { "epoch": 0.37, "grad_norm": 2.7356910705566406, "learning_rate": 0.00481651376146789, "loss": 0.9577, "step": 40 }, { "epoch": 0.46, "grad_norm": 2.5790963172912598, "learning_rate": 0.0047706422018348625, "loss": 0.9124, "step": 50 }, { "epoch": 0.55, "grad_norm": 2.810718297958374, "learning_rate": 0.004724770642201835, "loss": 0.8901, "step": 60 }, { "epoch": 0.64, "grad_norm": 2.3134875297546387, "learning_rate": 0.004678899082568808, "loss": 0.9111, "step": 70 }, { "epoch": 0.73, "grad_norm": 1.8082305192947388, "learning_rate": 0.00463302752293578, "loss": 0.9059, "step": 80 }, { "epoch": 0.82, "grad_norm": 1.4370285272598267, "learning_rate": 0.0045871559633027525, "loss": 0.9984, "step": 90 }, { "epoch": 0.91, "grad_norm": 2.8687102794647217, "learning_rate": 0.004541284403669725, "loss": 0.9135, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.7198404785643071, "eval_f1": 0.3049533355905189, "eval_loss": 0.7698342800140381, "eval_precision": 0.5179078489450006, "eval_recall": 0.3103063518754385, "eval_runtime": 5.826, "eval_samples_per_second": 172.159, "eval_steps_per_second": 10.814, "step": 109 }, { "epoch": 1.0, "grad_norm": 1.885165810585022, "learning_rate": 0.004495412844036698, "loss": 0.8622, "step": 110 }, { "epoch": 1.1, "grad_norm": 2.4616353511810303, "learning_rate": 0.0044495412844036695, "loss": 0.9407, "step": 120 }, { "epoch": 1.19, "grad_norm": 1.2185616493225098, "learning_rate": 0.004403669724770643, "loss": 0.8853, "step": 130 }, { "epoch": 1.28, "grad_norm": 2.0212063789367676, "learning_rate": 0.004357798165137615, "loss": 0.8534, "step": 140 }, { "epoch": 1.37, "grad_norm": 1.129385232925415, "learning_rate": 0.004311926605504587, "loss": 0.8613, "step": 150 }, { "epoch": 1.46, "grad_norm": 1.726841926574707, "learning_rate": 0.0042660550458715595, "loss": 0.8104, "step": 160 }, { "epoch": 1.55, "grad_norm": 2.272818088531494, "learning_rate": 0.004220183486238533, "loss": 0.915, "step": 170 }, { "epoch": 1.64, "grad_norm": 1.117639422416687, "learning_rate": 0.004174311926605505, "loss": 0.8753, "step": 180 }, { "epoch": 1.74, "grad_norm": 0.8981102108955383, "learning_rate": 0.004128440366972477, "loss": 0.8429, "step": 190 }, { "epoch": 1.83, "grad_norm": 1.282863974571228, "learning_rate": 0.00408256880733945, "loss": 0.8355, "step": 200 }, { "epoch": 1.92, "grad_norm": 0.9718325734138489, "learning_rate": 0.004036697247706422, "loss": 0.8352, "step": 210 }, { "epoch": 2.0, "eval_accuracy": 0.7298105682951147, "eval_f1": 0.3883733866690214, "eval_loss": 0.7352049350738525, "eval_precision": 0.536225724752637, "eval_recall": 0.42308831430611293, "eval_runtime": 5.7891, "eval_samples_per_second": 173.258, "eval_steps_per_second": 10.883, "step": 219 }, { "epoch": 2.01, "grad_norm": 1.3424675464630127, "learning_rate": 0.003990825688073394, "loss": 0.8943, "step": 220 }, { "epoch": 2.1, "grad_norm": 1.352690577507019, "learning_rate": 0.003944954128440367, "loss": 0.8426, "step": 230 }, { "epoch": 2.19, "grad_norm": 1.743643879890442, "learning_rate": 0.0038990825688073397, "loss": 0.7838, "step": 240 }, { "epoch": 2.28, "grad_norm": 0.9074971675872803, "learning_rate": 0.0038532110091743124, "loss": 0.8546, "step": 250 }, { "epoch": 2.37, "grad_norm": 0.6970580816268921, "learning_rate": 0.0038073394495412843, "loss": 0.8481, "step": 260 }, { "epoch": 2.47, "grad_norm": 1.3498753309249878, "learning_rate": 0.003761467889908257, "loss": 0.766, "step": 270 }, { "epoch": 2.56, "grad_norm": 1.2216682434082031, "learning_rate": 0.0037155963302752293, "loss": 0.8654, "step": 280 }, { "epoch": 2.65, "grad_norm": 1.1758558750152588, "learning_rate": 0.003669724770642202, "loss": 0.8438, "step": 290 }, { "epoch": 2.74, "grad_norm": 0.8012980222702026, "learning_rate": 0.0036238532110091743, "loss": 0.8363, "step": 300 }, { "epoch": 2.83, "grad_norm": 1.5546942949295044, "learning_rate": 0.003577981651376147, "loss": 0.8998, "step": 310 }, { "epoch": 2.92, "grad_norm": 0.9878047704696655, "learning_rate": 0.0035321100917431194, "loss": 0.7891, "step": 320 }, { "epoch": 3.0, "eval_accuracy": 0.7178464606181456, "eval_f1": 0.36668660138047887, "eval_loss": 0.7574967741966248, "eval_precision": 0.3953602798961963, "eval_recall": 0.40001083689608274, "eval_runtime": 5.9834, "eval_samples_per_second": 167.63, "eval_steps_per_second": 10.529, "step": 328 }, { "epoch": 3.01, "grad_norm": 1.59013032913208, "learning_rate": 0.003486238532110092, "loss": 0.8096, "step": 330 }, { "epoch": 3.11, "grad_norm": 0.7891358733177185, "learning_rate": 0.0034403669724770644, "loss": 0.8351, "step": 340 }, { "epoch": 3.2, "grad_norm": 1.1285407543182373, "learning_rate": 0.003394495412844037, "loss": 0.8189, "step": 350 }, { "epoch": 3.29, "grad_norm": 1.166321873664856, "learning_rate": 0.003348623853211009, "loss": 0.7379, "step": 360 }, { "epoch": 3.38, "grad_norm": 0.8681693077087402, "learning_rate": 0.0033027522935779817, "loss": 0.7388, "step": 370 }, { "epoch": 3.47, "grad_norm": 1.2175371646881104, "learning_rate": 0.003256880733944954, "loss": 0.8183, "step": 380 }, { "epoch": 3.56, "grad_norm": 1.4015443325042725, "learning_rate": 0.003211009174311927, "loss": 0.8545, "step": 390 }, { "epoch": 3.65, "grad_norm": 1.3555314540863037, "learning_rate": 0.003165137614678899, "loss": 0.8038, "step": 400 }, { "epoch": 3.74, "grad_norm": 1.3774343729019165, "learning_rate": 0.003119266055045872, "loss": 0.8346, "step": 410 }, { "epoch": 3.84, "grad_norm": 1.0004535913467407, "learning_rate": 0.003073394495412844, "loss": 0.7919, "step": 420 }, { "epoch": 3.93, "grad_norm": 0.7557776570320129, "learning_rate": 0.003027522935779817, "loss": 0.7649, "step": 430 }, { "epoch": 4.0, "eval_accuracy": 0.7417746759720838, "eval_f1": 0.41456723287460584, "eval_loss": 0.6878895163536072, "eval_precision": 0.5009001421261136, "eval_recall": 0.3971855759911965, "eval_runtime": 5.8982, "eval_samples_per_second": 170.052, "eval_steps_per_second": 10.681, "step": 438 }, { "epoch": 4.02, "grad_norm": 0.8360877633094788, "learning_rate": 0.002981651376146789, "loss": 0.7322, "step": 440 }, { "epoch": 4.11, "grad_norm": 1.399274468421936, "learning_rate": 0.002935779816513762, "loss": 0.7425, "step": 450 }, { "epoch": 4.2, "grad_norm": 1.0945734977722168, "learning_rate": 0.0028899082568807338, "loss": 0.7295, "step": 460 }, { "epoch": 4.29, "grad_norm": 1.5122932195663452, "learning_rate": 0.0028440366972477065, "loss": 0.7892, "step": 470 }, { "epoch": 4.38, "grad_norm": 0.7687821388244629, "learning_rate": 0.002798165137614679, "loss": 0.7614, "step": 480 }, { "epoch": 4.47, "grad_norm": 1.25822913646698, "learning_rate": 0.0027522935779816515, "loss": 0.7811, "step": 490 }, { "epoch": 4.57, "grad_norm": 0.7886181473731995, "learning_rate": 0.002706422018348624, "loss": 0.8093, "step": 500 }, { "epoch": 4.66, "grad_norm": 0.7840601801872253, "learning_rate": 0.0026605504587155966, "loss": 0.738, "step": 510 }, { "epoch": 4.75, "grad_norm": 1.1541500091552734, "learning_rate": 0.002614678899082569, "loss": 0.8208, "step": 520 }, { "epoch": 4.84, "grad_norm": 1.354428768157959, "learning_rate": 0.0025688073394495416, "loss": 0.746, "step": 530 }, { "epoch": 4.93, "grad_norm": 0.8335555195808411, "learning_rate": 0.0025229357798165135, "loss": 0.8146, "step": 540 }, { "epoch": 5.0, "eval_accuracy": 0.7178464606181456, "eval_f1": 0.36409764256305227, "eval_loss": 0.7471081614494324, "eval_precision": 0.4490225302647663, "eval_recall": 0.4141043546195771, "eval_runtime": 5.7321, "eval_samples_per_second": 174.978, "eval_steps_per_second": 10.991, "step": 547 }, { "epoch": 5.02, "grad_norm": 1.1929486989974976, "learning_rate": 0.0024770642201834866, "loss": 0.7308, "step": 550 }, { "epoch": 5.11, "grad_norm": 1.202407717704773, "learning_rate": 0.002431192660550459, "loss": 0.7956, "step": 560 }, { "epoch": 5.21, "grad_norm": 1.1130154132843018, "learning_rate": 0.0023853211009174312, "loss": 0.7042, "step": 570 }, { "epoch": 5.3, "grad_norm": 1.1920500993728638, "learning_rate": 0.002339449541284404, "loss": 0.7945, "step": 580 }, { "epoch": 5.39, "grad_norm": 1.4452427625656128, "learning_rate": 0.0022935779816513763, "loss": 0.7757, "step": 590 }, { "epoch": 5.48, "grad_norm": 1.1632148027420044, "learning_rate": 0.002247706422018349, "loss": 0.7595, "step": 600 }, { "epoch": 5.57, "grad_norm": 0.8493008017539978, "learning_rate": 0.0022018348623853213, "loss": 0.7757, "step": 610 }, { "epoch": 5.66, "grad_norm": 1.3676623106002808, "learning_rate": 0.0021559633027522936, "loss": 0.7664, "step": 620 }, { "epoch": 5.75, "grad_norm": 0.9162562489509583, "learning_rate": 0.0021100917431192663, "loss": 0.7303, "step": 630 }, { "epoch": 5.84, "grad_norm": 0.7179450392723083, "learning_rate": 0.0020642201834862386, "loss": 0.7088, "step": 640 }, { "epoch": 5.94, "grad_norm": 0.7027204632759094, "learning_rate": 0.002018348623853211, "loss": 0.6831, "step": 650 }, { "epoch": 6.0, "eval_accuracy": 0.7367896311066799, "eval_f1": 0.42520915568023343, "eval_loss": 0.7007002830505371, "eval_precision": 0.47770601644698146, "eval_recall": 0.4148257042870626, "eval_runtime": 5.9496, "eval_samples_per_second": 168.582, "eval_steps_per_second": 10.589, "step": 657 }, { "epoch": 6.03, "grad_norm": 0.7743313312530518, "learning_rate": 0.0019724770642201837, "loss": 0.7261, "step": 660 }, { "epoch": 6.12, "grad_norm": 0.6318005323410034, "learning_rate": 0.0019266055045871562, "loss": 0.702, "step": 670 }, { "epoch": 6.21, "grad_norm": 0.696121096611023, "learning_rate": 0.0018807339449541285, "loss": 0.6966, "step": 680 }, { "epoch": 6.3, "grad_norm": 0.5956413149833679, "learning_rate": 0.001834862385321101, "loss": 0.7176, "step": 690 }, { "epoch": 6.39, "grad_norm": 1.6218867301940918, "learning_rate": 0.0017889908256880735, "loss": 0.7223, "step": 700 }, { "epoch": 6.48, "grad_norm": 1.1353025436401367, "learning_rate": 0.001743119266055046, "loss": 0.7098, "step": 710 }, { "epoch": 6.58, "grad_norm": 0.7476430535316467, "learning_rate": 0.0016972477064220186, "loss": 0.6411, "step": 720 }, { "epoch": 6.67, "grad_norm": 1.3945239782333374, "learning_rate": 0.0016513761467889909, "loss": 0.7219, "step": 730 }, { "epoch": 6.76, "grad_norm": 0.9898785948753357, "learning_rate": 0.0016055045871559634, "loss": 0.7456, "step": 740 }, { "epoch": 6.85, "grad_norm": 0.7352449893951416, "learning_rate": 0.001559633027522936, "loss": 0.7057, "step": 750 }, { "epoch": 6.94, "grad_norm": 1.0120513439178467, "learning_rate": 0.0015137614678899084, "loss": 0.695, "step": 760 }, { "epoch": 7.0, "eval_accuracy": 0.7427716849451645, "eval_f1": 0.48411655206463156, "eval_loss": 0.6797036528587341, "eval_precision": 0.4638223826629623, "eval_recall": 0.5333725550353186, "eval_runtime": 5.7522, "eval_samples_per_second": 174.367, "eval_steps_per_second": 10.952, "step": 766 }, { "epoch": 7.03, "grad_norm": 1.297319769859314, "learning_rate": 0.001467889908256881, "loss": 0.7541, "step": 770 }, { "epoch": 7.12, "grad_norm": 1.1115490198135376, "learning_rate": 0.0014220183486238532, "loss": 0.6944, "step": 780 }, { "epoch": 7.21, "grad_norm": 0.881907045841217, "learning_rate": 0.0013761467889908258, "loss": 0.7047, "step": 790 }, { "epoch": 7.31, "grad_norm": 0.9110414981842041, "learning_rate": 0.0013302752293577983, "loss": 0.7227, "step": 800 }, { "epoch": 7.4, "grad_norm": 0.7190865874290466, "learning_rate": 0.0012844036697247708, "loss": 0.7263, "step": 810 }, { "epoch": 7.49, "grad_norm": 0.9148305654525757, "learning_rate": 0.0012385321100917433, "loss": 0.6336, "step": 820 }, { "epoch": 7.58, "grad_norm": 0.7972878813743591, "learning_rate": 0.0011926605504587156, "loss": 0.6886, "step": 830 }, { "epoch": 7.67, "grad_norm": 0.9237717986106873, "learning_rate": 0.0011467889908256881, "loss": 0.6212, "step": 840 }, { "epoch": 7.76, "grad_norm": 1.1942154169082642, "learning_rate": 0.0011009174311926607, "loss": 0.6202, "step": 850 }, { "epoch": 7.85, "grad_norm": 1.2370058298110962, "learning_rate": 0.0010550458715596332, "loss": 0.703, "step": 860 }, { "epoch": 7.95, "grad_norm": 1.2201330661773682, "learning_rate": 0.0010091743119266055, "loss": 0.6646, "step": 870 }, { "epoch": 8.0, "eval_accuracy": 0.7537387836490529, "eval_f1": 0.4932964122925023, "eval_loss": 0.6534218192100525, "eval_precision": 0.6130395728356862, "eval_recall": 0.5077119285550199, "eval_runtime": 6.1104, "eval_samples_per_second": 164.148, "eval_steps_per_second": 10.31, "step": 876 }, { "epoch": 8.04, "grad_norm": 0.904547393321991, "learning_rate": 0.0009633027522935781, "loss": 0.6991, "step": 880 }, { "epoch": 8.13, "grad_norm": 0.7196776270866394, "learning_rate": 0.0009174311926605505, "loss": 0.612, "step": 890 }, { "epoch": 8.22, "grad_norm": 1.1908409595489502, "learning_rate": 0.000871559633027523, "loss": 0.6583, "step": 900 }, { "epoch": 8.31, "grad_norm": 1.1398776769638062, "learning_rate": 0.0008256880733944954, "loss": 0.7221, "step": 910 }, { "epoch": 8.4, "grad_norm": 0.867472767829895, "learning_rate": 0.000779816513761468, "loss": 0.6012, "step": 920 }, { "epoch": 8.49, "grad_norm": 0.7770695686340332, "learning_rate": 0.0007339449541284405, "loss": 0.6763, "step": 930 }, { "epoch": 8.58, "grad_norm": 1.4209235906600952, "learning_rate": 0.0006880733944954129, "loss": 0.6451, "step": 940 }, { "epoch": 8.68, "grad_norm": 1.1582767963409424, "learning_rate": 0.0006422018348623854, "loss": 0.6745, "step": 950 }, { "epoch": 8.77, "grad_norm": 1.2608932256698608, "learning_rate": 0.0005963302752293578, "loss": 0.6464, "step": 960 }, { "epoch": 8.86, "grad_norm": 0.7971704006195068, "learning_rate": 0.0005504587155963303, "loss": 0.6251, "step": 970 }, { "epoch": 8.95, "grad_norm": 1.318163275718689, "learning_rate": 0.0005045871559633027, "loss": 0.675, "step": 980 }, { "epoch": 9.0, "eval_accuracy": 0.7666999002991027, "eval_f1": 0.5308101410608023, "eval_loss": 0.6237577795982361, "eval_precision": 0.6518028517787527, "eval_recall": 0.5430883155707511, "eval_runtime": 5.972, "eval_samples_per_second": 167.95, "eval_steps_per_second": 10.549, "step": 985 }, { "epoch": 9.04, "grad_norm": 0.9488193392753601, "learning_rate": 0.00045871559633027525, "loss": 0.6078, "step": 990 }, { "epoch": 9.13, "grad_norm": 0.8466370105743408, "learning_rate": 0.0004128440366972477, "loss": 0.6368, "step": 1000 }, { "epoch": 9.22, "grad_norm": 1.245497465133667, "learning_rate": 0.00036697247706422024, "loss": 0.6639, "step": 1010 }, { "epoch": 9.32, "grad_norm": 1.0719431638717651, "learning_rate": 0.0003211009174311927, "loss": 0.5846, "step": 1020 }, { "epoch": 9.41, "grad_norm": 0.8983196020126343, "learning_rate": 0.00027522935779816516, "loss": 0.663, "step": 1030 }, { "epoch": 9.5, "grad_norm": 0.8495015501976013, "learning_rate": 0.00022935779816513763, "loss": 0.5945, "step": 1040 }, { "epoch": 9.59, "grad_norm": 1.3694357872009277, "learning_rate": 0.00018348623853211012, "loss": 0.6575, "step": 1050 }, { "epoch": 9.68, "grad_norm": 1.014473557472229, "learning_rate": 0.00013761467889908258, "loss": 0.6433, "step": 1060 }, { "epoch": 9.77, "grad_norm": 1.0054584741592407, "learning_rate": 9.174311926605506e-05, "loss": 0.6797, "step": 1070 }, { "epoch": 9.86, "grad_norm": 1.37436044216156, "learning_rate": 4.587155963302753e-05, "loss": 0.6579, "step": 1080 }, { "epoch": 9.95, "grad_norm": 0.9838040471076965, "learning_rate": 0.0, "loss": 0.6145, "step": 1090 }, { "epoch": 9.95, "eval_accuracy": 0.7726819541375872, "eval_f1": 0.5282805571871881, "eval_loss": 0.6095667481422424, "eval_precision": 0.6426600715638663, "eval_recall": 0.5346360087577886, "eval_runtime": 6.0311, "eval_samples_per_second": 166.305, "eval_steps_per_second": 10.446, "step": 1090 }, { "epoch": 9.95, "step": 1090, "total_flos": 5.440571948014866e+18, "train_loss": 0.7656277652180523, "train_runtime": 925.5702, "train_samples_per_second": 75.705, "train_steps_per_second": 1.178 } ], "logging_steps": 10, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.440571948014866e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }