jbjeong91 committed on
Commit
ecf66fe
1 Parent(s): e9f63ab

Model save

Browse files
Files changed (5) hide show
  1. README.md +117 -0
  2. all_results.json +9 -0
  3. generation_config.json +12 -0
  4. train_results.json +9 -0
  5. trainer_state.json +837 -0
README.md ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: llama3.1
4
+ base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
5
+ tags:
6
+ - trl
7
+ - cpo
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: llama3.1-cpo-full-0919
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # llama3.1-cpo-full-0919
18
+
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on an unknown dataset.
20
+ It achieves the following results on the evaluation set (final evaluation, at step 43, epoch 0.9885):
21
+ - Loss: 2.0545
22
+ - Rewards/chosen: -18.3931
23
+ - Rewards/rejected: -18.5452
24
+ - Rewards/accuracies: 0.5261
25
+ - Rewards/margins: 0.1521
26
+ - Logps/rejected: -185.4521
27
+ - Logps/chosen: -183.9312
28
+ - Logits/rejected: -0.7551
29
+ - Logits/chosen: -0.7797
30
+ - Nll Loss: 0.5180
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 1e-06
50
+ - train_batch_size: 4
51
+ - eval_batch_size: 4
52
+ - seed: 42
53
+ - distributed_type: multi-GPU
54
+ - num_devices: 4
55
+ - gradient_accumulation_steps: 8
56
+ - total_train_batch_size: 128
57
+ - total_eval_batch_size: 16
58
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
+ - lr_scheduler_type: linear
60
+ - lr_scheduler_warmup_ratio: 0.1
61
+ - num_epochs: 1
62
+
63
+ ### Training results
64
+
65
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss |
66
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|
67
+ | No log | 0.0230 | 1 | 2.5065 | -26.5532 | -26.5849 | 0.5174 | 0.0317 | -265.8489 | -265.5324 | -0.2622 | -0.2859 | 0.7676 |
68
+ | No log | 0.0460 | 2 | 2.5060 | -26.5180 | -26.5504 | 0.5217 | 0.0324 | -265.5038 | -265.1801 | -0.2634 | -0.2869 | 0.7666 |
69
+ | No log | 0.0690 | 3 | 2.5051 | -26.5009 | -26.5278 | 0.5217 | 0.0269 | -265.2777 | -265.0088 | -0.2657 | -0.2893 | 0.7661 |
70
+ | No log | 0.0920 | 4 | 2.4982 | -26.3952 | -26.4315 | 0.5239 | 0.0363 | -264.3147 | -263.9518 | -0.2690 | -0.2926 | 0.7632 |
71
+ | No log | 0.1149 | 5 | 2.4894 | -26.2651 | -26.3011 | 0.5217 | 0.0360 | -263.0112 | -262.6512 | -0.2750 | -0.2985 | 0.7594 |
72
+ | No log | 0.1379 | 6 | 2.4689 | -25.9450 | -25.9855 | 0.5283 | 0.0405 | -259.8551 | -259.4500 | -0.2858 | -0.3086 | 0.7502 |
73
+ | No log | 0.1609 | 7 | 2.4511 | -25.7084 | -25.7527 | 0.5283 | 0.0443 | -257.5271 | -257.0843 | -0.2972 | -0.3202 | 0.7433 |
74
+ | No log | 0.1839 | 8 | 2.4180 | -25.2215 | -25.2724 | 0.5326 | 0.0510 | -252.7242 | -252.2147 | -0.3254 | -0.3486 | 0.7291 |
75
+ | No log | 0.2069 | 9 | 2.3952 | -24.8845 | -24.9393 | 0.5283 | 0.0548 | -249.3929 | -248.8451 | -0.3463 | -0.3701 | 0.7192 |
76
+ | 2.6865 | 0.2299 | 10 | 2.3761 | -24.6215 | -24.6782 | 0.5348 | 0.0567 | -246.7821 | -246.2148 | -0.3604 | -0.3845 | 0.7115 |
77
+ | 2.6865 | 0.2529 | 11 | 2.3609 | -24.4027 | -24.4705 | 0.5391 | 0.0678 | -244.7050 | -244.0270 | -0.3731 | -0.3976 | 0.7051 |
78
+ | 2.6865 | 0.2759 | 12 | 2.3367 | -24.0560 | -24.1306 | 0.5348 | 0.0746 | -241.3063 | -240.5604 | -0.3970 | -0.4218 | 0.6951 |
79
+ | 2.6865 | 0.2989 | 13 | 2.3109 | -23.6786 | -23.7645 | 0.5304 | 0.0860 | -237.6454 | -236.7858 | -0.4179 | -0.4434 | 0.6840 |
80
+ | 2.6865 | 0.3218 | 14 | 2.2906 | -23.3175 | -23.4031 | 0.5348 | 0.0856 | -234.0311 | -233.1748 | -0.4423 | -0.4679 | 0.6733 |
81
+ | 2.6865 | 0.3448 | 15 | 2.2729 | -22.9946 | -23.0933 | 0.5348 | 0.0988 | -230.9332 | -229.9456 | -0.4660 | -0.4917 | 0.6637 |
82
+ | 2.6865 | 0.3678 | 16 | 2.2576 | -22.7067 | -22.8056 | 0.5370 | 0.0990 | -228.0565 | -227.0665 | -0.4886 | -0.5142 | 0.6549 |
83
+ | 2.6865 | 0.3908 | 17 | 2.2411 | -22.4130 | -22.5166 | 0.5283 | 0.1036 | -225.1658 | -224.1296 | -0.5152 | -0.5408 | 0.6460 |
84
+ | 2.6865 | 0.4138 | 18 | 2.2300 | -22.1594 | -22.2652 | 0.5261 | 0.1058 | -222.6522 | -221.5937 | -0.5400 | -0.5656 | 0.6382 |
85
+ | 2.6865 | 0.4368 | 19 | 2.2170 | -21.9205 | -22.0355 | 0.5304 | 0.1150 | -220.3547 | -219.2051 | -0.5657 | -0.5915 | 0.6308 |
86
+ | 2.3904 | 0.4598 | 20 | 2.2065 | -21.7054 | -21.8209 | 0.5283 | 0.1156 | -218.2092 | -217.0537 | -0.5920 | -0.6175 | 0.6241 |
87
+ | 2.3904 | 0.4828 | 21 | 2.1932 | -21.4871 | -21.6107 | 0.5261 | 0.1236 | -216.1072 | -214.8710 | -0.6189 | -0.6441 | 0.6172 |
88
+ | 2.3904 | 0.5057 | 22 | 2.1839 | -21.2899 | -21.4129 | 0.5196 | 0.1230 | -214.1287 | -212.8987 | -0.6445 | -0.6694 | 0.6109 |
89
+ | 2.3904 | 0.5287 | 23 | 2.1746 | -21.0873 | -21.2117 | 0.5261 | 0.1244 | -212.1172 | -210.8729 | -0.6688 | -0.6940 | 0.6045 |
90
+ | 2.3904 | 0.5517 | 24 | 2.1656 | -20.9136 | -21.0398 | 0.5239 | 0.1262 | -210.3979 | -209.1364 | -0.6938 | -0.7184 | 0.5989 |
91
+ | 2.3904 | 0.5747 | 25 | 2.1555 | -20.7191 | -20.8481 | 0.5283 | 0.1290 | -208.4814 | -207.1911 | -0.7120 | -0.7365 | 0.5926 |
92
+ | 2.3904 | 0.5977 | 26 | 2.1466 | -20.5485 | -20.6790 | 0.5283 | 0.1305 | -206.7897 | -205.4852 | -0.7301 | -0.7545 | 0.5872 |
93
+ | 2.3904 | 0.6207 | 27 | 2.1392 | -20.3722 | -20.5040 | 0.5370 | 0.1318 | -205.0401 | -203.7218 | -0.7476 | -0.7720 | 0.5816 |
94
+ | 2.3904 | 0.6437 | 28 | 2.1308 | -20.1853 | -20.3216 | 0.5326 | 0.1363 | -203.2164 | -201.8533 | -0.7575 | -0.7818 | 0.5756 |
95
+ | 2.3904 | 0.6667 | 29 | 2.1229 | -19.9946 | -20.1315 | 0.5283 | 0.1370 | -201.3155 | -199.9459 | -0.7683 | -0.7925 | 0.5695 |
96
+ | 2.3172 | 0.6897 | 30 | 2.1134 | -19.7893 | -19.9304 | 0.5261 | 0.1411 | -199.3041 | -197.8930 | -0.7735 | -0.7976 | 0.5630 |
97
+ | 2.3172 | 0.7126 | 31 | 2.1055 | -19.5960 | -19.7401 | 0.5283 | 0.1441 | -197.4013 | -195.9599 | -0.7735 | -0.7977 | 0.5569 |
98
+ | 2.3172 | 0.7356 | 32 | 2.0985 | -19.4016 | -19.5462 | 0.5217 | 0.1445 | -195.4615 | -194.0163 | -0.7817 | -0.8060 | 0.5508 |
99
+ | 2.3172 | 0.7586 | 33 | 2.0904 | -19.2117 | -19.3617 | 0.5239 | 0.1501 | -193.6172 | -192.1166 | -0.7785 | -0.8030 | 0.5447 |
100
+ | 2.3172 | 0.7816 | 34 | 2.0850 | -19.0381 | -19.1813 | 0.5239 | 0.1432 | -191.8132 | -190.3807 | -0.7758 | -0.8003 | 0.5392 |
101
+ | 2.3172 | 0.8046 | 35 | 2.0793 | -18.8988 | -19.0437 | 0.5174 | 0.1449 | -190.4374 | -188.9884 | -0.7715 | -0.7964 | 0.5346 |
102
+ | 2.3172 | 0.8276 | 36 | 2.0720 | -18.7545 | -18.8980 | 0.5196 | 0.1435 | -188.9801 | -187.5452 | -0.7701 | -0.7952 | 0.5299 |
103
+ | 2.3172 | 0.8506 | 37 | 2.0663 | -18.6567 | -18.8053 | 0.5261 | 0.1486 | -188.0533 | -186.5672 | -0.7679 | -0.7927 | 0.5266 |
104
+ | 2.3172 | 0.8736 | 38 | 2.0643 | -18.5627 | -18.7139 | 0.5239 | 0.1512 | -187.1391 | -185.6268 | -0.7631 | -0.7882 | 0.5235 |
105
+ | 2.3172 | 0.8966 | 39 | 2.0601 | -18.5100 | -18.6606 | 0.5283 | 0.1507 | -186.6065 | -185.0997 | -0.7609 | -0.7857 | 0.5217 |
106
+ | 2.1039 | 0.9195 | 40 | 2.0598 | -18.4610 | -18.6128 | 0.5283 | 0.1518 | -186.1283 | -184.6099 | -0.7611 | -0.7860 | 0.5201 |
107
+ | 2.1039 | 0.9425 | 41 | 2.0539 | -18.4232 | -18.5801 | 0.5261 | 0.1568 | -185.8007 | -184.2324 | -0.7540 | -0.7789 | 0.5190 |
108
+ | 2.1039 | 0.9655 | 42 | 2.0544 | -18.3969 | -18.5526 | 0.5283 | 0.1557 | -185.5258 | -183.9690 | -0.7525 | -0.7777 | 0.5181 |
109
+ | 2.1039 | 0.9885 | 43 | 2.0545 | -18.3931 | -18.5452 | 0.5261 | 0.1521 | -185.4521 | -183.9312 | -0.7551 | -0.7797 | 0.5180 |
110
+
111
+
112
+ ### Framework versions
113
+
114
+ - Transformers 4.44.2
115
+ - Pytorch 2.3.1
116
+ - Datasets 2.21.0
117
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9885057471264368,
3
+ "total_flos": 0.0,
4
+ "train_loss": 2.3523660704146985,
5
+ "train_runtime": 5387.5537,
6
+ "train_samples": 5556,
7
+ "train_samples_per_second": 1.031,
8
+ "train_steps_per_second": 0.008
9
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.44.2"
12
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9885057471264368,
3
+ "total_flos": 0.0,
4
+ "train_loss": 2.3523660704146985,
5
+ "train_runtime": 5387.5537,
6
+ "train_samples": 5556,
7
+ "train_samples_per_second": 1.031,
8
+ "train_steps_per_second": 0.008
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,837 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9885057471264368,
5
+ "eval_steps": 1,
6
+ "global_step": 43,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.022988505747126436,
13
+ "eval_logits/chosen": -0.285895973443985,
14
+ "eval_logits/rejected": -0.2622124254703522,
15
+ "eval_logps/chosen": -265.5323791503906,
16
+ "eval_logps/rejected": -265.8489074707031,
17
+ "eval_loss": 2.506535530090332,
18
+ "eval_nll_loss": 0.7676451206207275,
19
+ "eval_rewards/accuracies": 0.5173913240432739,
20
+ "eval_rewards/chosen": -26.55323600769043,
21
+ "eval_rewards/margins": 0.03165607899427414,
22
+ "eval_rewards/rejected": -26.58489227294922,
23
+ "eval_runtime": 73.2699,
24
+ "eval_samples_per_second": 24.922,
25
+ "eval_steps_per_second": 1.57,
26
+ "step": 1
27
+ },
28
+ {
29
+ "epoch": 0.04597701149425287,
30
+ "eval_logits/chosen": -0.28692948818206787,
31
+ "eval_logits/rejected": -0.2633576989173889,
32
+ "eval_logps/chosen": -265.1800537109375,
33
+ "eval_logps/rejected": -265.50384521484375,
34
+ "eval_loss": 2.505967855453491,
35
+ "eval_nll_loss": 0.766638994216919,
36
+ "eval_rewards/accuracies": 0.52173912525177,
37
+ "eval_rewards/chosen": -26.51800537109375,
38
+ "eval_rewards/margins": 0.03237998113036156,
39
+ "eval_rewards/rejected": -26.550386428833008,
40
+ "eval_runtime": 73.1616,
41
+ "eval_samples_per_second": 24.958,
42
+ "eval_steps_per_second": 1.572,
43
+ "step": 2
44
+ },
45
+ {
46
+ "epoch": 0.06896551724137931,
47
+ "eval_logits/chosen": -0.28926920890808105,
48
+ "eval_logits/rejected": -0.2657304108142853,
49
+ "eval_logps/chosen": -265.0088195800781,
50
+ "eval_logps/rejected": -265.2777404785156,
51
+ "eval_loss": 2.505052328109741,
52
+ "eval_nll_loss": 0.7661022543907166,
53
+ "eval_rewards/accuracies": 0.52173912525177,
54
+ "eval_rewards/chosen": -26.500883102416992,
55
+ "eval_rewards/margins": 0.02689189836382866,
56
+ "eval_rewards/rejected": -26.527772903442383,
57
+ "eval_runtime": 73.4564,
58
+ "eval_samples_per_second": 24.858,
59
+ "eval_steps_per_second": 1.566,
60
+ "step": 3
61
+ },
62
+ {
63
+ "epoch": 0.09195402298850575,
64
+ "eval_logits/chosen": -0.29259422421455383,
65
+ "eval_logits/rejected": -0.26898470520973206,
66
+ "eval_logps/chosen": -263.95184326171875,
67
+ "eval_logps/rejected": -264.3146667480469,
68
+ "eval_loss": 2.498246669769287,
69
+ "eval_nll_loss": 0.7631996870040894,
70
+ "eval_rewards/accuracies": 0.5239130258560181,
71
+ "eval_rewards/chosen": -26.395187377929688,
72
+ "eval_rewards/margins": 0.03628147765994072,
73
+ "eval_rewards/rejected": -26.43147087097168,
74
+ "eval_runtime": 73.5575,
75
+ "eval_samples_per_second": 24.824,
76
+ "eval_steps_per_second": 1.563,
77
+ "step": 4
78
+ },
79
+ {
80
+ "epoch": 0.11494252873563218,
81
+ "eval_logits/chosen": -0.29848381876945496,
82
+ "eval_logits/rejected": -0.27501967549324036,
83
+ "eval_logps/chosen": -262.6512145996094,
84
+ "eval_logps/rejected": -263.0111999511719,
85
+ "eval_loss": 2.489372968673706,
86
+ "eval_nll_loss": 0.7594311237335205,
87
+ "eval_rewards/accuracies": 0.52173912525177,
88
+ "eval_rewards/chosen": -26.26512336730957,
89
+ "eval_rewards/margins": 0.035997405648231506,
90
+ "eval_rewards/rejected": -26.30112075805664,
91
+ "eval_runtime": 73.7594,
92
+ "eval_samples_per_second": 24.756,
93
+ "eval_steps_per_second": 1.559,
94
+ "step": 5
95
+ },
96
+ {
97
+ "epoch": 0.13793103448275862,
98
+ "eval_logits/chosen": -0.30859696865081787,
99
+ "eval_logits/rejected": -0.2858428359031677,
100
+ "eval_logps/chosen": -259.449951171875,
101
+ "eval_logps/rejected": -259.8551330566406,
102
+ "eval_loss": 2.4688832759857178,
103
+ "eval_nll_loss": 0.7501848340034485,
104
+ "eval_rewards/accuracies": 0.5282608866691589,
105
+ "eval_rewards/chosen": -25.94499397277832,
106
+ "eval_rewards/margins": 0.040517814457416534,
107
+ "eval_rewards/rejected": -25.98551368713379,
108
+ "eval_runtime": 73.5054,
109
+ "eval_samples_per_second": 24.842,
110
+ "eval_steps_per_second": 1.565,
111
+ "step": 6
112
+ },
113
+ {
114
+ "epoch": 0.16091954022988506,
115
+ "eval_logits/chosen": -0.3201945424079895,
116
+ "eval_logits/rejected": -0.297221302986145,
117
+ "eval_logps/chosen": -257.0843200683594,
118
+ "eval_logps/rejected": -257.527099609375,
119
+ "eval_loss": 2.4511067867279053,
120
+ "eval_nll_loss": 0.7433211207389832,
121
+ "eval_rewards/accuracies": 0.5282608866691589,
122
+ "eval_rewards/chosen": -25.708433151245117,
123
+ "eval_rewards/margins": 0.04427630454301834,
124
+ "eval_rewards/rejected": -25.752708435058594,
125
+ "eval_runtime": 73.716,
126
+ "eval_samples_per_second": 24.771,
127
+ "eval_steps_per_second": 1.56,
128
+ "step": 7
129
+ },
130
+ {
131
+ "epoch": 0.1839080459770115,
132
+ "eval_logits/chosen": -0.348645955324173,
133
+ "eval_logits/rejected": -0.3254188001155853,
134
+ "eval_logps/chosen": -252.2147216796875,
135
+ "eval_logps/rejected": -252.7242431640625,
136
+ "eval_loss": 2.4179530143737793,
137
+ "eval_nll_loss": 0.7291316390037537,
138
+ "eval_rewards/accuracies": 0.532608687877655,
139
+ "eval_rewards/chosen": -25.22147560119629,
140
+ "eval_rewards/margins": 0.05095084756612778,
141
+ "eval_rewards/rejected": -25.27242660522461,
142
+ "eval_runtime": 73.8275,
143
+ "eval_samples_per_second": 24.733,
144
+ "eval_steps_per_second": 1.558,
145
+ "step": 8
146
+ },
147
+ {
148
+ "epoch": 0.20689655172413793,
149
+ "eval_logits/chosen": -0.37005820870399475,
150
+ "eval_logits/rejected": -0.3462548851966858,
151
+ "eval_logps/chosen": -248.8451385498047,
152
+ "eval_logps/rejected": -249.3928985595703,
153
+ "eval_loss": 2.3951992988586426,
154
+ "eval_nll_loss": 0.7191779017448425,
155
+ "eval_rewards/accuracies": 0.5282608866691589,
156
+ "eval_rewards/chosen": -24.8845157623291,
157
+ "eval_rewards/margins": 0.054776255041360855,
158
+ "eval_rewards/rejected": -24.939287185668945,
159
+ "eval_runtime": 73.7047,
160
+ "eval_samples_per_second": 24.775,
161
+ "eval_steps_per_second": 1.56,
162
+ "step": 9
163
+ },
164
+ {
165
+ "epoch": 0.22988505747126436,
166
+ "grad_norm": 55.518348693847656,
167
+ "learning_rate": 8.684210526315789e-07,
168
+ "logits/chosen": -0.35856884717941284,
169
+ "logits/rejected": -0.3261299431324005,
170
+ "logps/chosen": -264.810302734375,
171
+ "logps/rejected": -258.8919982910156,
172
+ "loss": 2.6865,
173
+ "nll_loss": 0.7651573419570923,
174
+ "rewards/accuracies": 0.4749999940395355,
175
+ "rewards/chosen": -26.481029510498047,
176
+ "rewards/margins": -0.5918328166007996,
177
+ "rewards/rejected": -25.889196395874023,
178
+ "step": 10
179
+ },
180
+ {
181
+ "epoch": 0.22988505747126436,
182
+ "eval_logits/chosen": -0.384502112865448,
183
+ "eval_logits/rejected": -0.3603852689266205,
184
+ "eval_logps/chosen": -246.21482849121094,
185
+ "eval_logps/rejected": -246.78208923339844,
186
+ "eval_loss": 2.376126766204834,
187
+ "eval_nll_loss": 0.7115476727485657,
188
+ "eval_rewards/accuracies": 0.5347825884819031,
189
+ "eval_rewards/chosen": -24.621484756469727,
190
+ "eval_rewards/margins": 0.05672362819314003,
191
+ "eval_rewards/rejected": -24.678205490112305,
192
+ "eval_runtime": 73.7798,
193
+ "eval_samples_per_second": 24.749,
194
+ "eval_steps_per_second": 1.559,
195
+ "step": 10
196
+ },
197
+ {
198
+ "epoch": 0.25287356321839083,
199
+ "eval_logits/chosen": -0.397601842880249,
200
+ "eval_logits/rejected": -0.3731386959552765,
201
+ "eval_logps/chosen": -244.02699279785156,
202
+ "eval_logps/rejected": -244.7050323486328,
203
+ "eval_loss": 2.3608767986297607,
204
+ "eval_nll_loss": 0.7050958275794983,
205
+ "eval_rewards/accuracies": 0.539130449295044,
206
+ "eval_rewards/chosen": -24.402700424194336,
207
+ "eval_rewards/margins": 0.06780331581830978,
208
+ "eval_rewards/rejected": -24.470500946044922,
209
+ "eval_runtime": 73.0824,
210
+ "eval_samples_per_second": 24.985,
211
+ "eval_steps_per_second": 1.574,
212
+ "step": 11
213
+ },
214
+ {
215
+ "epoch": 0.27586206896551724,
216
+ "eval_logits/chosen": -0.4218127429485321,
217
+ "eval_logits/rejected": -0.3970121443271637,
218
+ "eval_logps/chosen": -240.5603790283203,
219
+ "eval_logps/rejected": -241.30628967285156,
220
+ "eval_loss": 2.3367197513580322,
221
+ "eval_nll_loss": 0.6951096057891846,
222
+ "eval_rewards/accuracies": 0.5347825884819031,
223
+ "eval_rewards/chosen": -24.05603790283203,
224
+ "eval_rewards/margins": 0.074591264128685,
225
+ "eval_rewards/rejected": -24.130634307861328,
226
+ "eval_runtime": 73.2125,
227
+ "eval_samples_per_second": 24.941,
228
+ "eval_steps_per_second": 1.571,
229
+ "step": 12
230
+ },
231
+ {
232
+ "epoch": 0.2988505747126437,
233
+ "eval_logits/chosen": -0.4434413015842438,
234
+ "eval_logits/rejected": -0.4179251492023468,
235
+ "eval_logps/chosen": -236.7858123779297,
236
+ "eval_logps/rejected": -237.64541625976562,
237
+ "eval_loss": 2.310944080352783,
238
+ "eval_nll_loss": 0.6840075850486755,
239
+ "eval_rewards/accuracies": 0.530434787273407,
240
+ "eval_rewards/chosen": -23.6785831451416,
241
+ "eval_rewards/margins": 0.08595678210258484,
242
+ "eval_rewards/rejected": -23.764541625976562,
243
+ "eval_runtime": 73.2236,
244
+ "eval_samples_per_second": 24.937,
245
+ "eval_steps_per_second": 1.571,
246
+ "step": 13
247
+ },
248
+ {
249
+ "epoch": 0.3218390804597701,
250
+ "eval_logits/chosen": -0.4679478406906128,
251
+ "eval_logits/rejected": -0.4422786235809326,
252
+ "eval_logps/chosen": -233.17481994628906,
253
+ "eval_logps/rejected": -234.0310821533203,
254
+ "eval_loss": 2.290565252304077,
255
+ "eval_nll_loss": 0.6733331680297852,
256
+ "eval_rewards/accuracies": 0.5347825884819031,
257
+ "eval_rewards/chosen": -23.317480087280273,
258
+ "eval_rewards/margins": 0.08562804758548737,
259
+ "eval_rewards/rejected": -23.40311050415039,
260
+ "eval_runtime": 73.3905,
261
+ "eval_samples_per_second": 24.881,
262
+ "eval_steps_per_second": 1.567,
263
+ "step": 14
264
+ },
265
+ {
266
+ "epoch": 0.3448275862068966,
267
+ "eval_logits/chosen": -0.49170100688934326,
268
+ "eval_logits/rejected": -0.4659886956214905,
269
+ "eval_logps/chosen": -229.94561767578125,
270
+ "eval_logps/rejected": -230.9332275390625,
271
+ "eval_loss": 2.272915840148926,
272
+ "eval_nll_loss": 0.663709819316864,
273
+ "eval_rewards/accuracies": 0.5347825884819031,
274
+ "eval_rewards/chosen": -22.99456024169922,
275
+ "eval_rewards/margins": 0.09876058995723724,
276
+ "eval_rewards/rejected": -23.093320846557617,
277
+ "eval_runtime": 73.5456,
278
+ "eval_samples_per_second": 24.828,
279
+ "eval_steps_per_second": 1.564,
280
+ "step": 15
281
+ },
282
+ {
283
+ "epoch": 0.367816091954023,
284
+ "eval_logits/chosen": -0.5142260789871216,
285
+ "eval_logits/rejected": -0.4886496365070343,
286
+ "eval_logps/chosen": -227.06649780273438,
287
+ "eval_logps/rejected": -228.05648803710938,
288
+ "eval_loss": 2.257603406906128,
289
+ "eval_nll_loss": 0.6548909544944763,
290
+ "eval_rewards/accuracies": 0.5369565486907959,
291
+ "eval_rewards/chosen": -22.70665168762207,
292
+ "eval_rewards/margins": 0.09899646788835526,
293
+ "eval_rewards/rejected": -22.805648803710938,
294
+ "eval_runtime": 73.494,
295
+ "eval_samples_per_second": 24.846,
296
+ "eval_steps_per_second": 1.565,
297
+ "step": 16
298
+ },
299
+ {
300
+ "epoch": 0.39080459770114945,
301
+ "eval_logits/chosen": -0.5408182144165039,
302
+ "eval_logits/rejected": -0.5151581764221191,
303
+ "eval_logps/chosen": -224.1295928955078,
304
+ "eval_logps/rejected": -225.16580200195312,
305
+ "eval_loss": 2.241145133972168,
306
+ "eval_nll_loss": 0.6459768414497375,
307
+ "eval_rewards/accuracies": 0.5282608866691589,
308
+ "eval_rewards/chosen": -22.4129581451416,
309
+ "eval_rewards/margins": 0.10362222790718079,
310
+ "eval_rewards/rejected": -22.516578674316406,
311
+ "eval_runtime": 73.7057,
312
+ "eval_samples_per_second": 24.774,
313
+ "eval_steps_per_second": 1.56,
314
+ "step": 17
315
+ },
316
+ {
317
+ "epoch": 0.41379310344827586,
318
+ "eval_logits/chosen": -0.5656267404556274,
319
+ "eval_logits/rejected": -0.5400449633598328,
320
+ "eval_logps/chosen": -221.59368896484375,
321
+ "eval_logps/rejected": -222.6521759033203,
322
+ "eval_loss": 2.230027198791504,
323
+ "eval_nll_loss": 0.6381992697715759,
324
+ "eval_rewards/accuracies": 0.5260869860649109,
325
+ "eval_rewards/chosen": -22.15936851501465,
326
+ "eval_rewards/margins": 0.10584992170333862,
327
+ "eval_rewards/rejected": -22.265216827392578,
328
+ "eval_runtime": 73.8674,
329
+ "eval_samples_per_second": 24.72,
330
+ "eval_steps_per_second": 1.557,
331
+ "step": 18
332
+ },
333
+ {
334
+ "epoch": 0.4367816091954023,
335
+ "eval_logits/chosen": -0.5914514064788818,
336
+ "eval_logits/rejected": -0.565658688545227,
337
+ "eval_logps/chosen": -219.20506286621094,
338
+ "eval_logps/rejected": -220.354736328125,
339
+ "eval_loss": 2.2169623374938965,
340
+ "eval_nll_loss": 0.6308388113975525,
341
+ "eval_rewards/accuracies": 0.530434787273407,
342
+ "eval_rewards/chosen": -21.92050552368164,
343
+ "eval_rewards/margins": 0.11496546864509583,
344
+ "eval_rewards/rejected": -22.035470962524414,
345
+ "eval_runtime": 73.7719,
346
+ "eval_samples_per_second": 24.752,
347
+ "eval_steps_per_second": 1.559,
348
+ "step": 19
349
+ },
350
+ {
351
+ "epoch": 0.45977011494252873,
352
+ "grad_norm": 51.48088455200195,
353
+ "learning_rate": 6.052631578947368e-07,
354
+ "logits/chosen": -0.48232191801071167,
355
+ "logits/rejected": -0.4643561840057373,
356
+ "logps/chosen": -226.7048797607422,
357
+ "logps/rejected": -228.0491943359375,
358
+ "loss": 2.3904,
359
+ "nll_loss": 0.6598069667816162,
360
+ "rewards/accuracies": 0.546875,
361
+ "rewards/chosen": -22.670488357543945,
362
+ "rewards/margins": 0.13443148136138916,
363
+ "rewards/rejected": -22.804920196533203,
364
+ "step": 20
365
+ },
366
+ {
367
+ "epoch": 0.45977011494252873,
368
+ "eval_logits/chosen": -0.617470383644104,
369
+ "eval_logits/rejected": -0.5920071601867676,
370
+ "eval_logps/chosen": -217.05372619628906,
371
+ "eval_logps/rejected": -218.20924377441406,
372
+ "eval_loss": 2.20650315284729,
373
+ "eval_nll_loss": 0.624081552028656,
374
+ "eval_rewards/accuracies": 0.5282608866691589,
375
+ "eval_rewards/chosen": -21.705373764038086,
376
+ "eval_rewards/margins": 0.11555319279432297,
377
+ "eval_rewards/rejected": -21.8209285736084,
378
+ "eval_runtime": 73.6034,
379
+ "eval_samples_per_second": 24.809,
380
+ "eval_steps_per_second": 1.562,
381
+ "step": 20
382
+ },
383
+ {
384
+ "epoch": 0.4827586206896552,
385
+ "eval_logits/chosen": -0.6441444754600525,
386
+ "eval_logits/rejected": -0.6189336180686951,
387
+ "eval_logps/chosen": -214.8709716796875,
388
+ "eval_logps/rejected": -216.1072235107422,
389
+ "eval_loss": 2.193157911300659,
390
+ "eval_nll_loss": 0.6171812415122986,
391
+ "eval_rewards/accuracies": 0.5260869860649109,
392
+ "eval_rewards/chosen": -21.487096786499023,
393
+ "eval_rewards/margins": 0.12362580001354218,
394
+ "eval_rewards/rejected": -21.6107234954834,
395
+ "eval_runtime": 73.1268,
396
+ "eval_samples_per_second": 24.97,
397
+ "eval_steps_per_second": 1.573,
398
+ "step": 21
399
+ },
400
+ {
401
+ "epoch": 0.5057471264367817,
402
+ "eval_logits/chosen": -0.6693909168243408,
403
+ "eval_logits/rejected": -0.6444550156593323,
404
+ "eval_logps/chosen": -212.89871215820312,
405
+ "eval_logps/rejected": -214.12872314453125,
406
+ "eval_loss": 2.1838579177856445,
407
+ "eval_nll_loss": 0.6109142899513245,
408
+ "eval_rewards/accuracies": 0.519565224647522,
409
+ "eval_rewards/chosen": -21.289873123168945,
410
+ "eval_rewards/margins": 0.1229993924498558,
411
+ "eval_rewards/rejected": -21.412874221801758,
412
+ "eval_runtime": 73.3336,
413
+ "eval_samples_per_second": 24.9,
414
+ "eval_steps_per_second": 1.568,
415
+ "step": 22
416
+ },
417
+ {
418
+ "epoch": 0.5287356321839081,
419
+ "eval_logits/chosen": -0.6940123438835144,
420
+ "eval_logits/rejected": -0.6688118577003479,
421
+ "eval_logps/chosen": -210.87289428710938,
422
+ "eval_logps/rejected": -212.1172332763672,
423
+ "eval_loss": 2.17464280128479,
424
+ "eval_nll_loss": 0.6044757962226868,
425
+ "eval_rewards/accuracies": 0.5260869860649109,
426
+ "eval_rewards/chosen": -21.087289810180664,
427
+ "eval_rewards/margins": 0.12443248927593231,
428
+ "eval_rewards/rejected": -21.21172332763672,
429
+ "eval_runtime": 73.7107,
430
+ "eval_samples_per_second": 24.773,
431
+ "eval_steps_per_second": 1.56,
432
+ "step": 23
433
+ },
434
+ {
435
+ "epoch": 0.5517241379310345,
436
+ "eval_logits/chosen": -0.7184363603591919,
437
+ "eval_logits/rejected": -0.6937569379806519,
438
+ "eval_logps/chosen": -209.13641357421875,
439
+ "eval_logps/rejected": -210.39794921875,
440
+ "eval_loss": 2.1655774116516113,
441
+ "eval_nll_loss": 0.5988763570785522,
442
+ "eval_rewards/accuracies": 0.5239130258560181,
443
+ "eval_rewards/chosen": -20.91364097595215,
444
+ "eval_rewards/margins": 0.12615376710891724,
445
+ "eval_rewards/rejected": -21.039793014526367,
446
+ "eval_runtime": 73.2196,
447
+ "eval_samples_per_second": 24.939,
448
+ "eval_steps_per_second": 1.571,
449
+ "step": 24
450
+ },
451
+ {
452
+ "epoch": 0.5747126436781609,
453
+ "eval_logits/chosen": -0.7364875078201294,
454
+ "eval_logits/rejected": -0.711971640586853,
455
+ "eval_logps/chosen": -207.19107055664062,
456
+ "eval_logps/rejected": -208.48138427734375,
457
+ "eval_loss": 2.155548572540283,
458
+ "eval_nll_loss": 0.5926215052604675,
459
+ "eval_rewards/accuracies": 0.5282608866691589,
460
+ "eval_rewards/chosen": -20.719106674194336,
461
+ "eval_rewards/margins": 0.12903204560279846,
462
+ "eval_rewards/rejected": -20.8481388092041,
463
+ "eval_runtime": 73.1876,
464
+ "eval_samples_per_second": 24.95,
465
+ "eval_steps_per_second": 1.571,
466
+ "step": 25
467
+ },
468
+ {
469
+ "epoch": 0.5977011494252874,
470
+ "eval_logits/chosen": -0.7545364499092102,
471
+ "eval_logits/rejected": -0.730129599571228,
472
+ "eval_logps/chosen": -205.48521423339844,
473
+ "eval_logps/rejected": -206.7897186279297,
474
+ "eval_loss": 2.1465742588043213,
475
+ "eval_nll_loss": 0.5872200727462769,
476
+ "eval_rewards/accuracies": 0.5282608866691589,
477
+ "eval_rewards/chosen": -20.548521041870117,
478
+ "eval_rewards/margins": 0.13045117259025574,
479
+ "eval_rewards/rejected": -20.678974151611328,
480
+ "eval_runtime": 73.5262,
481
+ "eval_samples_per_second": 24.835,
482
+ "eval_steps_per_second": 1.564,
483
+ "step": 26
484
+ },
485
+ {
486
+ "epoch": 0.6206896551724138,
487
+ "eval_logits/chosen": -0.7720378041267395,
488
+ "eval_logits/rejected": -0.7476205825805664,
489
+ "eval_logps/chosen": -203.7217559814453,
490
+ "eval_logps/rejected": -205.04006958007812,
491
+ "eval_loss": 2.139249801635742,
492
+ "eval_nll_loss": 0.5815550684928894,
493
+ "eval_rewards/accuracies": 0.5369565486907959,
494
+ "eval_rewards/chosen": -20.37217903137207,
495
+ "eval_rewards/margins": 0.13182921707630157,
496
+ "eval_rewards/rejected": -20.504005432128906,
497
+ "eval_runtime": 73.5829,
498
+ "eval_samples_per_second": 24.816,
499
+ "eval_steps_per_second": 1.563,
500
+ "step": 27
501
+ },
502
+ {
503
+ "epoch": 0.6436781609195402,
504
+ "eval_logits/chosen": -0.781804621219635,
505
+ "eval_logits/rejected": -0.7575309872627258,
506
+ "eval_logps/chosen": -201.85330200195312,
507
+ "eval_logps/rejected": -203.2164306640625,
508
+ "eval_loss": 2.1307995319366455,
509
+ "eval_nll_loss": 0.5756080150604248,
510
+ "eval_rewards/accuracies": 0.532608687877655,
511
+ "eval_rewards/chosen": -20.18532943725586,
512
+ "eval_rewards/margins": 0.13631057739257812,
513
+ "eval_rewards/rejected": -20.321643829345703,
514
+ "eval_runtime": 73.6844,
515
+ "eval_samples_per_second": 24.781,
516
+ "eval_steps_per_second": 1.561,
517
+ "step": 28
518
+ },
519
+ {
520
+ "epoch": 0.6666666666666666,
521
+ "eval_logits/chosen": -0.7925211787223816,
522
+ "eval_logits/rejected": -0.768252432346344,
523
+ "eval_logps/chosen": -199.9458770751953,
524
+ "eval_logps/rejected": -201.3154754638672,
525
+ "eval_loss": 2.1228978633880615,
526
+ "eval_nll_loss": 0.5694720149040222,
527
+ "eval_rewards/accuracies": 0.5282608866691589,
528
+ "eval_rewards/chosen": -19.994586944580078,
529
+ "eval_rewards/margins": 0.13696083426475525,
530
+ "eval_rewards/rejected": -20.131547927856445,
531
+ "eval_runtime": 73.7355,
532
+ "eval_samples_per_second": 24.764,
533
+ "eval_steps_per_second": 1.56,
534
+ "step": 29
535
+ },
536
+ {
537
+ "epoch": 0.6896551724137931,
538
+ "grad_norm": 55.80259323120117,
539
+ "learning_rate": 3.4210526315789473e-07,
540
+ "logits/chosen": -0.6812049150466919,
541
+ "logits/rejected": -0.6623071432113647,
542
+ "logps/chosen": -199.8437042236328,
543
+ "logps/rejected": -201.27694702148438,
544
+ "loss": 2.3172,
545
+ "nll_loss": 0.5909140706062317,
546
+ "rewards/accuracies": 0.53125,
547
+ "rewards/chosen": -19.984371185302734,
548
+ "rewards/margins": 0.14332275092601776,
549
+ "rewards/rejected": -20.127695083618164,
550
+ "step": 30
551
+ },
552
+ {
553
+ "epoch": 0.6896551724137931,
554
+ "eval_logits/chosen": -0.7975767254829407,
555
+ "eval_logits/rejected": -0.7734904885292053,
556
+ "eval_logps/chosen": -197.8929901123047,
557
+ "eval_logps/rejected": -199.30410766601562,
558
+ "eval_loss": 2.113354206085205,
559
+ "eval_nll_loss": 0.5630350708961487,
560
+ "eval_rewards/accuracies": 0.5260869860649109,
561
+ "eval_rewards/chosen": -19.78929901123047,
562
+ "eval_rewards/margins": 0.1411115825176239,
563
+ "eval_rewards/rejected": -19.93041229248047,
564
+ "eval_runtime": 73.6357,
565
+ "eval_samples_per_second": 24.798,
566
+ "eval_steps_per_second": 1.562,
567
+ "step": 30
568
+ },
569
+ {
570
+ "epoch": 0.7126436781609196,
571
+ "eval_logits/chosen": -0.7977136969566345,
572
+ "eval_logits/rejected": -0.7735068202018738,
573
+ "eval_logps/chosen": -195.95989990234375,
574
+ "eval_logps/rejected": -197.4013214111328,
575
+ "eval_loss": 2.1055009365081787,
576
+ "eval_nll_loss": 0.5569384098052979,
577
+ "eval_rewards/accuracies": 0.5282608866691589,
578
+ "eval_rewards/chosen": -19.595989227294922,
579
+ "eval_rewards/margins": 0.1441420167684555,
580
+ "eval_rewards/rejected": -19.74013328552246,
581
+ "eval_runtime": 73.0556,
582
+ "eval_samples_per_second": 24.995,
583
+ "eval_steps_per_second": 1.574,
584
+ "step": 31
585
+ },
586
+ {
587
+ "epoch": 0.735632183908046,
588
+ "eval_logits/chosen": -0.80599045753479,
589
+ "eval_logits/rejected": -0.7817136645317078,
590
+ "eval_logps/chosen": -194.0162811279297,
591
+ "eval_logps/rejected": -195.46153259277344,
592
+ "eval_loss": 2.0985281467437744,
593
+ "eval_nll_loss": 0.5507530570030212,
594
+ "eval_rewards/accuracies": 0.52173912525177,
595
+ "eval_rewards/chosen": -19.401628494262695,
596
+ "eval_rewards/margins": 0.14452561736106873,
597
+ "eval_rewards/rejected": -19.546154022216797,
598
+ "eval_runtime": 73.1881,
599
+ "eval_samples_per_second": 24.949,
600
+ "eval_steps_per_second": 1.571,
601
+ "step": 32
602
+ },
603
+ {
604
+ "epoch": 0.7586206896551724,
605
+ "eval_logits/chosen": -0.8030232787132263,
606
+ "eval_logits/rejected": -0.7785286903381348,
607
+ "eval_logps/chosen": -192.11659240722656,
608
+ "eval_logps/rejected": -193.61715698242188,
609
+ "eval_loss": 2.0903804302215576,
610
+ "eval_nll_loss": 0.5446676015853882,
611
+ "eval_rewards/accuracies": 0.5239130258560181,
612
+ "eval_rewards/chosen": -19.211658477783203,
613
+ "eval_rewards/margins": 0.1500559002161026,
614
+ "eval_rewards/rejected": -19.36171531677246,
615
+ "eval_runtime": 73.4088,
616
+ "eval_samples_per_second": 24.874,
617
+ "eval_steps_per_second": 1.567,
618
+ "step": 33
619
+ },
620
+ {
621
+ "epoch": 0.7816091954022989,
622
+ "eval_logits/chosen": -0.8003183603286743,
623
+ "eval_logits/rejected": -0.7758002281188965,
624
+ "eval_logps/chosen": -190.38067626953125,
625
+ "eval_logps/rejected": -191.8131561279297,
626
+ "eval_loss": 2.08504056930542,
627
+ "eval_nll_loss": 0.539174497127533,
628
+ "eval_rewards/accuracies": 0.5239130258560181,
629
+ "eval_rewards/chosen": -19.038066864013672,
630
+ "eval_rewards/margins": 0.1432473063468933,
631
+ "eval_rewards/rejected": -19.18131446838379,
632
+ "eval_runtime": 73.4902,
633
+ "eval_samples_per_second": 24.847,
634
+ "eval_steps_per_second": 1.565,
635
+ "step": 34
636
+ },
637
+ {
638
+ "epoch": 0.8045977011494253,
639
+ "eval_logits/chosen": -0.796375036239624,
640
+ "eval_logits/rejected": -0.7714610695838928,
641
+ "eval_logps/chosen": -188.9884033203125,
642
+ "eval_logps/rejected": -190.43736267089844,
643
+ "eval_loss": 2.0792500972747803,
644
+ "eval_nll_loss": 0.5345708131790161,
645
+ "eval_rewards/accuracies": 0.5173913240432739,
646
+ "eval_rewards/chosen": -18.898839950561523,
647
+ "eval_rewards/margins": 0.1448965221643448,
648
+ "eval_rewards/rejected": -19.043737411499023,
649
+ "eval_runtime": 73.2997,
650
+ "eval_samples_per_second": 24.911,
651
+ "eval_steps_per_second": 1.569,
652
+ "step": 35
653
+ },
654
+ {
655
+ "epoch": 0.8275862068965517,
656
+ "eval_logits/chosen": -0.7951973080635071,
657
+ "eval_logits/rejected": -0.7701032757759094,
658
+ "eval_logps/chosen": -187.54518127441406,
659
+ "eval_logps/rejected": -188.98013305664062,
660
+ "eval_loss": 2.0720129013061523,
661
+ "eval_nll_loss": 0.5298618078231812,
662
+ "eval_rewards/accuracies": 0.519565224647522,
663
+ "eval_rewards/chosen": -18.754518508911133,
664
+ "eval_rewards/margins": 0.14349476993083954,
665
+ "eval_rewards/rejected": -18.898012161254883,
666
+ "eval_runtime": 73.4171,
667
+ "eval_samples_per_second": 24.872,
668
+ "eval_steps_per_second": 1.566,
669
+ "step": 36
670
+ },
671
+ {
672
+ "epoch": 0.8505747126436781,
673
+ "eval_logits/chosen": -0.7926805019378662,
674
+ "eval_logits/rejected": -0.7679208517074585,
675
+ "eval_logps/chosen": -186.56715393066406,
676
+ "eval_logps/rejected": -188.0532684326172,
677
+ "eval_loss": 2.0663270950317383,
678
+ "eval_nll_loss": 0.526580810546875,
679
+ "eval_rewards/accuracies": 0.5260869860649109,
680
+ "eval_rewards/chosen": -18.656715393066406,
681
+ "eval_rewards/margins": 0.14861242473125458,
682
+ "eval_rewards/rejected": -18.805326461791992,
683
+ "eval_runtime": 73.5278,
684
+ "eval_samples_per_second": 24.834,
685
+ "eval_steps_per_second": 1.564,
686
+ "step": 37
687
+ },
688
+ {
689
+ "epoch": 0.8735632183908046,
690
+ "eval_logits/chosen": -0.7882456183433533,
691
+ "eval_logits/rejected": -0.7631468176841736,
692
+ "eval_logps/chosen": -185.62677001953125,
693
+ "eval_logps/rejected": -187.13912963867188,
694
+ "eval_loss": 2.0643482208251953,
695
+ "eval_nll_loss": 0.5234898924827576,
696
+ "eval_rewards/accuracies": 0.5239130258560181,
697
+ "eval_rewards/chosen": -18.56267738342285,
698
+ "eval_rewards/margins": 0.15123440325260162,
699
+ "eval_rewards/rejected": -18.713911056518555,
700
+ "eval_runtime": 73.4858,
701
+ "eval_samples_per_second": 24.848,
702
+ "eval_steps_per_second": 1.565,
703
+ "step": 38
704
+ },
705
+ {
706
+ "epoch": 0.896551724137931,
707
+ "eval_logits/chosen": -0.7857053279876709,
708
+ "eval_logits/rejected": -0.7608606815338135,
709
+ "eval_logps/chosen": -185.09970092773438,
710
+ "eval_logps/rejected": -186.60646057128906,
711
+ "eval_loss": 2.0600922107696533,
712
+ "eval_nll_loss": 0.5217379927635193,
713
+ "eval_rewards/accuracies": 0.5282608866691589,
714
+ "eval_rewards/chosen": -18.509971618652344,
715
+ "eval_rewards/margins": 0.15067508816719055,
716
+ "eval_rewards/rejected": -18.66064453125,
717
+ "eval_runtime": 73.7485,
718
+ "eval_samples_per_second": 24.76,
719
+ "eval_steps_per_second": 1.559,
720
+ "step": 39
721
+ },
722
+ {
723
+ "epoch": 0.9195402298850575,
724
+ "grad_norm": 50.088340759277344,
725
+ "learning_rate": 7.894736842105262e-08,
726
+ "logits/chosen": -0.8007175326347351,
727
+ "logits/rejected": -0.7798112630844116,
728
+ "logps/chosen": -190.50381469726562,
729
+ "logps/rejected": -193.3760223388672,
730
+ "loss": 2.1039,
731
+ "nll_loss": 0.5438653230667114,
732
+ "rewards/accuracies": 0.546875,
733
+ "rewards/chosen": -19.05038070678711,
734
+ "rewards/margins": 0.2872214913368225,
735
+ "rewards/rejected": -19.337600708007812,
736
+ "step": 40
737
+ },
738
+ {
739
+ "epoch": 0.9195402298850575,
740
+ "eval_logits/chosen": -0.785999596118927,
741
+ "eval_logits/rejected": -0.7610748410224915,
742
+ "eval_logps/chosen": -184.6099090576172,
743
+ "eval_logps/rejected": -186.1282958984375,
744
+ "eval_loss": 2.0597591400146484,
745
+ "eval_nll_loss": 0.5201125144958496,
746
+ "eval_rewards/accuracies": 0.5282608866691589,
747
+ "eval_rewards/chosen": -18.46099090576172,
748
+ "eval_rewards/margins": 0.15183939039707184,
749
+ "eval_rewards/rejected": -18.612829208374023,
750
+ "eval_runtime": 73.6777,
751
+ "eval_samples_per_second": 24.784,
752
+ "eval_steps_per_second": 1.561,
753
+ "step": 40
754
+ },
755
+ {
756
+ "epoch": 0.9425287356321839,
757
+ "eval_logits/chosen": -0.7789402604103088,
758
+ "eval_logits/rejected": -0.754026472568512,
759
+ "eval_logps/chosen": -184.23236083984375,
760
+ "eval_logps/rejected": -185.80072021484375,
761
+ "eval_loss": 2.0538711547851562,
762
+ "eval_nll_loss": 0.5189568400382996,
763
+ "eval_rewards/accuracies": 0.5260869860649109,
764
+ "eval_rewards/chosen": -18.423233032226562,
765
+ "eval_rewards/margins": 0.15683722496032715,
766
+ "eval_rewards/rejected": -18.5800724029541,
767
+ "eval_runtime": 73.0726,
768
+ "eval_samples_per_second": 24.989,
769
+ "eval_steps_per_second": 1.574,
770
+ "step": 41
771
+ },
772
+ {
773
+ "epoch": 0.9655172413793104,
774
+ "eval_logits/chosen": -0.777718722820282,
775
+ "eval_logits/rejected": -0.7525457739830017,
776
+ "eval_logps/chosen": -183.968994140625,
777
+ "eval_logps/rejected": -185.52581787109375,
778
+ "eval_loss": 2.054420232772827,
779
+ "eval_nll_loss": 0.518138587474823,
780
+ "eval_rewards/accuracies": 0.5282608866691589,
781
+ "eval_rewards/chosen": -18.396900177001953,
782
+ "eval_rewards/margins": 0.15568143129348755,
783
+ "eval_rewards/rejected": -18.552579879760742,
784
+ "eval_runtime": 73.2982,
785
+ "eval_samples_per_second": 24.912,
786
+ "eval_steps_per_second": 1.569,
787
+ "step": 42
788
+ },
789
+ {
790
+ "epoch": 0.9885057471264368,
791
+ "eval_logits/chosen": -0.779742419719696,
792
+ "eval_logits/rejected": -0.755063533782959,
793
+ "eval_logps/chosen": -183.93116760253906,
794
+ "eval_logps/rejected": -185.45208740234375,
795
+ "eval_loss": 2.0544536113739014,
796
+ "eval_nll_loss": 0.5179869532585144,
797
+ "eval_rewards/accuracies": 0.5260869860649109,
798
+ "eval_rewards/chosen": -18.393117904663086,
799
+ "eval_rewards/margins": 0.15209028124809265,
800
+ "eval_rewards/rejected": -18.54520606994629,
801
+ "eval_runtime": 73.5834,
802
+ "eval_samples_per_second": 24.815,
803
+ "eval_steps_per_second": 1.563,
804
+ "step": 43
805
+ },
806
+ {
807
+ "epoch": 0.9885057471264368,
808
+ "step": 43,
809
+ "total_flos": 0.0,
810
+ "train_loss": 2.3523660704146985,
811
+ "train_runtime": 5387.5537,
812
+ "train_samples_per_second": 1.031,
813
+ "train_steps_per_second": 0.008
814
+ }
815
+ ],
816
+ "logging_steps": 10,
817
+ "max_steps": 43,
818
+ "num_input_tokens_seen": 0,
819
+ "num_train_epochs": 1,
820
+ "save_steps": 10,
821
+ "stateful_callbacks": {
822
+ "TrainerControl": {
823
+ "args": {
824
+ "should_epoch_stop": false,
825
+ "should_evaluate": false,
826
+ "should_log": false,
827
+ "should_save": true,
828
+ "should_training_stop": true
829
+ },
830
+ "attributes": {}
831
+ }
832
+ },
833
+ "total_flos": 0.0,
834
+ "train_batch_size": 4,
835
+ "trial_name": null,
836
+ "trial_params": null
837
+ }