Isaak Carter Augustus commited on
Commit
6eb8ee4
1 Parent(s): c40629c

Update josie_dict.txt

Browse files
Files changed (1) hide show
  1. josie_dict.txt +569 -394
josie_dict.txt CHANGED
@@ -1,403 +1,578 @@
1
- Model's state_dict:
2
- encoder.modality_preprocessors.vision.cls_token torch.Size([1, 1, 768])
3
- encoder.modality_preprocessors.vision.rgbt_stem.proj.1.weight torch.Size([768, 3, 2, 14, 14])
4
- encoder.modality_preprocessors.vision.pos_embedding_helper.pos_embed torch.Size([1, 7681, 768])
 
 
 
 
 
5
  encoder.modality_preprocessors.audio.cls_token torch.Size([1, 1, 768])
6
  encoder.modality_preprocessors.audio.rgbt_stem.proj.weight torch.Size([768, 1, 16, 16])
7
  encoder.modality_preprocessors.audio.rgbt_stem.norm_layer.weight torch.Size([768])
 
8
  encoder.modality_preprocessors.audio.pos_embedding_helper.pos_embed torch.Size([1, 229, 768])
9
- encoder.modality_preprocessors.depth.cls_token torch.Size([1, 1, 384])
10
- encoder.modality_preprocessors.depth.depth_stem.proj.weight torch.Size([384, 1, 16, 16])
11
- encoder.modality_preprocessors.depth.depth_stem.norm_layer.weight torch.Size([384])
12
- encoder.modality_preprocessors.depth.pos_embedding_helper.pos_embed torch.Size([1, 197, 384])
13
- encoder.modality_preprocessors.thermal.cls_token torch.Size([1, 1, 768])
14
- encoder.modality_preprocessors.thermal.rgbt_stem.proj.weight torch.Size([768, 1, 16, 16])
15
- encoder.modality_preprocessors.thermal.rgbt_stem.norm_layer.weight torch.Size([768])
16
- encoder.modality_preprocessors.thermal.pos_embedding_helper.pos_embed torch.Size([1, 197, 768])
17
- encoder.modality_transformers.vision.pre_transformer_layer.0.weight torch.Size([768])
18
- encoder.modality_transformers.vision.blocks.0.attn.in_proj_weight torch.Size([2304, 768])
19
- encoder.modality_transformers.vision.blocks.0.attn.in_proj_bias torch.Size([2304])
20
- encoder.modality_transformers.vision.blocks.0.attn.out_proj.weight torch.Size([768, 768])
21
- encoder.modality_transformers.vision.blocks.0.attn.out_proj.bias torch.Size([768])
22
- encoder.modality_transformers.vision.blocks.0.norm1.weight torch.Size([768])
23
- encoder.modality_transformers.vision.blocks.0.norm2.weight torch.Size([768])
24
- encoder.modality_transformers.vision.blocks.0.mlp.w1.weight torch.Size([512, 768])
25
- encoder.modality_transformers.vision.blocks.0.mlp.w2.weight torch.Size([768, 512])
26
- encoder.modality_transformers.vision.blocks.0.mlp.w3.weight torch.Size([512, 768])
27
- encoder.modality_transformers.vision.blocks.1.attn.in_proj_weight torch.Size([2304, 768])
28
- encoder.modality_transformers.vision.blocks.1.attn.in_proj_bias torch.Size([2304])
29
- encoder.modality_transformers.vision.blocks.1.attn.out_proj.weight torch.Size([768, 768])
30
- encoder.modality_transformers.vision.blocks.1.attn.out_proj.bias torch.Size([768])
31
- encoder.modality_transformers.vision.blocks.1.norm1.weight torch.Size([768])
32
- encoder.modality_transformers.vision.blocks.1.norm2.weight torch.Size([768])
33
- encoder.modality_transformers.vision.blocks.1.mlp.w1.weight torch.Size([512, 768])
34
- encoder.modality_transformers.vision.blocks.1.mlp.w2.weight torch.Size([768, 512])
35
- encoder.modality_transformers.vision.blocks.1.mlp.w3.weight torch.Size([512, 768])
36
- encoder.modality_transformers.vision.blocks.2.attn.in_proj_weight torch.Size([2304, 768])
37
- encoder.modality_transformers.vision.blocks.2.attn.in_proj_bias torch.Size([2304])
38
- encoder.modality_transformers.vision.blocks.2.attn.out_proj.weight torch.Size([768, 768])
39
- encoder.modality_transformers.vision.blocks.2.attn.out_proj.bias torch.Size([768])
40
- encoder.modality_transformers.vision.blocks.2.norm1.weight torch.Size([768])
41
- encoder.modality_transformers.vision.blocks.2.norm2.weight torch.Size([768])
42
- encoder.modality_transformers.vision.blocks.2.mlp.w1.weight torch.Size([512, 768])
43
- encoder.modality_transformers.vision.blocks.2.mlp.w2.weight torch.Size([768, 512])
44
- encoder.modality_transformers.vision.blocks.2.mlp.w3.weight torch.Size([512, 768])
45
- encoder.modality_transformers.vision.blocks.3.attn.in_proj_weight torch.Size([2304, 768])
46
- encoder.modality_transformers.vision.blocks.3.attn.in_proj_bias torch.Size([2304])
47
- encoder.modality_transformers.vision.blocks.3.attn.out_proj.weight torch.Size([768, 768])
48
- encoder.modality_transformers.vision.blocks.3.attn.out_proj.bias torch.Size([768])
49
- encoder.modality_transformers.vision.blocks.3.norm1.weight torch.Size([768])
50
- encoder.modality_transformers.vision.blocks.3.norm2.weight torch.Size([768])
51
- encoder.modality_transformers.vision.blocks.3.mlp.w1.weight torch.Size([512, 768])
52
- encoder.modality_transformers.vision.blocks.3.mlp.w2.weight torch.Size([768, 512])
53
- encoder.modality_transformers.vision.blocks.3.mlp.w3.weight torch.Size([512, 768])
54
- encoder.modality_transformers.vision.blocks.4.attn.in_proj_weight torch.Size([2304, 768])
55
- encoder.modality_transformers.vision.blocks.4.attn.in_proj_bias torch.Size([2304])
56
- encoder.modality_transformers.vision.blocks.4.attn.out_proj.weight torch.Size([768, 768])
57
- encoder.modality_transformers.vision.blocks.4.attn.out_proj.bias torch.Size([768])
58
- encoder.modality_transformers.vision.blocks.4.norm1.weight torch.Size([768])
59
- encoder.modality_transformers.vision.blocks.4.norm2.weight torch.Size([768])
60
- encoder.modality_transformers.vision.blocks.4.mlp.w1.weight torch.Size([512, 768])
61
- encoder.modality_transformers.vision.blocks.4.mlp.w2.weight torch.Size([768, 512])
62
- encoder.modality_transformers.vision.blocks.4.mlp.w3.weight torch.Size([512, 768])
63
- encoder.modality_transformers.vision.blocks.5.attn.in_proj_weight torch.Size([2304, 768])
64
- encoder.modality_transformers.vision.blocks.5.attn.in_proj_bias torch.Size([2304])
65
- encoder.modality_transformers.vision.blocks.5.attn.out_proj.weight torch.Size([768, 768])
66
- encoder.modality_transformers.vision.blocks.5.attn.out_proj.bias torch.Size([768])
67
- encoder.modality_transformers.vision.blocks.5.norm1.weight torch.Size([768])
68
- encoder.modality_transformers.vision.blocks.5.norm2.weight torch.Size([768])
69
- encoder.modality_transformers.vision.blocks.5.mlp.w1.weight torch.Size([512, 768])
70
- encoder.modality_transformers.vision.blocks.5.mlp.w2.weight torch.Size([768, 512])
71
- encoder.modality_transformers.vision.blocks.5.mlp.w3.weight torch.Size([512, 768])
72
- encoder.modality_transformers.vision.blocks.6.attn.in_proj_weight torch.Size([2304, 768])
73
- encoder.modality_transformers.vision.blocks.6.attn.in_proj_bias torch.Size([2304])
74
- encoder.modality_transformers.vision.blocks.6.attn.out_proj.weight torch.Size([768, 768])
75
- encoder.modality_transformers.vision.blocks.6.attn.out_proj.bias torch.Size([768])
76
- encoder.modality_transformers.vision.blocks.6.norm1.weight torch.Size([768])
77
- encoder.modality_transformers.vision.blocks.6.norm2.weight torch.Size([768])
78
- encoder.modality_transformers.vision.blocks.6.mlp.w1.weight torch.Size([512, 768])
79
- encoder.modality_transformers.vision.blocks.6.mlp.w2.weight torch.Size([768, 512])
80
- encoder.modality_transformers.vision.blocks.6.mlp.w3.weight torch.Size([512, 768])
81
- encoder.modality_transformers.vision.blocks.7.attn.in_proj_weight torch.Size([2304, 768])
82
- encoder.modality_transformers.vision.blocks.7.attn.in_proj_bias torch.Size([2304])
83
- encoder.modality_transformers.vision.blocks.7.attn.out_proj.weight torch.Size([768, 768])
84
- encoder.modality_transformers.vision.blocks.7.attn.out_proj.bias torch.Size([768])
85
- encoder.modality_transformers.vision.blocks.7.norm1.weight torch.Size([768])
86
- encoder.modality_transformers.vision.blocks.7.norm2.weight torch.Size([768])
87
- encoder.modality_transformers.vision.blocks.7.mlp.w1.weight torch.Size([512, 768])
88
- encoder.modality_transformers.vision.blocks.7.mlp.w2.weight torch.Size([768, 512])
89
- encoder.modality_transformers.vision.blocks.7.mlp.w3.weight torch.Size([512, 768])
90
- encoder.modality_transformers.vision.blocks.8.attn.in_proj_weight torch.Size([2304, 768])
91
- encoder.modality_transformers.vision.blocks.8.attn.in_proj_bias torch.Size([2304])
92
- encoder.modality_transformers.vision.blocks.8.attn.out_proj.weight torch.Size([768, 768])
93
- encoder.modality_transformers.vision.blocks.8.attn.out_proj.bias torch.Size([768])
94
- encoder.modality_transformers.vision.blocks.8.norm1.weight torch.Size([768])
95
- encoder.modality_transformers.vision.blocks.8.norm2.weight torch.Size([768])
96
- encoder.modality_transformers.vision.blocks.8.mlp.w1.weight torch.Size([512, 768])
97
- encoder.modality_transformers.vision.blocks.8.mlp.w2.weight torch.Size([768, 512])
98
- encoder.modality_transformers.vision.blocks.8.mlp.w3.weight torch.Size([512, 768])
99
- encoder.modality_transformers.vision.blocks.9.attn.in_proj_weight torch.Size([2304, 768])
100
- encoder.modality_transformers.vision.blocks.9.attn.in_proj_bias torch.Size([2304])
101
- encoder.modality_transformers.vision.blocks.9.attn.out_proj.weight torch.Size([768, 768])
102
- encoder.modality_transformers.vision.blocks.9.attn.out_proj.bias torch.Size([768])
103
- encoder.modality_transformers.vision.blocks.9.norm1.weight torch.Size([768])
104
- encoder.modality_transformers.vision.blocks.9.norm2.weight torch.Size([768])
105
- encoder.modality_transformers.vision.blocks.9.mlp.w1.weight torch.Size([512, 768])
106
- encoder.modality_transformers.vision.blocks.9.mlp.w2.weight torch.Size([768, 512])
107
- encoder.modality_transformers.vision.blocks.9.mlp.w3.weight torch.Size([512, 768])
108
- encoder.modality_transformers.vision.blocks.10.attn.in_proj_weight torch.Size([2304, 768])
109
- encoder.modality_transformers.vision.blocks.10.attn.in_proj_bias torch.Size([2304])
110
- encoder.modality_transformers.vision.blocks.10.attn.out_proj.weight torch.Size([768, 768])
111
- encoder.modality_transformers.vision.blocks.10.attn.out_proj.bias torch.Size([768])
112
- encoder.modality_transformers.vision.blocks.10.norm1.weight torch.Size([768])
113
- encoder.modality_transformers.vision.blocks.10.norm2.weight torch.Size([768])
114
- encoder.modality_transformers.vision.blocks.10.mlp.w1.weight torch.Size([512, 768])
115
- encoder.modality_transformers.vision.blocks.10.mlp.w2.weight torch.Size([768, 512])
116
- encoder.modality_transformers.vision.blocks.10.mlp.w3.weight torch.Size([512, 768])
117
- encoder.modality_transformers.vision.blocks.11.attn.in_proj_weight torch.Size([2304, 768])
118
- encoder.modality_transformers.vision.blocks.11.attn.in_proj_bias torch.Size([2304])
119
- encoder.modality_transformers.vision.blocks.11.attn.out_proj.weight torch.Size([768, 768])
120
- encoder.modality_transformers.vision.blocks.11.attn.out_proj.bias torch.Size([768])
121
- encoder.modality_transformers.vision.blocks.11.norm1.weight torch.Size([768])
122
- encoder.modality_transformers.vision.blocks.11.norm2.weight torch.Size([768])
123
- encoder.modality_transformers.vision.blocks.11.mlp.w1.weight torch.Size([512, 768])
124
- encoder.modality_transformers.vision.blocks.11.mlp.w2.weight torch.Size([768, 512])
125
- encoder.modality_transformers.vision.blocks.11.mlp.w3.weight torch.Size([512, 768])
126
- encoder.modality_transformers.audio.pre_transformer_layer.0.weight torch.Size([768])
127
- encoder.modality_transformers.audio.blocks.0.attn.in_proj_weight torch.Size([2304, 768])
128
- encoder.modality_transformers.audio.blocks.0.attn.in_proj_bias torch.Size([2304])
129
- encoder.modality_transformers.audio.blocks.0.attn.bias_k torch.Size([1, 1, 768])
130
- encoder.modality_transformers.audio.blocks.0.attn.bias_v torch.Size([1, 1, 768])
131
- encoder.modality_transformers.audio.blocks.0.attn.out_proj.weight torch.Size([768, 768])
132
- encoder.modality_transformers.audio.blocks.0.attn.out_proj.bias torch.Size([768])
133
- encoder.modality_transformers.audio.blocks.0.norm1.weight torch.Size([768])
134
- encoder.modality_transformers.audio.blocks.0.norm2.weight torch.Size([768])
135
- encoder.modality_transformers.audio.blocks.0.mlp.w1.weight torch.Size([512, 768])
136
- encoder.modality_transformers.audio.blocks.0.mlp.w2.weight torch.Size([768, 512])
137
- encoder.modality_transformers.audio.blocks.0.mlp.w3.weight torch.Size([512, 768])
138
- encoder.modality_transformers.audio.blocks.1.attn.in_proj_weight torch.Size([2304, 768])
139
- encoder.modality_transformers.audio.blocks.1.attn.in_proj_bias torch.Size([2304])
140
- encoder.modality_transformers.audio.blocks.1.attn.bias_k torch.Size([1, 1, 768])
141
- encoder.modality_transformers.audio.blocks.1.attn.bias_v torch.Size([1, 1, 768])
142
- encoder.modality_transformers.audio.blocks.1.attn.out_proj.weight torch.Size([768, 768])
143
- encoder.modality_transformers.audio.blocks.1.attn.out_proj.bias torch.Size([768])
144
- encoder.modality_transformers.audio.blocks.1.norm1.weight torch.Size([768])
145
- encoder.modality_transformers.audio.blocks.1.norm2.weight torch.Size([768])
146
- encoder.modality_transformers.audio.blocks.1.mlp.w1.weight torch.Size([512, 768])
147
- encoder.modality_transformers.audio.blocks.1.mlp.w2.weight torch.Size([768, 512])
148
- encoder.modality_transformers.audio.blocks.1.mlp.w3.weight torch.Size([512, 768])
149
- encoder.modality_transformers.audio.blocks.2.attn.in_proj_weight torch.Size([2304, 768])
150
- encoder.modality_transformers.audio.blocks.2.attn.in_proj_bias torch.Size([2304])
151
- encoder.modality_transformers.audio.blocks.2.attn.bias_k torch.Size([1, 1, 768])
152
- encoder.modality_transformers.audio.blocks.2.attn.bias_v torch.Size([1, 1, 768])
153
- encoder.modality_transformers.audio.blocks.2.attn.out_proj.weight torch.Size([768, 768])
154
- encoder.modality_transformers.audio.blocks.2.attn.out_proj.bias torch.Size([768])
155
- encoder.modality_transformers.audio.blocks.2.norm1.weight torch.Size([768])
156
- encoder.modality_transformers.audio.blocks.2.norm2.weight torch.Size([768])
157
- encoder.modality_transformers.audio.blocks.2.mlp.w1.weight torch.Size([512, 768])
158
- encoder.modality_transformers.audio.blocks.2.mlp.w2.weight torch.Size([768, 512])
159
- encoder.modality_transformers.audio.blocks.2.mlp.w3.weight torch.Size([512, 768])
160
- encoder.modality_transformers.audio.blocks.3.attn.in_proj_weight torch.Size([2304, 768])
161
- encoder.modality_transformers.audio.blocks.3.attn.in_proj_bias torch.Size([2304])
162
- encoder.modality_transformers.audio.blocks.3.attn.bias_k torch.Size([1, 1, 768])
163
- encoder.modality_transformers.audio.blocks.3.attn.bias_v torch.Size([1, 1, 768])
164
- encoder.modality_transformers.audio.blocks.3.attn.out_proj.weight torch.Size([768, 768])
165
- encoder.modality_transformers.audio.blocks.3.attn.out_proj.bias torch.Size([768])
166
- encoder.modality_transformers.audio.blocks.3.norm1.weight torch.Size([768])
167
- encoder.modality_transformers.audio.blocks.3.norm2.weight torch.Size([768])
168
- encoder.modality_transformers.audio.blocks.3.mlp.w1.weight torch.Size([512, 768])
169
- encoder.modality_transformers.audio.blocks.3.mlp.w2.weight torch.Size([768, 512])
170
- encoder.modality_transformers.audio.blocks.3.mlp.w3.weight torch.Size([512, 768])
171
- encoder.modality_transformers.audio.blocks.4.attn.in_proj_weight torch.Size([2304, 768])
172
- encoder.modality_transformers.audio.blocks.4.attn.in_proj_bias torch.Size([2304])
173
- encoder.modality_transformers.audio.blocks.4.attn.bias_k torch.Size([1, 1, 768])
174
- encoder.modality_transformers.audio.blocks.4.attn.bias_v torch.Size([1, 1, 768])
175
- encoder.modality_transformers.audio.blocks.4.attn.out_proj.weight torch.Size([768, 768])
176
- encoder.modality_transformers.audio.blocks.4.attn.out_proj.bias torch.Size([768])
177
- encoder.modality_transformers.audio.blocks.4.norm1.weight torch.Size([768])
178
- encoder.modality_transformers.audio.blocks.4.norm2.weight torch.Size([768])
179
- encoder.modality_transformers.audio.blocks.4.mlp.w1.weight torch.Size([512, 768])
180
- encoder.modality_transformers.audio.blocks.4.mlp.w2.weight torch.Size([768, 512])
181
- encoder.modality_transformers.audio.blocks.4.mlp.w3.weight torch.Size([512, 768])
182
- encoder.modality_transformers.audio.blocks.5.attn.in_proj_weight torch.Size([2304, 768])
183
- encoder.modality_transformers.audio.blocks.5.attn.in_proj_bias torch.Size([2304])
184
- encoder.modality_transformers.audio.blocks.5.attn.bias_k torch.Size([1, 1, 768])
185
- encoder.modality_transformers.audio.blocks.5.attn.bias_v torch.Size([1, 1, 768])
186
- encoder.modality_transformers.audio.blocks.5.attn.out_proj.weight torch.Size([768, 768])
187
- encoder.modality_transformers.audio.blocks.5.attn.out_proj.bias torch.Size([768])
188
- encoder.modality_transformers.audio.blocks.5.norm1.weight torch.Size([768])
189
- encoder.modality_transformers.audio.blocks.5.norm2.weight torch.Size([768])
190
- encoder.modality_transformers.audio.blocks.5.mlp.w1.weight torch.Size([512, 768])
191
- encoder.modality_transformers.audio.blocks.5.mlp.w2.weight torch.Size([768, 512])
192
- encoder.modality_transformers.audio.blocks.5.mlp.w3.weight torch.Size([512, 768])
193
- encoder.modality_transformers.audio.blocks.6.attn.in_proj_weight torch.Size([2304, 768])
194
- encoder.modality_transformers.audio.blocks.6.attn.in_proj_bias torch.Size([2304])
195
- encoder.modality_transformers.audio.blocks.6.attn.bias_k torch.Size([1, 1, 768])
196
- encoder.modality_transformers.audio.blocks.6.attn.bias_v torch.Size([1, 1, 768])
197
- encoder.modality_transformers.audio.blocks.6.attn.out_proj.weight torch.Size([768, 768])
198
- encoder.modality_transformers.audio.blocks.6.attn.out_proj.bias torch.Size([768])
199
- encoder.modality_transformers.audio.blocks.6.norm1.weight torch.Size([768])
200
- encoder.modality_transformers.audio.blocks.6.norm2.weight torch.Size([768])
201
- encoder.modality_transformers.audio.blocks.6.mlp.w1.weight torch.Size([512, 768])
202
- encoder.modality_transformers.audio.blocks.6.mlp.w2.weight torch.Size([768, 512])
203
- encoder.modality_transformers.audio.blocks.6.mlp.w3.weight torch.Size([512, 768])
204
- encoder.modality_transformers.audio.blocks.7.attn.in_proj_weight torch.Size([2304, 768])
205
- encoder.modality_transformers.audio.blocks.7.attn.in_proj_bias torch.Size([2304])
206
- encoder.modality_transformers.audio.blocks.7.attn.bias_k torch.Size([1, 1, 768])
207
- encoder.modality_transformers.audio.blocks.7.attn.bias_v torch.Size([1, 1, 768])
208
- encoder.modality_transformers.audio.blocks.7.attn.out_proj.weight torch.Size([768, 768])
209
- encoder.modality_transformers.audio.blocks.7.attn.out_proj.bias torch.Size([768])
210
- encoder.modality_transformers.audio.blocks.7.norm1.weight torch.Size([768])
211
- encoder.modality_transformers.audio.blocks.7.norm2.weight torch.Size([768])
212
- encoder.modality_transformers.audio.blocks.7.mlp.w1.weight torch.Size([512, 768])
213
- encoder.modality_transformers.audio.blocks.7.mlp.w2.weight torch.Size([768, 512])
214
- encoder.modality_transformers.audio.blocks.7.mlp.w3.weight torch.Size([512, 768])
215
- encoder.modality_transformers.audio.blocks.8.attn.in_proj_weight torch.Size([2304, 768])
216
- encoder.modality_transformers.audio.blocks.8.attn.in_proj_bias torch.Size([2304])
217
- encoder.modality_transformers.audio.blocks.8.attn.bias_k torch.Size([1, 1, 768])
218
- encoder.modality_transformers.audio.blocks.8.attn.bias_v torch.Size([1, 1, 768])
219
- encoder.modality_transformers.audio.blocks.8.attn.out_proj.weight torch.Size([768, 768])
220
- encoder.modality_transformers.audio.blocks.8.attn.out_proj.bias torch.Size([768])
221
- encoder.modality_transformers.audio.blocks.8.norm1.weight torch.Size([768])
222
- encoder.modality_transformers.audio.blocks.8.norm2.weight torch.Size([768])
223
- encoder.modality_transformers.audio.blocks.8.mlp.w1.weight torch.Size([512, 768])
224
- encoder.modality_transformers.audio.blocks.8.mlp.w2.weight torch.Size([768, 512])
225
- encoder.modality_transformers.audio.blocks.8.mlp.w3.weight torch.Size([512, 768])
226
- encoder.modality_transformers.audio.blocks.9.attn.in_proj_weight torch.Size([2304, 768])
227
- encoder.modality_transformers.audio.blocks.9.attn.in_proj_bias torch.Size([2304])
228
- encoder.modality_transformers.audio.blocks.9.attn.bias_k torch.Size([1, 1, 768])
229
- encoder.modality_transformers.audio.blocks.9.attn.bias_v torch.Size([1, 1, 768])
230
- encoder.modality_transformers.audio.blocks.9.attn.out_proj.weight torch.Size([768, 768])
231
- encoder.modality_transformers.audio.blocks.9.attn.out_proj.bias torch.Size([768])
232
- encoder.modality_transformers.audio.blocks.9.norm1.weight torch.Size([768])
233
- encoder.modality_transformers.audio.blocks.9.norm2.weight torch.Size([768])
234
- encoder.modality_transformers.audio.blocks.9.mlp.w1.weight torch.Size([512, 768])
235
- encoder.modality_transformers.audio.blocks.9.mlp.w2.weight torch.Size([768, 512])
236
- encoder.modality_transformers.audio.blocks.9.mlp.w3.weight torch.Size([512, 768])
237
- encoder.modality_transformers.audio.blocks.10.attn.in_proj_weight torch.Size([2304, 768])
238
- encoder.modality_transformers.audio.blocks.10.attn.in_proj_bias torch.Size([2304])
239
- encoder.modality_transformers.audio.blocks.10.attn.bias_k torch.Size([1, 1, 768])
240
- encoder.modality_transformers.audio.blocks.10.attn.bias_v torch.Size([1, 1, 768])
241
- encoder.modality_transformers.audio.blocks.10.attn.out_proj.weight torch.Size([768, 768])
242
- encoder.modality_transformers.audio.blocks.10.attn.out_proj.bias torch.Size([768])
243
- encoder.modality_transformers.audio.blocks.10.norm1.weight torch.Size([768])
244
- encoder.modality_transformers.audio.blocks.10.norm2.weight torch.Size([768])
245
- encoder.modality_transformers.audio.blocks.10.mlp.w1.weight torch.Size([512, 768])
246
- encoder.modality_transformers.audio.blocks.10.mlp.w2.weight torch.Size([768, 512])
247
- encoder.modality_transformers.audio.blocks.10.mlp.w3.weight torch.Size([512, 768])
248
- encoder.modality_transformers.audio.blocks.11.attn.in_proj_weight torch.Size([2304, 768])
249
- encoder.modality_transformers.audio.blocks.11.attn.in_proj_bias torch.Size([2304])
250
- encoder.modality_transformers.audio.blocks.11.attn.bias_k torch.Size([1, 1, 768])
251
- encoder.modality_transformers.audio.blocks.11.attn.bias_v torch.Size([1, 1, 768])
252
- encoder.modality_transformers.audio.blocks.11.attn.out_proj.weight torch.Size([768, 768])
253
- encoder.modality_transformers.audio.blocks.11.attn.out_proj.bias torch.Size([768])
254
- encoder.modality_transformers.audio.blocks.11.norm1.weight torch.Size([768])
255
- encoder.modality_transformers.audio.blocks.11.norm2.weight torch.Size([768])
256
- encoder.modality_transformers.audio.blocks.11.mlp.w1.weight torch.Size([512, 768])
257
- encoder.modality_transformers.audio.blocks.11.mlp.w2.weight torch.Size([768, 512])
258
- encoder.modality_transformers.audio.blocks.11.mlp.w3.weight torch.Size([512, 768])
259
- encoder.modality_transformers.depth.pre_transformer_layer.0.weight torch.Size([384])
260
- encoder.modality_transformers.depth.blocks.0.attn.in_proj_weight torch.Size([1152, 384])
261
- encoder.modality_transformers.depth.blocks.0.attn.in_proj_bias torch.Size([1152])
262
- encoder.modality_transformers.depth.blocks.0.attn.bias_k torch.Size([1, 1, 384])
263
- encoder.modality_transformers.depth.blocks.0.attn.bias_v torch.Size([1, 1, 384])
264
- encoder.modality_transformers.depth.blocks.0.attn.out_proj.weight torch.Size([384, 384])
265
- encoder.modality_transformers.depth.blocks.0.attn.out_proj.bias torch.Size([384])
266
- encoder.modality_transformers.depth.blocks.0.norm1.weight torch.Size([384])
267
- encoder.modality_transformers.depth.blocks.0.norm2.weight torch.Size([384])
268
- encoder.modality_transformers.depth.blocks.0.mlp.w1.weight torch.Size([256, 384])
269
- encoder.modality_transformers.depth.blocks.0.mlp.w2.weight torch.Size([384, 256])
270
- encoder.modality_transformers.depth.blocks.0.mlp.w3.weight torch.Size([256, 384])
271
- encoder.modality_transformers.depth.blocks.1.attn.in_proj_weight torch.Size([1152, 384])
272
- encoder.modality_transformers.depth.blocks.1.attn.in_proj_bias torch.Size([1152])
273
- encoder.modality_transformers.depth.blocks.1.attn.bias_k torch.Size([1, 1, 384])
274
- encoder.modality_transformers.depth.blocks.1.attn.bias_v torch.Size([1, 1, 384])
275
- encoder.modality_transformers.depth.blocks.1.attn.out_proj.weight torch.Size([384, 384])
276
- encoder.modality_transformers.depth.blocks.1.attn.out_proj.bias torch.Size([384])
277
- encoder.modality_transformers.depth.blocks.1.norm1.weight torch.Size([384])
278
- encoder.modality_transformers.depth.blocks.1.norm2.weight torch.Size([384])
279
- encoder.modality_transformers.depth.blocks.1.mlp.w1.weight torch.Size([256, 384])
280
- encoder.modality_transformers.depth.blocks.1.mlp.w2.weight torch.Size([384, 256])
281
- encoder.modality_transformers.depth.blocks.1.mlp.w3.weight torch.Size([256, 384])
282
- encoder.modality_transformers.depth.blocks.2.attn.in_proj_weight torch.Size([1152, 384])
283
- encoder.modality_transformers.depth.blocks.2.attn.in_proj_bias torch.Size([1152])
284
- encoder.modality_transformers.depth.blocks.2.attn.bias_k torch.Size([1, 1, 384])
285
- encoder.modality_transformers.depth.blocks.2.attn.bias_v torch.Size([1, 1, 384])
286
- encoder.modality_transformers.depth.blocks.2.attn.out_proj.weight torch.Size([384, 384])
287
- encoder.modality_transformers.depth.blocks.2.attn.out_proj.bias torch.Size([384])
288
- encoder.modality_transformers.depth.blocks.2.norm1.weight torch.Size([384])
289
- encoder.modality_transformers.depth.blocks.2.norm2.weight torch.Size([384])
290
- encoder.modality_transformers.depth.blocks.2.mlp.w1.weight torch.Size([256, 384])
291
- encoder.modality_transformers.depth.blocks.2.mlp.w2.weight torch.Size([384, 256])
292
- encoder.modality_transformers.depth.blocks.2.mlp.w3.weight torch.Size([256, 384])
293
- encoder.modality_transformers.depth.blocks.3.attn.in_proj_weight torch.Size([1152, 384])
294
- encoder.modality_transformers.depth.blocks.3.attn.in_proj_bias torch.Size([1152])
295
- encoder.modality_transformers.depth.blocks.3.attn.bias_k torch.Size([1, 1, 384])
296
- encoder.modality_transformers.depth.blocks.3.attn.bias_v torch.Size([1, 1, 384])
297
- encoder.modality_transformers.depth.blocks.3.attn.out_proj.weight torch.Size([384, 384])
298
- encoder.modality_transformers.depth.blocks.3.attn.out_proj.bias torch.Size([384])
299
- encoder.modality_transformers.depth.blocks.3.norm1.weight torch.Size([384])
300
- encoder.modality_transformers.depth.blocks.3.norm2.weight torch.Size([384])
301
- encoder.modality_transformers.depth.blocks.3.mlp.w1.weight torch.Size([256, 384])
302
- encoder.modality_transformers.depth.blocks.3.mlp.w2.weight torch.Size([384, 256])
303
- encoder.modality_transformers.depth.blocks.3.mlp.w3.weight torch.Size([256, 384])
304
- encoder.modality_transformers.depth.blocks.4.attn.in_proj_weight torch.Size([1152, 384])
305
- encoder.modality_transformers.depth.blocks.4.attn.in_proj_bias torch.Size([1152])
306
- encoder.modality_transformers.depth.blocks.4.attn.bias_k torch.Size([1, 1, 384])
307
- encoder.modality_transformers.depth.blocks.4.attn.bias_v torch.Size([1, 1, 384])
308
- encoder.modality_transformers.depth.blocks.4.attn.out_proj.weight torch.Size([384, 384])
309
- encoder.modality_transformers.depth.blocks.4.attn.out_proj.bias torch.Size([384])
310
- encoder.modality_transformers.depth.blocks.4.norm1.weight torch.Size([384])
311
- encoder.modality_transformers.depth.blocks.4.norm2.weight torch.Size([384])
312
- encoder.modality_transformers.depth.blocks.4.mlp.w1.weight torch.Size([256, 384])
313
- encoder.modality_transformers.depth.blocks.4.mlp.w2.weight torch.Size([384, 256])
314
- encoder.modality_transformers.depth.blocks.4.mlp.w3.weight torch.Size([256, 384])
315
- encoder.modality_transformers.depth.blocks.5.attn.in_proj_weight torch.Size([1152, 384])
316
- encoder.modality_transformers.depth.blocks.5.attn.in_proj_bias torch.Size([1152])
317
- encoder.modality_transformers.depth.blocks.5.attn.bias_k torch.Size([1, 1, 384])
318
- encoder.modality_transformers.depth.blocks.5.attn.bias_v torch.Size([1, 1, 384])
319
- encoder.modality_transformers.depth.blocks.5.attn.out_proj.weight torch.Size([384, 384])
320
- encoder.modality_transformers.depth.blocks.5.attn.out_proj.bias torch.Size([384])
321
- encoder.modality_transformers.depth.blocks.5.norm1.weight torch.Size([384])
322
- encoder.modality_transformers.depth.blocks.5.norm2.weight torch.Size([384])
323
- encoder.modality_transformers.depth.blocks.5.mlp.w1.weight torch.Size([256, 384])
324
- encoder.modality_transformers.depth.blocks.5.mlp.w2.weight torch.Size([384, 256])
325
- encoder.modality_transformers.depth.blocks.5.mlp.w3.weight torch.Size([256, 384])
326
- encoder.modality_transformers.thermal.pre_transformer_layer.0.weight torch.Size([768])
327
- encoder.modality_transformers.thermal.blocks.0.attn.in_proj_weight torch.Size([2304, 768])
328
- encoder.modality_transformers.thermal.blocks.0.attn.in_proj_bias torch.Size([2304])
329
- encoder.modality_transformers.thermal.blocks.0.attn.bias_k torch.Size([1, 1, 768])
330
- encoder.modality_transformers.thermal.blocks.0.attn.bias_v torch.Size([1, 1, 768])
331
- encoder.modality_transformers.thermal.blocks.0.attn.out_proj.weight torch.Size([768, 768])
332
- encoder.modality_transformers.thermal.blocks.0.attn.out_proj.bias torch.Size([768])
333
- encoder.modality_transformers.thermal.blocks.0.norm1.weight torch.Size([768])
334
- encoder.modality_transformers.thermal.blocks.0.norm2.weight torch.Size([768])
335
- encoder.modality_transformers.thermal.blocks.0.mlp.w1.weight torch.Size([512, 768])
336
- encoder.modality_transformers.thermal.blocks.0.mlp.w2.weight torch.Size([768, 512])
337
- encoder.modality_transformers.thermal.blocks.0.mlp.w3.weight torch.Size([512, 768])
338
- encoder.modality_transformers.thermal.blocks.1.attn.in_proj_weight torch.Size([2304, 768])
339
- encoder.modality_transformers.thermal.blocks.1.attn.in_proj_bias torch.Size([2304])
340
- encoder.modality_transformers.thermal.blocks.1.attn.bias_k torch.Size([1, 1, 768])
341
- encoder.modality_transformers.thermal.blocks.1.attn.bias_v torch.Size([1, 1, 768])
342
- encoder.modality_transformers.thermal.blocks.1.attn.out_proj.weight torch.Size([768, 768])
343
- encoder.modality_transformers.thermal.blocks.1.attn.out_proj.bias torch.Size([768])
344
- encoder.modality_transformers.thermal.blocks.1.norm1.weight torch.Size([768])
345
- encoder.modality_transformers.thermal.blocks.1.norm2.weight torch.Size([768])
346
- encoder.modality_transformers.thermal.blocks.1.mlp.w1.weight torch.Size([512, 768])
347
- encoder.modality_transformers.thermal.blocks.1.mlp.w2.weight torch.Size([768, 512])
348
- encoder.modality_transformers.thermal.blocks.1.mlp.w3.weight torch.Size([512, 768])
349
- encoder.modality_transformers.thermal.blocks.2.attn.in_proj_weight torch.Size([2304, 768])
350
- encoder.modality_transformers.thermal.blocks.2.attn.in_proj_bias torch.Size([2304])
351
- encoder.modality_transformers.thermal.blocks.2.attn.bias_k torch.Size([1, 1, 768])
352
- encoder.modality_transformers.thermal.blocks.2.attn.bias_v torch.Size([1, 1, 768])
353
- encoder.modality_transformers.thermal.blocks.2.attn.out_proj.weight torch.Size([768, 768])
354
- encoder.modality_transformers.thermal.blocks.2.attn.out_proj.bias torch.Size([768])
355
- encoder.modality_transformers.thermal.blocks.2.norm1.weight torch.Size([768])
356
- encoder.modality_transformers.thermal.blocks.2.norm2.weight torch.Size([768])
357
- encoder.modality_transformers.thermal.blocks.2.mlp.w1.weight torch.Size([512, 768])
358
- encoder.modality_transformers.thermal.blocks.2.mlp.w2.weight torch.Size([768, 512])
359
- encoder.modality_transformers.thermal.blocks.2.mlp.w3.weight torch.Size([512, 768])
360
- encoder.modality_transformers.thermal.blocks.3.attn.in_proj_weight torch.Size([2304, 768])
361
- encoder.modality_transformers.thermal.blocks.3.attn.in_proj_bias torch.Size([2304])
362
- encoder.modality_transformers.thermal.blocks.3.attn.bias_k torch.Size([1, 1, 768])
363
- encoder.modality_transformers.thermal.blocks.3.attn.bias_v torch.Size([1, 1, 768])
364
- encoder.modality_transformers.thermal.blocks.3.attn.out_proj.weight torch.Size([768, 768])
365
- encoder.modality_transformers.thermal.blocks.3.attn.out_proj.bias torch.Size([768])
366
- encoder.modality_transformers.thermal.blocks.3.norm1.weight torch.Size([768])
367
- encoder.modality_transformers.thermal.blocks.3.norm2.weight torch.Size([768])
368
- encoder.modality_transformers.thermal.blocks.3.mlp.w1.weight torch.Size([512, 768])
369
- encoder.modality_transformers.thermal.blocks.3.mlp.w2.weight torch.Size([768, 512])
370
- encoder.modality_transformers.thermal.blocks.3.mlp.w3.weight torch.Size([512, 768])
371
- encoder.modality_transformers.thermal.blocks.4.attn.in_proj_weight torch.Size([2304, 768])
372
- encoder.modality_transformers.thermal.blocks.4.attn.in_proj_bias torch.Size([2304])
373
- encoder.modality_transformers.thermal.blocks.4.attn.bias_k torch.Size([1, 1, 768])
374
- encoder.modality_transformers.thermal.blocks.4.attn.bias_v torch.Size([1, 1, 768])
375
- encoder.modality_transformers.thermal.blocks.4.attn.out_proj.weight torch.Size([768, 768])
376
- encoder.modality_transformers.thermal.blocks.4.attn.out_proj.bias torch.Size([768])
377
- encoder.modality_transformers.thermal.blocks.4.norm1.weight torch.Size([768])
378
- encoder.modality_transformers.thermal.blocks.4.norm2.weight torch.Size([768])
379
- encoder.modality_transformers.thermal.blocks.4.mlp.w1.weight torch.Size([512, 768])
380
- encoder.modality_transformers.thermal.blocks.4.mlp.w2.weight torch.Size([768, 512])
381
- encoder.modality_transformers.thermal.blocks.4.mlp.w3.weight torch.Size([512, 768])
382
- encoder.modality_transformers.thermal.blocks.5.attn.in_proj_weight torch.Size([2304, 768])
383
- encoder.modality_transformers.thermal.blocks.5.attn.in_proj_bias torch.Size([2304])
384
- encoder.modality_transformers.thermal.blocks.5.attn.bias_k torch.Size([1, 1, 768])
385
- encoder.modality_transformers.thermal.blocks.5.attn.bias_v torch.Size([1, 1, 768])
386
- encoder.modality_transformers.thermal.blocks.5.attn.out_proj.weight torch.Size([768, 768])
387
- encoder.modality_transformers.thermal.blocks.5.attn.out_proj.bias torch.Size([768])
388
- encoder.modality_transformers.thermal.blocks.5.norm1.weight torch.Size([768])
389
- encoder.modality_transformers.thermal.blocks.5.norm2.weight torch.Size([768])
390
- encoder.modality_transformers.thermal.blocks.5.mlp.w1.weight torch.Size([512, 768])
391
- encoder.modality_transformers.thermal.blocks.5.mlp.w2.weight torch.Size([768, 512])
392
- encoder.modality_transformers.thermal.blocks.5.mlp.w3.weight torch.Size([512, 768])
393
- encoder.modality_heads.vision.0.weight torch.Size([768])
394
- encoder.modality_heads.vision.2.weight torch.Size([1024, 768])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  encoder.modality_heads.audio.0.weight torch.Size([768])
 
396
  encoder.modality_heads.audio.2.weight torch.Size([1024, 768])
397
- encoder.modality_heads.depth.0.weight torch.Size([384])
398
- encoder.modality_heads.depth.2.weight torch.Size([1024, 384])
399
- encoder.modality_heads.thermal.0.weight torch.Size([768])
400
- encoder.modality_heads.thermal.2.weight torch.Size([1024, 768])
401
  reasoner.model.embed_tokens.weight torch.Size([151936, 896])
402
  reasoner.model.layers.0.self_attn.q_proj.weight torch.Size([896, 896])
403
  reasoner.model.layers.0.self_attn.q_proj.bias torch.Size([896])
 
1
+ Qwen2-0.5B as the Reasoner for first successful multimodal-realtime inference.
2
+
3
+ On a MacBook Air M1 8GB with ca. 3s latency.
4
+ On an RTX 4090 with ca. 800ms latency.
5
+
6
+
7
+ encoder.modality_preprocessors.vision.cls_token torch.Size([1, 1, 1280])
8
+ encoder.modality_preprocessors.vision.rgbt_stem.proj.1.weight torch.Size([1280, 3, 2, 14, 14])
9
+ encoder.modality_preprocessors.vision.pos_embedding_helper.pos_embed torch.Size([1, 257, 1280])
10
  encoder.modality_preprocessors.audio.cls_token torch.Size([1, 1, 768])
11
  encoder.modality_preprocessors.audio.rgbt_stem.proj.weight torch.Size([768, 1, 16, 16])
12
  encoder.modality_preprocessors.audio.rgbt_stem.norm_layer.weight torch.Size([768])
13
+ encoder.modality_preprocessors.audio.rgbt_stem.norm_layer.bias torch.Size([768])
14
  encoder.modality_preprocessors.audio.pos_embedding_helper.pos_embed torch.Size([1, 229, 768])
15
+ encoder.modality_trunks.vision.pre_transformer_layer.0.weight torch.Size([1280])
16
+ encoder.modality_trunks.vision.pre_transformer_layer.0.bias torch.Size([1280])
17
+ encoder.modality_trunks.vision.blocks.0.attn.in_proj_weight torch.Size([3840, 1280])
18
+ encoder.modality_trunks.vision.blocks.0.attn.in_proj_bias torch.Size([3840])
19
+ encoder.modality_trunks.vision.blocks.0.attn.out_proj.weight torch.Size([1280, 1280])
20
+ encoder.modality_trunks.vision.blocks.0.attn.out_proj.bias torch.Size([1280])
21
+ encoder.modality_trunks.vision.blocks.0.norm_1.weight torch.Size([1280])
22
+ encoder.modality_trunks.vision.blocks.0.norm_1.bias torch.Size([1280])
23
+ encoder.modality_trunks.vision.blocks.0.mlp.fc1.weight torch.Size([5120, 1280])
24
+ encoder.modality_trunks.vision.blocks.0.mlp.fc1.bias torch.Size([5120])
25
+ encoder.modality_trunks.vision.blocks.0.mlp.fc2.weight torch.Size([1280, 5120])
26
+ encoder.modality_trunks.vision.blocks.0.mlp.fc2.bias torch.Size([1280])
27
+ encoder.modality_trunks.vision.blocks.0.norm_2.weight torch.Size([1280])
28
+ encoder.modality_trunks.vision.blocks.0.norm_2.bias torch.Size([1280])
29
+ encoder.modality_trunks.vision.blocks.1.attn.in_proj_weight torch.Size([3840, 1280])
30
+ encoder.modality_trunks.vision.blocks.1.attn.in_proj_bias torch.Size([3840])
31
+ encoder.modality_trunks.vision.blocks.1.attn.out_proj.weight torch.Size([1280, 1280])
32
+ encoder.modality_trunks.vision.blocks.1.attn.out_proj.bias torch.Size([1280])
33
+ encoder.modality_trunks.vision.blocks.1.norm_1.weight torch.Size([1280])
34
+ encoder.modality_trunks.vision.blocks.1.norm_1.bias torch.Size([1280])
35
+ encoder.modality_trunks.vision.blocks.1.mlp.fc1.weight torch.Size([5120, 1280])
36
+ encoder.modality_trunks.vision.blocks.1.mlp.fc1.bias torch.Size([5120])
37
+ encoder.modality_trunks.vision.blocks.1.mlp.fc2.weight torch.Size([1280, 5120])
38
+ encoder.modality_trunks.vision.blocks.1.mlp.fc2.bias torch.Size([1280])
39
+ encoder.modality_trunks.vision.blocks.1.norm_2.weight torch.Size([1280])
40
+ encoder.modality_trunks.vision.blocks.1.norm_2.bias torch.Size([1280])
41
+ encoder.modality_trunks.vision.blocks.2.attn.in_proj_weight torch.Size([3840, 1280])
42
+ encoder.modality_trunks.vision.blocks.2.attn.in_proj_bias torch.Size([3840])
43
+ encoder.modality_trunks.vision.blocks.2.attn.out_proj.weight torch.Size([1280, 1280])
44
+ encoder.modality_trunks.vision.blocks.2.attn.out_proj.bias torch.Size([1280])
45
+ encoder.modality_trunks.vision.blocks.2.norm_1.weight torch.Size([1280])
46
+ encoder.modality_trunks.vision.blocks.2.norm_1.bias torch.Size([1280])
47
+ encoder.modality_trunks.vision.blocks.2.mlp.fc1.weight torch.Size([5120, 1280])
48
+ encoder.modality_trunks.vision.blocks.2.mlp.fc1.bias torch.Size([5120])
49
+ encoder.modality_trunks.vision.blocks.2.mlp.fc2.weight torch.Size([1280, 5120])
50
+ encoder.modality_trunks.vision.blocks.2.mlp.fc2.bias torch.Size([1280])
51
+ encoder.modality_trunks.vision.blocks.2.norm_2.weight torch.Size([1280])
52
+ encoder.modality_trunks.vision.blocks.2.norm_2.bias torch.Size([1280])
53
+ encoder.modality_trunks.vision.blocks.3.attn.in_proj_weight torch.Size([3840, 1280])
54
+ encoder.modality_trunks.vision.blocks.3.attn.in_proj_bias torch.Size([3840])
55
+ encoder.modality_trunks.vision.blocks.3.attn.out_proj.weight torch.Size([1280, 1280])
56
+ encoder.modality_trunks.vision.blocks.3.attn.out_proj.bias torch.Size([1280])
57
+ encoder.modality_trunks.vision.blocks.3.norm_1.weight torch.Size([1280])
58
+ encoder.modality_trunks.vision.blocks.3.norm_1.bias torch.Size([1280])
59
+ encoder.modality_trunks.vision.blocks.3.mlp.fc1.weight torch.Size([5120, 1280])
60
+ encoder.modality_trunks.vision.blocks.3.mlp.fc1.bias torch.Size([5120])
61
+ encoder.modality_trunks.vision.blocks.3.mlp.fc2.weight torch.Size([1280, 5120])
62
+ encoder.modality_trunks.vision.blocks.3.mlp.fc2.bias torch.Size([1280])
63
+ encoder.modality_trunks.vision.blocks.3.norm_2.weight torch.Size([1280])
64
+ encoder.modality_trunks.vision.blocks.3.norm_2.bias torch.Size([1280])
65
+ encoder.modality_trunks.vision.blocks.4.attn.in_proj_weight torch.Size([3840, 1280])
66
+ encoder.modality_trunks.vision.blocks.4.attn.in_proj_bias torch.Size([3840])
67
+ encoder.modality_trunks.vision.blocks.4.attn.out_proj.weight torch.Size([1280, 1280])
68
+ encoder.modality_trunks.vision.blocks.4.attn.out_proj.bias torch.Size([1280])
69
+ encoder.modality_trunks.vision.blocks.4.norm_1.weight torch.Size([1280])
70
+ encoder.modality_trunks.vision.blocks.4.norm_1.bias torch.Size([1280])
71
+ encoder.modality_trunks.vision.blocks.4.mlp.fc1.weight torch.Size([5120, 1280])
72
+ encoder.modality_trunks.vision.blocks.4.mlp.fc1.bias torch.Size([5120])
73
+ encoder.modality_trunks.vision.blocks.4.mlp.fc2.weight torch.Size([1280, 5120])
74
+ encoder.modality_trunks.vision.blocks.4.mlp.fc2.bias torch.Size([1280])
75
+ encoder.modality_trunks.vision.blocks.4.norm_2.weight torch.Size([1280])
76
+ encoder.modality_trunks.vision.blocks.4.norm_2.bias torch.Size([1280])
77
+ encoder.modality_trunks.vision.blocks.5.attn.in_proj_weight torch.Size([3840, 1280])
78
+ encoder.modality_trunks.vision.blocks.5.attn.in_proj_bias torch.Size([3840])
79
+ encoder.modality_trunks.vision.blocks.5.attn.out_proj.weight torch.Size([1280, 1280])
80
+ encoder.modality_trunks.vision.blocks.5.attn.out_proj.bias torch.Size([1280])
81
+ encoder.modality_trunks.vision.blocks.5.norm_1.weight torch.Size([1280])
82
+ encoder.modality_trunks.vision.blocks.5.norm_1.bias torch.Size([1280])
83
+ encoder.modality_trunks.vision.blocks.5.mlp.fc1.weight torch.Size([5120, 1280])
84
+ encoder.modality_trunks.vision.blocks.5.mlp.fc1.bias torch.Size([5120])
85
+ encoder.modality_trunks.vision.blocks.5.mlp.fc2.weight torch.Size([1280, 5120])
86
+ encoder.modality_trunks.vision.blocks.5.mlp.fc2.bias torch.Size([1280])
87
+ encoder.modality_trunks.vision.blocks.5.norm_2.weight torch.Size([1280])
88
+ encoder.modality_trunks.vision.blocks.5.norm_2.bias torch.Size([1280])
89
+ encoder.modality_trunks.vision.blocks.6.attn.in_proj_weight torch.Size([3840, 1280])
90
+ encoder.modality_trunks.vision.blocks.6.attn.in_proj_bias torch.Size([3840])
91
+ encoder.modality_trunks.vision.blocks.6.attn.out_proj.weight torch.Size([1280, 1280])
92
+ encoder.modality_trunks.vision.blocks.6.attn.out_proj.bias torch.Size([1280])
93
+ encoder.modality_trunks.vision.blocks.6.norm_1.weight torch.Size([1280])
94
+ encoder.modality_trunks.vision.blocks.6.norm_1.bias torch.Size([1280])
95
+ encoder.modality_trunks.vision.blocks.6.mlp.fc1.weight torch.Size([5120, 1280])
96
+ encoder.modality_trunks.vision.blocks.6.mlp.fc1.bias torch.Size([5120])
97
+ encoder.modality_trunks.vision.blocks.6.mlp.fc2.weight torch.Size([1280, 5120])
98
+ encoder.modality_trunks.vision.blocks.6.mlp.fc2.bias torch.Size([1280])
99
+ encoder.modality_trunks.vision.blocks.6.norm_2.weight torch.Size([1280])
100
+ encoder.modality_trunks.vision.blocks.6.norm_2.bias torch.Size([1280])
101
+ encoder.modality_trunks.vision.blocks.7.attn.in_proj_weight torch.Size([3840, 1280])
102
+ encoder.modality_trunks.vision.blocks.7.attn.in_proj_bias torch.Size([3840])
103
+ encoder.modality_trunks.vision.blocks.7.attn.out_proj.weight torch.Size([1280, 1280])
104
+ encoder.modality_trunks.vision.blocks.7.attn.out_proj.bias torch.Size([1280])
105
+ encoder.modality_trunks.vision.blocks.7.norm_1.weight torch.Size([1280])
106
+ encoder.modality_trunks.vision.blocks.7.norm_1.bias torch.Size([1280])
107
+ encoder.modality_trunks.vision.blocks.7.mlp.fc1.weight torch.Size([5120, 1280])
108
+ encoder.modality_trunks.vision.blocks.7.mlp.fc1.bias torch.Size([5120])
109
+ encoder.modality_trunks.vision.blocks.7.mlp.fc2.weight torch.Size([1280, 5120])
110
+ encoder.modality_trunks.vision.blocks.7.mlp.fc2.bias torch.Size([1280])
111
+ encoder.modality_trunks.vision.blocks.7.norm_2.weight torch.Size([1280])
112
+ encoder.modality_trunks.vision.blocks.7.norm_2.bias torch.Size([1280])
113
+ encoder.modality_trunks.vision.blocks.8.attn.in_proj_weight torch.Size([3840, 1280])
114
+ encoder.modality_trunks.vision.blocks.8.attn.in_proj_bias torch.Size([3840])
115
+ encoder.modality_trunks.vision.blocks.8.attn.out_proj.weight torch.Size([1280, 1280])
116
+ encoder.modality_trunks.vision.blocks.8.attn.out_proj.bias torch.Size([1280])
117
+ encoder.modality_trunks.vision.blocks.8.norm_1.weight torch.Size([1280])
118
+ encoder.modality_trunks.vision.blocks.8.norm_1.bias torch.Size([1280])
119
+ encoder.modality_trunks.vision.blocks.8.mlp.fc1.weight torch.Size([5120, 1280])
120
+ encoder.modality_trunks.vision.blocks.8.mlp.fc1.bias torch.Size([5120])
121
+ encoder.modality_trunks.vision.blocks.8.mlp.fc2.weight torch.Size([1280, 5120])
122
+ encoder.modality_trunks.vision.blocks.8.mlp.fc2.bias torch.Size([1280])
123
+ encoder.modality_trunks.vision.blocks.8.norm_2.weight torch.Size([1280])
124
+ encoder.modality_trunks.vision.blocks.8.norm_2.bias torch.Size([1280])
125
+ encoder.modality_trunks.vision.blocks.9.attn.in_proj_weight torch.Size([3840, 1280])
126
+ encoder.modality_trunks.vision.blocks.9.attn.in_proj_bias torch.Size([3840])
127
+ encoder.modality_trunks.vision.blocks.9.attn.out_proj.weight torch.Size([1280, 1280])
128
+ encoder.modality_trunks.vision.blocks.9.attn.out_proj.bias torch.Size([1280])
129
+ encoder.modality_trunks.vision.blocks.9.norm_1.weight torch.Size([1280])
130
+ encoder.modality_trunks.vision.blocks.9.norm_1.bias torch.Size([1280])
131
+ encoder.modality_trunks.vision.blocks.9.mlp.fc1.weight torch.Size([5120, 1280])
132
+ encoder.modality_trunks.vision.blocks.9.mlp.fc1.bias torch.Size([5120])
133
+ encoder.modality_trunks.vision.blocks.9.mlp.fc2.weight torch.Size([1280, 5120])
134
+ encoder.modality_trunks.vision.blocks.9.mlp.fc2.bias torch.Size([1280])
135
+ encoder.modality_trunks.vision.blocks.9.norm_2.weight torch.Size([1280])
136
+ encoder.modality_trunks.vision.blocks.9.norm_2.bias torch.Size([1280])
137
+ encoder.modality_trunks.vision.blocks.10.attn.in_proj_weight torch.Size([3840, 1280])
138
+ encoder.modality_trunks.vision.blocks.10.attn.in_proj_bias torch.Size([3840])
139
+ encoder.modality_trunks.vision.blocks.10.attn.out_proj.weight torch.Size([1280, 1280])
140
+ encoder.modality_trunks.vision.blocks.10.attn.out_proj.bias torch.Size([1280])
141
+ encoder.modality_trunks.vision.blocks.10.norm_1.weight torch.Size([1280])
142
+ encoder.modality_trunks.vision.blocks.10.norm_1.bias torch.Size([1280])
143
+ encoder.modality_trunks.vision.blocks.10.mlp.fc1.weight torch.Size([5120, 1280])
144
+ encoder.modality_trunks.vision.blocks.10.mlp.fc1.bias torch.Size([5120])
145
+ encoder.modality_trunks.vision.blocks.10.mlp.fc2.weight torch.Size([1280, 5120])
146
+ encoder.modality_trunks.vision.blocks.10.mlp.fc2.bias torch.Size([1280])
147
+ encoder.modality_trunks.vision.blocks.10.norm_2.weight torch.Size([1280])
148
+ encoder.modality_trunks.vision.blocks.10.norm_2.bias torch.Size([1280])
149
+ encoder.modality_trunks.vision.blocks.11.attn.in_proj_weight torch.Size([3840, 1280])
150
+ encoder.modality_trunks.vision.blocks.11.attn.in_proj_bias torch.Size([3840])
151
+ encoder.modality_trunks.vision.blocks.11.attn.out_proj.weight torch.Size([1280, 1280])
152
+ encoder.modality_trunks.vision.blocks.11.attn.out_proj.bias torch.Size([1280])
153
+ encoder.modality_trunks.vision.blocks.11.norm_1.weight torch.Size([1280])
154
+ encoder.modality_trunks.vision.blocks.11.norm_1.bias torch.Size([1280])
155
+ encoder.modality_trunks.vision.blocks.11.mlp.fc1.weight torch.Size([5120, 1280])
156
+ encoder.modality_trunks.vision.blocks.11.mlp.fc1.bias torch.Size([5120])
157
+ encoder.modality_trunks.vision.blocks.11.mlp.fc2.weight torch.Size([1280, 5120])
158
+ encoder.modality_trunks.vision.blocks.11.mlp.fc2.bias torch.Size([1280])
159
+ encoder.modality_trunks.vision.blocks.11.norm_2.weight torch.Size([1280])
160
+ encoder.modality_trunks.vision.blocks.11.norm_2.bias torch.Size([1280])
161
+ encoder.modality_trunks.vision.blocks.12.attn.in_proj_weight torch.Size([3840, 1280])
162
+ encoder.modality_trunks.vision.blocks.12.attn.in_proj_bias torch.Size([3840])
163
+ encoder.modality_trunks.vision.blocks.12.attn.out_proj.weight torch.Size([1280, 1280])
164
+ encoder.modality_trunks.vision.blocks.12.attn.out_proj.bias torch.Size([1280])
165
+ encoder.modality_trunks.vision.blocks.12.norm_1.weight torch.Size([1280])
166
+ encoder.modality_trunks.vision.blocks.12.norm_1.bias torch.Size([1280])
167
+ encoder.modality_trunks.vision.blocks.12.mlp.fc1.weight torch.Size([5120, 1280])
168
+ encoder.modality_trunks.vision.blocks.12.mlp.fc1.bias torch.Size([5120])
169
+ encoder.modality_trunks.vision.blocks.12.mlp.fc2.weight torch.Size([1280, 5120])
170
+ encoder.modality_trunks.vision.blocks.12.mlp.fc2.bias torch.Size([1280])
171
+ encoder.modality_trunks.vision.blocks.12.norm_2.weight torch.Size([1280])
172
+ encoder.modality_trunks.vision.blocks.12.norm_2.bias torch.Size([1280])
173
+ encoder.modality_trunks.vision.blocks.13.attn.in_proj_weight torch.Size([3840, 1280])
174
+ encoder.modality_trunks.vision.blocks.13.attn.in_proj_bias torch.Size([3840])
175
+ encoder.modality_trunks.vision.blocks.13.attn.out_proj.weight torch.Size([1280, 1280])
176
+ encoder.modality_trunks.vision.blocks.13.attn.out_proj.bias torch.Size([1280])
177
+ encoder.modality_trunks.vision.blocks.13.norm_1.weight torch.Size([1280])
178
+ encoder.modality_trunks.vision.blocks.13.norm_1.bias torch.Size([1280])
179
+ encoder.modality_trunks.vision.blocks.13.mlp.fc1.weight torch.Size([5120, 1280])
180
+ encoder.modality_trunks.vision.blocks.13.mlp.fc1.bias torch.Size([5120])
181
+ encoder.modality_trunks.vision.blocks.13.mlp.fc2.weight torch.Size([1280, 5120])
182
+ encoder.modality_trunks.vision.blocks.13.mlp.fc2.bias torch.Size([1280])
183
+ encoder.modality_trunks.vision.blocks.13.norm_2.weight torch.Size([1280])
184
+ encoder.modality_trunks.vision.blocks.13.norm_2.bias torch.Size([1280])
185
+ encoder.modality_trunks.vision.blocks.14.attn.in_proj_weight torch.Size([3840, 1280])
186
+ encoder.modality_trunks.vision.blocks.14.attn.in_proj_bias torch.Size([3840])
187
+ encoder.modality_trunks.vision.blocks.14.attn.out_proj.weight torch.Size([1280, 1280])
188
+ encoder.modality_trunks.vision.blocks.14.attn.out_proj.bias torch.Size([1280])
189
+ encoder.modality_trunks.vision.blocks.14.norm_1.weight torch.Size([1280])
190
+ encoder.modality_trunks.vision.blocks.14.norm_1.bias torch.Size([1280])
191
+ encoder.modality_trunks.vision.blocks.14.mlp.fc1.weight torch.Size([5120, 1280])
192
+ encoder.modality_trunks.vision.blocks.14.mlp.fc1.bias torch.Size([5120])
193
+ encoder.modality_trunks.vision.blocks.14.mlp.fc2.weight torch.Size([1280, 5120])
194
+ encoder.modality_trunks.vision.blocks.14.mlp.fc2.bias torch.Size([1280])
195
+ encoder.modality_trunks.vision.blocks.14.norm_2.weight torch.Size([1280])
196
+ encoder.modality_trunks.vision.blocks.14.norm_2.bias torch.Size([1280])
197
+ encoder.modality_trunks.vision.blocks.15.attn.in_proj_weight torch.Size([3840, 1280])
198
+ encoder.modality_trunks.vision.blocks.15.attn.in_proj_bias torch.Size([3840])
199
+ encoder.modality_trunks.vision.blocks.15.attn.out_proj.weight torch.Size([1280, 1280])
200
+ encoder.modality_trunks.vision.blocks.15.attn.out_proj.bias torch.Size([1280])
201
+ encoder.modality_trunks.vision.blocks.15.norm_1.weight torch.Size([1280])
202
+ encoder.modality_trunks.vision.blocks.15.norm_1.bias torch.Size([1280])
203
+ encoder.modality_trunks.vision.blocks.15.mlp.fc1.weight torch.Size([5120, 1280])
204
+ encoder.modality_trunks.vision.blocks.15.mlp.fc1.bias torch.Size([5120])
205
+ encoder.modality_trunks.vision.blocks.15.mlp.fc2.weight torch.Size([1280, 5120])
206
+ encoder.modality_trunks.vision.blocks.15.mlp.fc2.bias torch.Size([1280])
207
+ encoder.modality_trunks.vision.blocks.15.norm_2.weight torch.Size([1280])
208
+ encoder.modality_trunks.vision.blocks.15.norm_2.bias torch.Size([1280])
209
+ encoder.modality_trunks.vision.blocks.16.attn.in_proj_weight torch.Size([3840, 1280])
210
+ encoder.modality_trunks.vision.blocks.16.attn.in_proj_bias torch.Size([3840])
211
+ encoder.modality_trunks.vision.blocks.16.attn.out_proj.weight torch.Size([1280, 1280])
212
+ encoder.modality_trunks.vision.blocks.16.attn.out_proj.bias torch.Size([1280])
213
+ encoder.modality_trunks.vision.blocks.16.norm_1.weight torch.Size([1280])
214
+ encoder.modality_trunks.vision.blocks.16.norm_1.bias torch.Size([1280])
215
+ encoder.modality_trunks.vision.blocks.16.mlp.fc1.weight torch.Size([5120, 1280])
216
+ encoder.modality_trunks.vision.blocks.16.mlp.fc1.bias torch.Size([5120])
217
+ encoder.modality_trunks.vision.blocks.16.mlp.fc2.weight torch.Size([1280, 5120])
218
+ encoder.modality_trunks.vision.blocks.16.mlp.fc2.bias torch.Size([1280])
219
+ encoder.modality_trunks.vision.blocks.16.norm_2.weight torch.Size([1280])
220
+ encoder.modality_trunks.vision.blocks.16.norm_2.bias torch.Size([1280])
221
+ encoder.modality_trunks.vision.blocks.17.attn.in_proj_weight torch.Size([3840, 1280])
222
+ encoder.modality_trunks.vision.blocks.17.attn.in_proj_bias torch.Size([3840])
223
+ encoder.modality_trunks.vision.blocks.17.attn.out_proj.weight torch.Size([1280, 1280])
224
+ encoder.modality_trunks.vision.blocks.17.attn.out_proj.bias torch.Size([1280])
225
+ encoder.modality_trunks.vision.blocks.17.norm_1.weight torch.Size([1280])
226
+ encoder.modality_trunks.vision.blocks.17.norm_1.bias torch.Size([1280])
227
+ encoder.modality_trunks.vision.blocks.17.mlp.fc1.weight torch.Size([5120, 1280])
228
+ encoder.modality_trunks.vision.blocks.17.mlp.fc1.bias torch.Size([5120])
229
+ encoder.modality_trunks.vision.blocks.17.mlp.fc2.weight torch.Size([1280, 5120])
230
+ encoder.modality_trunks.vision.blocks.17.mlp.fc2.bias torch.Size([1280])
231
+ encoder.modality_trunks.vision.blocks.17.norm_2.weight torch.Size([1280])
232
+ encoder.modality_trunks.vision.blocks.17.norm_2.bias torch.Size([1280])
233
+ encoder.modality_trunks.vision.blocks.18.attn.in_proj_weight torch.Size([3840, 1280])
234
+ encoder.modality_trunks.vision.blocks.18.attn.in_proj_bias torch.Size([3840])
235
+ encoder.modality_trunks.vision.blocks.18.attn.out_proj.weight torch.Size([1280, 1280])
236
+ encoder.modality_trunks.vision.blocks.18.attn.out_proj.bias torch.Size([1280])
237
+ encoder.modality_trunks.vision.blocks.18.norm_1.weight torch.Size([1280])
238
+ encoder.modality_trunks.vision.blocks.18.norm_1.bias torch.Size([1280])
239
+ encoder.modality_trunks.vision.blocks.18.mlp.fc1.weight torch.Size([5120, 1280])
240
+ encoder.modality_trunks.vision.blocks.18.mlp.fc1.bias torch.Size([5120])
241
+ encoder.modality_trunks.vision.blocks.18.mlp.fc2.weight torch.Size([1280, 5120])
242
+ encoder.modality_trunks.vision.blocks.18.mlp.fc2.bias torch.Size([1280])
243
+ encoder.modality_trunks.vision.blocks.18.norm_2.weight torch.Size([1280])
244
+ encoder.modality_trunks.vision.blocks.18.norm_2.bias torch.Size([1280])
245
+ encoder.modality_trunks.vision.blocks.19.attn.in_proj_weight torch.Size([3840, 1280])
246
+ encoder.modality_trunks.vision.blocks.19.attn.in_proj_bias torch.Size([3840])
247
+ encoder.modality_trunks.vision.blocks.19.attn.out_proj.weight torch.Size([1280, 1280])
248
+ encoder.modality_trunks.vision.blocks.19.attn.out_proj.bias torch.Size([1280])
249
+ encoder.modality_trunks.vision.blocks.19.norm_1.weight torch.Size([1280])
250
+ encoder.modality_trunks.vision.blocks.19.norm_1.bias torch.Size([1280])
251
+ encoder.modality_trunks.vision.blocks.19.mlp.fc1.weight torch.Size([5120, 1280])
252
+ encoder.modality_trunks.vision.blocks.19.mlp.fc1.bias torch.Size([5120])
253
+ encoder.modality_trunks.vision.blocks.19.mlp.fc2.weight torch.Size([1280, 5120])
254
+ encoder.modality_trunks.vision.blocks.19.mlp.fc2.bias torch.Size([1280])
255
+ encoder.modality_trunks.vision.blocks.19.norm_2.weight torch.Size([1280])
256
+ encoder.modality_trunks.vision.blocks.19.norm_2.bias torch.Size([1280])
257
+ encoder.modality_trunks.vision.blocks.20.attn.in_proj_weight torch.Size([3840, 1280])
258
+ encoder.modality_trunks.vision.blocks.20.attn.in_proj_bias torch.Size([3840])
259
+ encoder.modality_trunks.vision.blocks.20.attn.out_proj.weight torch.Size([1280, 1280])
260
+ encoder.modality_trunks.vision.blocks.20.attn.out_proj.bias torch.Size([1280])
261
+ encoder.modality_trunks.vision.blocks.20.norm_1.weight torch.Size([1280])
262
+ encoder.modality_trunks.vision.blocks.20.norm_1.bias torch.Size([1280])
263
+ encoder.modality_trunks.vision.blocks.20.mlp.fc1.weight torch.Size([5120, 1280])
264
+ encoder.modality_trunks.vision.blocks.20.mlp.fc1.bias torch.Size([5120])
265
+ encoder.modality_trunks.vision.blocks.20.mlp.fc2.weight torch.Size([1280, 5120])
266
+ encoder.modality_trunks.vision.blocks.20.mlp.fc2.bias torch.Size([1280])
267
+ encoder.modality_trunks.vision.blocks.20.norm_2.weight torch.Size([1280])
268
+ encoder.modality_trunks.vision.blocks.20.norm_2.bias torch.Size([1280])
269
+ encoder.modality_trunks.vision.blocks.21.attn.in_proj_weight torch.Size([3840, 1280])
270
+ encoder.modality_trunks.vision.blocks.21.attn.in_proj_bias torch.Size([3840])
271
+ encoder.modality_trunks.vision.blocks.21.attn.out_proj.weight torch.Size([1280, 1280])
272
+ encoder.modality_trunks.vision.blocks.21.attn.out_proj.bias torch.Size([1280])
273
+ encoder.modality_trunks.vision.blocks.21.norm_1.weight torch.Size([1280])
274
+ encoder.modality_trunks.vision.blocks.21.norm_1.bias torch.Size([1280])
275
+ encoder.modality_trunks.vision.blocks.21.mlp.fc1.weight torch.Size([5120, 1280])
276
+ encoder.modality_trunks.vision.blocks.21.mlp.fc1.bias torch.Size([5120])
277
+ encoder.modality_trunks.vision.blocks.21.mlp.fc2.weight torch.Size([1280, 5120])
278
+ encoder.modality_trunks.vision.blocks.21.mlp.fc2.bias torch.Size([1280])
279
+ encoder.modality_trunks.vision.blocks.21.norm_2.weight torch.Size([1280])
280
+ encoder.modality_trunks.vision.blocks.21.norm_2.bias torch.Size([1280])
281
+ encoder.modality_trunks.vision.blocks.22.attn.in_proj_weight torch.Size([3840, 1280])
282
+ encoder.modality_trunks.vision.blocks.22.attn.in_proj_bias torch.Size([3840])
283
+ encoder.modality_trunks.vision.blocks.22.attn.out_proj.weight torch.Size([1280, 1280])
284
+ encoder.modality_trunks.vision.blocks.22.attn.out_proj.bias torch.Size([1280])
285
+ encoder.modality_trunks.vision.blocks.22.norm_1.weight torch.Size([1280])
286
+ encoder.modality_trunks.vision.blocks.22.norm_1.bias torch.Size([1280])
287
+ encoder.modality_trunks.vision.blocks.22.mlp.fc1.weight torch.Size([5120, 1280])
288
+ encoder.modality_trunks.vision.blocks.22.mlp.fc1.bias torch.Size([5120])
289
+ encoder.modality_trunks.vision.blocks.22.mlp.fc2.weight torch.Size([1280, 5120])
290
+ encoder.modality_trunks.vision.blocks.22.mlp.fc2.bias torch.Size([1280])
291
+ encoder.modality_trunks.vision.blocks.22.norm_2.weight torch.Size([1280])
292
+ encoder.modality_trunks.vision.blocks.22.norm_2.bias torch.Size([1280])
293
+ encoder.modality_trunks.vision.blocks.23.attn.in_proj_weight torch.Size([3840, 1280])
294
+ encoder.modality_trunks.vision.blocks.23.attn.in_proj_bias torch.Size([3840])
295
+ encoder.modality_trunks.vision.blocks.23.attn.out_proj.weight torch.Size([1280, 1280])
296
+ encoder.modality_trunks.vision.blocks.23.attn.out_proj.bias torch.Size([1280])
297
+ encoder.modality_trunks.vision.blocks.23.norm_1.weight torch.Size([1280])
298
+ encoder.modality_trunks.vision.blocks.23.norm_1.bias torch.Size([1280])
299
+ encoder.modality_trunks.vision.blocks.23.mlp.fc1.weight torch.Size([5120, 1280])
300
+ encoder.modality_trunks.vision.blocks.23.mlp.fc1.bias torch.Size([5120])
301
+ encoder.modality_trunks.vision.blocks.23.mlp.fc2.weight torch.Size([1280, 5120])
302
+ encoder.modality_trunks.vision.blocks.23.mlp.fc2.bias torch.Size([1280])
303
+ encoder.modality_trunks.vision.blocks.23.norm_2.weight torch.Size([1280])
304
+ encoder.modality_trunks.vision.blocks.23.norm_2.bias torch.Size([1280])
305
+ encoder.modality_trunks.vision.blocks.24.attn.in_proj_weight torch.Size([3840, 1280])
306
+ encoder.modality_trunks.vision.blocks.24.attn.in_proj_bias torch.Size([3840])
307
+ encoder.modality_trunks.vision.blocks.24.attn.out_proj.weight torch.Size([1280, 1280])
308
+ encoder.modality_trunks.vision.blocks.24.attn.out_proj.bias torch.Size([1280])
309
+ encoder.modality_trunks.vision.blocks.24.norm_1.weight torch.Size([1280])
310
+ encoder.modality_trunks.vision.blocks.24.norm_1.bias torch.Size([1280])
311
+ encoder.modality_trunks.vision.blocks.24.mlp.fc1.weight torch.Size([5120, 1280])
312
+ encoder.modality_trunks.vision.blocks.24.mlp.fc1.bias torch.Size([5120])
313
+ encoder.modality_trunks.vision.blocks.24.mlp.fc2.weight torch.Size([1280, 5120])
314
+ encoder.modality_trunks.vision.blocks.24.mlp.fc2.bias torch.Size([1280])
315
+ encoder.modality_trunks.vision.blocks.24.norm_2.weight torch.Size([1280])
316
+ encoder.modality_trunks.vision.blocks.24.norm_2.bias torch.Size([1280])
317
+ encoder.modality_trunks.vision.blocks.25.attn.in_proj_weight torch.Size([3840, 1280])
318
+ encoder.modality_trunks.vision.blocks.25.attn.in_proj_bias torch.Size([3840])
319
+ encoder.modality_trunks.vision.blocks.25.attn.out_proj.weight torch.Size([1280, 1280])
320
+ encoder.modality_trunks.vision.blocks.25.attn.out_proj.bias torch.Size([1280])
321
+ encoder.modality_trunks.vision.blocks.25.norm_1.weight torch.Size([1280])
322
+ encoder.modality_trunks.vision.blocks.25.norm_1.bias torch.Size([1280])
323
+ encoder.modality_trunks.vision.blocks.25.mlp.fc1.weight torch.Size([5120, 1280])
324
+ encoder.modality_trunks.vision.blocks.25.mlp.fc1.bias torch.Size([5120])
325
+ encoder.modality_trunks.vision.blocks.25.mlp.fc2.weight torch.Size([1280, 5120])
326
+ encoder.modality_trunks.vision.blocks.25.mlp.fc2.bias torch.Size([1280])
327
+ encoder.modality_trunks.vision.blocks.25.norm_2.weight torch.Size([1280])
328
+ encoder.modality_trunks.vision.blocks.25.norm_2.bias torch.Size([1280])
329
+ encoder.modality_trunks.vision.blocks.26.attn.in_proj_weight torch.Size([3840, 1280])
330
+ encoder.modality_trunks.vision.blocks.26.attn.in_proj_bias torch.Size([3840])
331
+ encoder.modality_trunks.vision.blocks.26.attn.out_proj.weight torch.Size([1280, 1280])
332
+ encoder.modality_trunks.vision.blocks.26.attn.out_proj.bias torch.Size([1280])
333
+ encoder.modality_trunks.vision.blocks.26.norm_1.weight torch.Size([1280])
334
+ encoder.modality_trunks.vision.blocks.26.norm_1.bias torch.Size([1280])
335
+ encoder.modality_trunks.vision.blocks.26.mlp.fc1.weight torch.Size([5120, 1280])
336
+ encoder.modality_trunks.vision.blocks.26.mlp.fc1.bias torch.Size([5120])
337
+ encoder.modality_trunks.vision.blocks.26.mlp.fc2.weight torch.Size([1280, 5120])
338
+ encoder.modality_trunks.vision.blocks.26.mlp.fc2.bias torch.Size([1280])
339
+ encoder.modality_trunks.vision.blocks.26.norm_2.weight torch.Size([1280])
340
+ encoder.modality_trunks.vision.blocks.26.norm_2.bias torch.Size([1280])
341
+ encoder.modality_trunks.vision.blocks.27.attn.in_proj_weight torch.Size([3840, 1280])
342
+ encoder.modality_trunks.vision.blocks.27.attn.in_proj_bias torch.Size([3840])
343
+ encoder.modality_trunks.vision.blocks.27.attn.out_proj.weight torch.Size([1280, 1280])
344
+ encoder.modality_trunks.vision.blocks.27.attn.out_proj.bias torch.Size([1280])
345
+ encoder.modality_trunks.vision.blocks.27.norm_1.weight torch.Size([1280])
346
+ encoder.modality_trunks.vision.blocks.27.norm_1.bias torch.Size([1280])
347
+ encoder.modality_trunks.vision.blocks.27.mlp.fc1.weight torch.Size([5120, 1280])
348
+ encoder.modality_trunks.vision.blocks.27.mlp.fc1.bias torch.Size([5120])
349
+ encoder.modality_trunks.vision.blocks.27.mlp.fc2.weight torch.Size([1280, 5120])
350
+ encoder.modality_trunks.vision.blocks.27.mlp.fc2.bias torch.Size([1280])
351
+ encoder.modality_trunks.vision.blocks.27.norm_2.weight torch.Size([1280])
352
+ encoder.modality_trunks.vision.blocks.27.norm_2.bias torch.Size([1280])
353
+ encoder.modality_trunks.vision.blocks.28.attn.in_proj_weight torch.Size([3840, 1280])
354
+ encoder.modality_trunks.vision.blocks.28.attn.in_proj_bias torch.Size([3840])
355
+ encoder.modality_trunks.vision.blocks.28.attn.out_proj.weight torch.Size([1280, 1280])
356
+ encoder.modality_trunks.vision.blocks.28.attn.out_proj.bias torch.Size([1280])
357
+ encoder.modality_trunks.vision.blocks.28.norm_1.weight torch.Size([1280])
358
+ encoder.modality_trunks.vision.blocks.28.norm_1.bias torch.Size([1280])
359
+ encoder.modality_trunks.vision.blocks.28.mlp.fc1.weight torch.Size([5120, 1280])
360
+ encoder.modality_trunks.vision.blocks.28.mlp.fc1.bias torch.Size([5120])
361
+ encoder.modality_trunks.vision.blocks.28.mlp.fc2.weight torch.Size([1280, 5120])
362
+ encoder.modality_trunks.vision.blocks.28.mlp.fc2.bias torch.Size([1280])
363
+ encoder.modality_trunks.vision.blocks.28.norm_2.weight torch.Size([1280])
364
+ encoder.modality_trunks.vision.blocks.28.norm_2.bias torch.Size([1280])
365
+ encoder.modality_trunks.vision.blocks.29.attn.in_proj_weight torch.Size([3840, 1280])
366
+ encoder.modality_trunks.vision.blocks.29.attn.in_proj_bias torch.Size([3840])
367
+ encoder.modality_trunks.vision.blocks.29.attn.out_proj.weight torch.Size([1280, 1280])
368
+ encoder.modality_trunks.vision.blocks.29.attn.out_proj.bias torch.Size([1280])
369
+ encoder.modality_trunks.vision.blocks.29.norm_1.weight torch.Size([1280])
370
+ encoder.modality_trunks.vision.blocks.29.norm_1.bias torch.Size([1280])
371
+ encoder.modality_trunks.vision.blocks.29.mlp.fc1.weight torch.Size([5120, 1280])
372
+ encoder.modality_trunks.vision.blocks.29.mlp.fc1.bias torch.Size([5120])
373
+ encoder.modality_trunks.vision.blocks.29.mlp.fc2.weight torch.Size([1280, 5120])
374
+ encoder.modality_trunks.vision.blocks.29.mlp.fc2.bias torch.Size([1280])
375
+ encoder.modality_trunks.vision.blocks.29.norm_2.weight torch.Size([1280])
376
+ encoder.modality_trunks.vision.blocks.29.norm_2.bias torch.Size([1280])
377
+ encoder.modality_trunks.vision.blocks.30.attn.in_proj_weight torch.Size([3840, 1280])
378
+ encoder.modality_trunks.vision.blocks.30.attn.in_proj_bias torch.Size([3840])
379
+ encoder.modality_trunks.vision.blocks.30.attn.out_proj.weight torch.Size([1280, 1280])
380
+ encoder.modality_trunks.vision.blocks.30.attn.out_proj.bias torch.Size([1280])
381
+ encoder.modality_trunks.vision.blocks.30.norm_1.weight torch.Size([1280])
382
+ encoder.modality_trunks.vision.blocks.30.norm_1.bias torch.Size([1280])
383
+ encoder.modality_trunks.vision.blocks.30.mlp.fc1.weight torch.Size([5120, 1280])
384
+ encoder.modality_trunks.vision.blocks.30.mlp.fc1.bias torch.Size([5120])
385
+ encoder.modality_trunks.vision.blocks.30.mlp.fc2.weight torch.Size([1280, 5120])
386
+ encoder.modality_trunks.vision.blocks.30.mlp.fc2.bias torch.Size([1280])
387
+ encoder.modality_trunks.vision.blocks.30.norm_2.weight torch.Size([1280])
388
+ encoder.modality_trunks.vision.blocks.30.norm_2.bias torch.Size([1280])
389
+ encoder.modality_trunks.vision.blocks.31.attn.in_proj_weight torch.Size([3840, 1280])
390
+ encoder.modality_trunks.vision.blocks.31.attn.in_proj_bias torch.Size([3840])
391
+ encoder.modality_trunks.vision.blocks.31.attn.out_proj.weight torch.Size([1280, 1280])
392
+ encoder.modality_trunks.vision.blocks.31.attn.out_proj.bias torch.Size([1280])
393
+ encoder.modality_trunks.vision.blocks.31.norm_1.weight torch.Size([1280])
394
+ encoder.modality_trunks.vision.blocks.31.norm_1.bias torch.Size([1280])
395
+ encoder.modality_trunks.vision.blocks.31.mlp.fc1.weight torch.Size([5120, 1280])
396
+ encoder.modality_trunks.vision.blocks.31.mlp.fc1.bias torch.Size([5120])
397
+ encoder.modality_trunks.vision.blocks.31.mlp.fc2.weight torch.Size([1280, 5120])
398
+ encoder.modality_trunks.vision.blocks.31.mlp.fc2.bias torch.Size([1280])
399
+ encoder.modality_trunks.vision.blocks.31.norm_2.weight torch.Size([1280])
400
+ encoder.modality_trunks.vision.blocks.31.norm_2.bias torch.Size([1280])
401
+ encoder.modality_trunks.audio.blocks.0.attn.in_proj_weight torch.Size([2304, 768])
402
+ encoder.modality_trunks.audio.blocks.0.attn.in_proj_bias torch.Size([2304])
403
+ encoder.modality_trunks.audio.blocks.0.attn.bias_k torch.Size([1, 1, 768])
404
+ encoder.modality_trunks.audio.blocks.0.attn.bias_v torch.Size([1, 1, 768])
405
+ encoder.modality_trunks.audio.blocks.0.attn.out_proj.weight torch.Size([768, 768])
406
+ encoder.modality_trunks.audio.blocks.0.attn.out_proj.bias torch.Size([768])
407
+ encoder.modality_trunks.audio.blocks.0.norm_1.weight torch.Size([768])
408
+ encoder.modality_trunks.audio.blocks.0.norm_1.bias torch.Size([768])
409
+ encoder.modality_trunks.audio.blocks.0.mlp.fc1.weight torch.Size([3072, 768])
410
+ encoder.modality_trunks.audio.blocks.0.mlp.fc1.bias torch.Size([3072])
411
+ encoder.modality_trunks.audio.blocks.0.mlp.fc2.weight torch.Size([768, 3072])
412
+ encoder.modality_trunks.audio.blocks.0.mlp.fc2.bias torch.Size([768])
413
+ encoder.modality_trunks.audio.blocks.0.norm_2.weight torch.Size([768])
414
+ encoder.modality_trunks.audio.blocks.0.norm_2.bias torch.Size([768])
415
+ encoder.modality_trunks.audio.blocks.1.attn.in_proj_weight torch.Size([2304, 768])
416
+ encoder.modality_trunks.audio.blocks.1.attn.in_proj_bias torch.Size([2304])
417
+ encoder.modality_trunks.audio.blocks.1.attn.bias_k torch.Size([1, 1, 768])
418
+ encoder.modality_trunks.audio.blocks.1.attn.bias_v torch.Size([1, 1, 768])
419
+ encoder.modality_trunks.audio.blocks.1.attn.out_proj.weight torch.Size([768, 768])
420
+ encoder.modality_trunks.audio.blocks.1.attn.out_proj.bias torch.Size([768])
421
+ encoder.modality_trunks.audio.blocks.1.norm_1.weight torch.Size([768])
422
+ encoder.modality_trunks.audio.blocks.1.norm_1.bias torch.Size([768])
423
+ encoder.modality_trunks.audio.blocks.1.mlp.fc1.weight torch.Size([3072, 768])
424
+ encoder.modality_trunks.audio.blocks.1.mlp.fc1.bias torch.Size([3072])
425
+ encoder.modality_trunks.audio.blocks.1.mlp.fc2.weight torch.Size([768, 3072])
426
+ encoder.modality_trunks.audio.blocks.1.mlp.fc2.bias torch.Size([768])
427
+ encoder.modality_trunks.audio.blocks.1.norm_2.weight torch.Size([768])
428
+ encoder.modality_trunks.audio.blocks.1.norm_2.bias torch.Size([768])
429
+ encoder.modality_trunks.audio.blocks.2.attn.in_proj_weight torch.Size([2304, 768])
430
+ encoder.modality_trunks.audio.blocks.2.attn.in_proj_bias torch.Size([2304])
431
+ encoder.modality_trunks.audio.blocks.2.attn.bias_k torch.Size([1, 1, 768])
432
+ encoder.modality_trunks.audio.blocks.2.attn.bias_v torch.Size([1, 1, 768])
433
+ encoder.modality_trunks.audio.blocks.2.attn.out_proj.weight torch.Size([768, 768])
434
+ encoder.modality_trunks.audio.blocks.2.attn.out_proj.bias torch.Size([768])
435
+ encoder.modality_trunks.audio.blocks.2.norm_1.weight torch.Size([768])
436
+ encoder.modality_trunks.audio.blocks.2.norm_1.bias torch.Size([768])
437
+ encoder.modality_trunks.audio.blocks.2.mlp.fc1.weight torch.Size([3072, 768])
438
+ encoder.modality_trunks.audio.blocks.2.mlp.fc1.bias torch.Size([3072])
439
+ encoder.modality_trunks.audio.blocks.2.mlp.fc2.weight torch.Size([768, 3072])
440
+ encoder.modality_trunks.audio.blocks.2.mlp.fc2.bias torch.Size([768])
441
+ encoder.modality_trunks.audio.blocks.2.norm_2.weight torch.Size([768])
442
+ encoder.modality_trunks.audio.blocks.2.norm_2.bias torch.Size([768])
443
+ encoder.modality_trunks.audio.blocks.3.attn.in_proj_weight torch.Size([2304, 768])
444
+ encoder.modality_trunks.audio.blocks.3.attn.in_proj_bias torch.Size([2304])
445
+ encoder.modality_trunks.audio.blocks.3.attn.bias_k torch.Size([1, 1, 768])
446
+ encoder.modality_trunks.audio.blocks.3.attn.bias_v torch.Size([1, 1, 768])
447
+ encoder.modality_trunks.audio.blocks.3.attn.out_proj.weight torch.Size([768, 768])
448
+ encoder.modality_trunks.audio.blocks.3.attn.out_proj.bias torch.Size([768])
449
+ encoder.modality_trunks.audio.blocks.3.norm_1.weight torch.Size([768])
450
+ encoder.modality_trunks.audio.blocks.3.norm_1.bias torch.Size([768])
451
+ encoder.modality_trunks.audio.blocks.3.mlp.fc1.weight torch.Size([3072, 768])
452
+ encoder.modality_trunks.audio.blocks.3.mlp.fc1.bias torch.Size([3072])
453
+ encoder.modality_trunks.audio.blocks.3.mlp.fc2.weight torch.Size([768, 3072])
454
+ encoder.modality_trunks.audio.blocks.3.mlp.fc2.bias torch.Size([768])
455
+ encoder.modality_trunks.audio.blocks.3.norm_2.weight torch.Size([768])
456
+ encoder.modality_trunks.audio.blocks.3.norm_2.bias torch.Size([768])
457
+ encoder.modality_trunks.audio.blocks.4.attn.in_proj_weight torch.Size([2304, 768])
458
+ encoder.modality_trunks.audio.blocks.4.attn.in_proj_bias torch.Size([2304])
459
+ encoder.modality_trunks.audio.blocks.4.attn.bias_k torch.Size([1, 1, 768])
460
+ encoder.modality_trunks.audio.blocks.4.attn.bias_v torch.Size([1, 1, 768])
461
+ encoder.modality_trunks.audio.blocks.4.attn.out_proj.weight torch.Size([768, 768])
462
+ encoder.modality_trunks.audio.blocks.4.attn.out_proj.bias torch.Size([768])
463
+ encoder.modality_trunks.audio.blocks.4.norm_1.weight torch.Size([768])
464
+ encoder.modality_trunks.audio.blocks.4.norm_1.bias torch.Size([768])
465
+ encoder.modality_trunks.audio.blocks.4.mlp.fc1.weight torch.Size([3072, 768])
466
+ encoder.modality_trunks.audio.blocks.4.mlp.fc1.bias torch.Size([3072])
467
+ encoder.modality_trunks.audio.blocks.4.mlp.fc2.weight torch.Size([768, 3072])
468
+ encoder.modality_trunks.audio.blocks.4.mlp.fc2.bias torch.Size([768])
469
+ encoder.modality_trunks.audio.blocks.4.norm_2.weight torch.Size([768])
470
+ encoder.modality_trunks.audio.blocks.4.norm_2.bias torch.Size([768])
471
+ encoder.modality_trunks.audio.blocks.5.attn.in_proj_weight torch.Size([2304, 768])
472
+ encoder.modality_trunks.audio.blocks.5.attn.in_proj_bias torch.Size([2304])
473
+ encoder.modality_trunks.audio.blocks.5.attn.bias_k torch.Size([1, 1, 768])
474
+ encoder.modality_trunks.audio.blocks.5.attn.bias_v torch.Size([1, 1, 768])
475
+ encoder.modality_trunks.audio.blocks.5.attn.out_proj.weight torch.Size([768, 768])
476
+ encoder.modality_trunks.audio.blocks.5.attn.out_proj.bias torch.Size([768])
477
+ encoder.modality_trunks.audio.blocks.5.norm_1.weight torch.Size([768])
478
+ encoder.modality_trunks.audio.blocks.5.norm_1.bias torch.Size([768])
479
+ encoder.modality_trunks.audio.blocks.5.mlp.fc1.weight torch.Size([3072, 768])
480
+ encoder.modality_trunks.audio.blocks.5.mlp.fc1.bias torch.Size([3072])
481
+ encoder.modality_trunks.audio.blocks.5.mlp.fc2.weight torch.Size([768, 3072])
482
+ encoder.modality_trunks.audio.blocks.5.mlp.fc2.bias torch.Size([768])
483
+ encoder.modality_trunks.audio.blocks.5.norm_2.weight torch.Size([768])
484
+ encoder.modality_trunks.audio.blocks.5.norm_2.bias torch.Size([768])
485
+ encoder.modality_trunks.audio.blocks.6.attn.in_proj_weight torch.Size([2304, 768])
486
+ encoder.modality_trunks.audio.blocks.6.attn.in_proj_bias torch.Size([2304])
487
+ encoder.modality_trunks.audio.blocks.6.attn.bias_k torch.Size([1, 1, 768])
488
+ encoder.modality_trunks.audio.blocks.6.attn.bias_v torch.Size([1, 1, 768])
489
+ encoder.modality_trunks.audio.blocks.6.attn.out_proj.weight torch.Size([768, 768])
490
+ encoder.modality_trunks.audio.blocks.6.attn.out_proj.bias torch.Size([768])
491
+ encoder.modality_trunks.audio.blocks.6.norm_1.weight torch.Size([768])
492
+ encoder.modality_trunks.audio.blocks.6.norm_1.bias torch.Size([768])
493
+ encoder.modality_trunks.audio.blocks.6.mlp.fc1.weight torch.Size([3072, 768])
494
+ encoder.modality_trunks.audio.blocks.6.mlp.fc1.bias torch.Size([3072])
495
+ encoder.modality_trunks.audio.blocks.6.mlp.fc2.weight torch.Size([768, 3072])
496
+ encoder.modality_trunks.audio.blocks.6.mlp.fc2.bias torch.Size([768])
497
+ encoder.modality_trunks.audio.blocks.6.norm_2.weight torch.Size([768])
498
+ encoder.modality_trunks.audio.blocks.6.norm_2.bias torch.Size([768])
499
+ encoder.modality_trunks.audio.blocks.7.attn.in_proj_weight torch.Size([2304, 768])
500
+ encoder.modality_trunks.audio.blocks.7.attn.in_proj_bias torch.Size([2304])
501
+ encoder.modality_trunks.audio.blocks.7.attn.bias_k torch.Size([1, 1, 768])
502
+ encoder.modality_trunks.audio.blocks.7.attn.bias_v torch.Size([1, 1, 768])
503
+ encoder.modality_trunks.audio.blocks.7.attn.out_proj.weight torch.Size([768, 768])
504
+ encoder.modality_trunks.audio.blocks.7.attn.out_proj.bias torch.Size([768])
505
+ encoder.modality_trunks.audio.blocks.7.norm_1.weight torch.Size([768])
506
+ encoder.modality_trunks.audio.blocks.7.norm_1.bias torch.Size([768])
507
+ encoder.modality_trunks.audio.blocks.7.mlp.fc1.weight torch.Size([3072, 768])
508
+ encoder.modality_trunks.audio.blocks.7.mlp.fc1.bias torch.Size([3072])
509
+ encoder.modality_trunks.audio.blocks.7.mlp.fc2.weight torch.Size([768, 3072])
510
+ encoder.modality_trunks.audio.blocks.7.mlp.fc2.bias torch.Size([768])
511
+ encoder.modality_trunks.audio.blocks.7.norm_2.weight torch.Size([768])
512
+ encoder.modality_trunks.audio.blocks.7.norm_2.bias torch.Size([768])
513
+ encoder.modality_trunks.audio.blocks.8.attn.in_proj_weight torch.Size([2304, 768])
514
+ encoder.modality_trunks.audio.blocks.8.attn.in_proj_bias torch.Size([2304])
515
+ encoder.modality_trunks.audio.blocks.8.attn.bias_k torch.Size([1, 1, 768])
516
+ encoder.modality_trunks.audio.blocks.8.attn.bias_v torch.Size([1, 1, 768])
517
+ encoder.modality_trunks.audio.blocks.8.attn.out_proj.weight torch.Size([768, 768])
518
+ encoder.modality_trunks.audio.blocks.8.attn.out_proj.bias torch.Size([768])
519
+ encoder.modality_trunks.audio.blocks.8.norm_1.weight torch.Size([768])
520
+ encoder.modality_trunks.audio.blocks.8.norm_1.bias torch.Size([768])
521
+ encoder.modality_trunks.audio.blocks.8.mlp.fc1.weight torch.Size([3072, 768])
522
+ encoder.modality_trunks.audio.blocks.8.mlp.fc1.bias torch.Size([3072])
523
+ encoder.modality_trunks.audio.blocks.8.mlp.fc2.weight torch.Size([768, 3072])
524
+ encoder.modality_trunks.audio.blocks.8.mlp.fc2.bias torch.Size([768])
525
+ encoder.modality_trunks.audio.blocks.8.norm_2.weight torch.Size([768])
526
+ encoder.modality_trunks.audio.blocks.8.norm_2.bias torch.Size([768])
527
+ encoder.modality_trunks.audio.blocks.9.attn.in_proj_weight torch.Size([2304, 768])
528
+ encoder.modality_trunks.audio.blocks.9.attn.in_proj_bias torch.Size([2304])
529
+ encoder.modality_trunks.audio.blocks.9.attn.bias_k torch.Size([1, 1, 768])
530
+ encoder.modality_trunks.audio.blocks.9.attn.bias_v torch.Size([1, 1, 768])
531
+ encoder.modality_trunks.audio.blocks.9.attn.out_proj.weight torch.Size([768, 768])
532
+ encoder.modality_trunks.audio.blocks.9.attn.out_proj.bias torch.Size([768])
533
+ encoder.modality_trunks.audio.blocks.9.norm_1.weight torch.Size([768])
534
+ encoder.modality_trunks.audio.blocks.9.norm_1.bias torch.Size([768])
535
+ encoder.modality_trunks.audio.blocks.9.mlp.fc1.weight torch.Size([3072, 768])
536
+ encoder.modality_trunks.audio.blocks.9.mlp.fc1.bias torch.Size([3072])
537
+ encoder.modality_trunks.audio.blocks.9.mlp.fc2.weight torch.Size([768, 3072])
538
+ encoder.modality_trunks.audio.blocks.9.mlp.fc2.bias torch.Size([768])
539
+ encoder.modality_trunks.audio.blocks.9.norm_2.weight torch.Size([768])
540
+ encoder.modality_trunks.audio.blocks.9.norm_2.bias torch.Size([768])
541
+ encoder.modality_trunks.audio.blocks.10.attn.in_proj_weight torch.Size([2304, 768])
542
+ encoder.modality_trunks.audio.blocks.10.attn.in_proj_bias torch.Size([2304])
543
+ encoder.modality_trunks.audio.blocks.10.attn.bias_k torch.Size([1, 1, 768])
544
+ encoder.modality_trunks.audio.blocks.10.attn.bias_v torch.Size([1, 1, 768])
545
+ encoder.modality_trunks.audio.blocks.10.attn.out_proj.weight torch.Size([768, 768])
546
+ encoder.modality_trunks.audio.blocks.10.attn.out_proj.bias torch.Size([768])
547
+ encoder.modality_trunks.audio.blocks.10.norm_1.weight torch.Size([768])
548
+ encoder.modality_trunks.audio.blocks.10.norm_1.bias torch.Size([768])
549
+ encoder.modality_trunks.audio.blocks.10.mlp.fc1.weight torch.Size([3072, 768])
550
+ encoder.modality_trunks.audio.blocks.10.mlp.fc1.bias torch.Size([3072])
551
+ encoder.modality_trunks.audio.blocks.10.mlp.fc2.weight torch.Size([768, 3072])
552
+ encoder.modality_trunks.audio.blocks.10.mlp.fc2.bias torch.Size([768])
553
+ encoder.modality_trunks.audio.blocks.10.norm_2.weight torch.Size([768])
554
+ encoder.modality_trunks.audio.blocks.10.norm_2.bias torch.Size([768])
555
+ encoder.modality_trunks.audio.blocks.11.attn.in_proj_weight torch.Size([2304, 768])
556
+ encoder.modality_trunks.audio.blocks.11.attn.in_proj_bias torch.Size([2304])
557
+ encoder.modality_trunks.audio.blocks.11.attn.bias_k torch.Size([1, 1, 768])
558
+ encoder.modality_trunks.audio.blocks.11.attn.bias_v torch.Size([1, 1, 768])
559
+ encoder.modality_trunks.audio.blocks.11.attn.out_proj.weight torch.Size([768, 768])
560
+ encoder.modality_trunks.audio.blocks.11.attn.out_proj.bias torch.Size([768])
561
+ encoder.modality_trunks.audio.blocks.11.norm_1.weight torch.Size([768])
562
+ encoder.modality_trunks.audio.blocks.11.norm_1.bias torch.Size([768])
563
+ encoder.modality_trunks.audio.blocks.11.mlp.fc1.weight torch.Size([3072, 768])
564
+ encoder.modality_trunks.audio.blocks.11.mlp.fc1.bias torch.Size([3072])
565
+ encoder.modality_trunks.audio.blocks.11.mlp.fc2.weight torch.Size([768, 3072])
566
+ encoder.modality_trunks.audio.blocks.11.mlp.fc2.bias torch.Size([768])
567
+ encoder.modality_trunks.audio.blocks.11.norm_2.weight torch.Size([768])
568
+ encoder.modality_trunks.audio.blocks.11.norm_2.bias torch.Size([768])
569
+ encoder.modality_heads.vision.0.weight torch.Size([1280])
570
+ encoder.modality_heads.vision.0.bias torch.Size([1280])
571
+ encoder.modality_heads.vision.2.weight torch.Size([1024, 1280])
572
  encoder.modality_heads.audio.0.weight torch.Size([768])
573
+ encoder.modality_heads.audio.0.bias torch.Size([768])
574
  encoder.modality_heads.audio.2.weight torch.Size([1024, 768])
575
+ encoder.modality_postprocessors.audio.1.log_logit_scale torch.Size([])
 
 
 
576
  reasoner.model.embed_tokens.weight torch.Size([151936, 896])
577
  reasoner.model.layers.0.self_attn.q_proj.weight torch.Size([896, 896])
578
  reasoner.model.layers.0.self_attn.q_proj.bias torch.Size([896])