diff --git "a/nncf_output.log" "b/nncf_output.log" --- "a/nncf_output.log" +++ "b/nncf_output.log" @@ -1549,3 +1549,5856 @@ Epoch 0 |+==============+=====================+====================+============ Epoch 0 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / | Epoch 0 || | | 101) | 178) | Epoch 0 |+--------------+---------------------+--------------------+--------------------+ +INFO:nncf:Statistics of the sparsified model: +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+=========================================+=======+ +Epoch 1 || Sparsity level of the whole model | 0 | +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 || Sparsity level of all sparsified layers | 0 | +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics by sparsified layers: +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 1 |+======================+================+================+=====================+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0/bia | | | | +Epoch 1 || s | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072] | 0 | 0.004 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0/bia | | | | +Epoch 1 || s | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0 | 2.775 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0/bias | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 | +Epoch 1 |Statistics of the movement-sparsity algorithm: +Epoch 1 |+----------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+==================================+=======+ +Epoch 1 || Mask Importance Threshold | -inf | +Epoch 1 |+----------------------------------+-------+ +Epoch 1 || Importance Regularization Factor | 0 | +Epoch 1 |+----------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics of the quantization algorithm: +Epoch 1 |+--------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+================================+=======+ +Epoch 1 || Ratio of enabled quantizations | 100 | +Epoch 1 |+--------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics of the quantization share: +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+==================================+======================+ +Epoch 1 || Symmetric WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Signed WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Unsigned WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Per-tensor WQs / All placed WQs | 3.90 % (3 / 77) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Per-channel WQs / All placed WQs | 96.10 % (74 / 77) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Placed WQs / Potential WQs | 75.49 % (77 / 102) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Symmetric AQs / All placed AQs | 23.76 % (24 / 101) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Asymmetric AQs / All placed AQs | 76.24 % (77 / 101) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Signed AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Unsigned AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Per-tensor AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 || Per-channel AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 1 |+----------------------------------+----------------------+ +Epoch 1 | +Epoch 1 |Statistics of the bitwidth distribution: +Epoch 1 |+--------------+---------------------+--------------------+--------------------+ +Epoch 1 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 1 || | WQs | Placed AQs | Qs | +Epoch 1 |+==============+=====================+====================+====================+ +Epoch 1 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / | +Epoch 1 || | | 101) | 178) | +Epoch 1 |+--------------+---------------------+--------------------+--------------------+ +INFO:nncf:Movement sparsity automatically calculates `init_importance_threshold` as -0.002674092771485448 so that warmup starts from ~0.1% linear layer sparsity. +INFO:nncf:Statistics of the sparsified model: +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+=========================================+=======+ +Epoch 2 || Sparsity level of the whole model | 0.649 | +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 || Sparsity level of all sparsified layers | 0.836 | +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 | +Epoch 2 |Statistics by sparsified layers: +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 2 |+======================+================+================+=====================+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.958 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.875 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.679 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.454 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.723 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.892 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.896 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.835 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.842 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.667 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.467 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.717 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.957 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.944 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.868 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.873 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.417 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.631 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.464 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.677 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.708 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.646 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.470 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.693 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.958 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.974 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.653 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.459 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.715 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.951 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.953 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.941 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.938 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.680 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.470 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.746 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.950 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.946 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.924 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.925 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.875 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.691 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.467 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.756 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.903 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.894 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.898 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.891 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.749 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.454 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.788 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.957 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.958 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.944 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.934 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.931 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.958 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.883 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.461 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.884 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.958 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.924 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.908 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.927 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 0.958 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.955 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.482 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.954 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.951 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.958 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.979 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.484 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0/bia | | | | +Epoch 2 || s | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.975 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.981 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.978 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072] | 0.509 | 0.004 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0/bia | | | | +Epoch 2 || s | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.977 | 2.775 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768] | 1 | 0.001 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0/bias | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 | +Epoch 2 |Statistics of the movement-sparsity algorithm: +Epoch 2 |+----------------------------------+--------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+==================================+========+ +Epoch 2 || Mask Importance Threshold | -0.000 | +Epoch 2 |+----------------------------------+--------+ +Epoch 2 || Importance Regularization Factor | 0.050 | +Epoch 2 |+----------------------------------+--------+ +Epoch 2 | +Epoch 2 |Statistics of the quantization algorithm: +Epoch 2 |+--------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+================================+=======+ +Epoch 2 || Ratio of enabled quantizations | 100 | +Epoch 2 |+--------------------------------+-------+ +Epoch 2 | +Epoch 2 |Statistics of the quantization share: +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+==================================+======================+ +Epoch 2 || Symmetric WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Signed WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Unsigned WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Per-tensor WQs / All placed WQs | 3.90 % (3 / 77) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Per-channel WQs / All placed WQs | 96.10 % (74 / 77) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Placed WQs / Potential WQs | 75.49 % (77 / 102) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Symmetric AQs / All placed AQs | 23.76 % (24 / 101) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Asymmetric AQs / All placed AQs | 76.24 % (77 / 101) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Signed AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Unsigned AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Per-tensor AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 || Per-channel AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 2 |+----------------------------------+----------------------+ +Epoch 2 | +Epoch 2 |Statistics of the bitwidth distribution: +Epoch 2 |+--------------+---------------------+--------------------+--------------------+ +Epoch 2 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 2 || | WQs | Placed AQs | Qs | +Epoch 2 |+==============+=====================+====================+====================+ +Epoch 2 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / | +Epoch 2 || | | 101) | 178) | +Epoch 2 |+--------------+---------------------+--------------------+--------------------+ +INFO:nncf:Statistics of the sparsified model: +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+=========================================+=======+ +Epoch 3 || Sparsity level of the whole model | 0.314 | +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 || Sparsity level of all sparsified layers | 0.405 | +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics by sparsified layers: +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 3 |+======================+================+================+=====================+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.500 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.500 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.500 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.210 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.210 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.210 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.213 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.213 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.213 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.185 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.185 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.185 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.208 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.208 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.208 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.212 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.212 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.212 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.241 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.241 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.241 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.583 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.583 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.583 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.239 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.239 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.239 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.750 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.750 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.750 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.256 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.256 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.256 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.351 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.351 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.351 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.434 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.434 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.434 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.461 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.461 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0/bia | | | | +Epoch 3 || s | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.461 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.486 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072] | 0.486 | 0.004 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0/bia | | | | +Epoch 3 || s | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.486 | 2.775 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0/bias | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 | +Epoch 3 |Statistics of the movement-sparsity algorithm: +Epoch 3 |+----------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+==================================+=======+ +Epoch 3 || Mask Importance Threshold | 0 | +Epoch 3 |+----------------------------------+-------+ +Epoch 3 || Importance Regularization Factor | 0.050 | +Epoch 3 |+----------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics of the quantization algorithm: +Epoch 3 |+--------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+================================+=======+ +Epoch 3 || Ratio of enabled quantizations | 100 | +Epoch 3 |+--------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics of the quantization share: +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+==================================+======================+ +Epoch 3 || Symmetric WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Signed WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Unsigned WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Per-tensor WQs / All placed WQs | 3.90 % (3 / 77) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Per-channel WQs / All placed WQs | 96.10 % (74 / 77) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Placed WQs / Potential WQs | 75.49 % (77 / 102) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Symmetric AQs / All placed AQs | 23.76 % (24 / 101) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Asymmetric AQs / All placed AQs | 76.24 % (77 / 101) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Signed AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Unsigned AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Per-tensor AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 || Per-channel AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 3 |+----------------------------------+----------------------+ +Epoch 3 | +Epoch 3 |Statistics of the bitwidth distribution: +Epoch 3 |+--------------+---------------------+--------------------+--------------------+ +Epoch 3 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 3 || | WQs | Placed AQs | Qs | +Epoch 3 |+==============+=====================+====================+====================+ +Epoch 3 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / | +Epoch 3 || | | 101) | 178) | +Epoch 3 |+--------------+---------------------+--------------------+--------------------+ +INFO:nncf:Statistics of the sparsified model: +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+=========================================+=======+ +Epoch 4 || Sparsity level of the whole model | 0.314 | +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 || Sparsity level of all sparsified layers | 0.405 | +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics by sparsified layers: +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 4 |+======================+================+================+=====================+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.500 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.500 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.500 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.210 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.210 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.210 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.213 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.213 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.213 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.185 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.185 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.185 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.208 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.208 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.208 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.250 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.212 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.212 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.212 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.241 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.241 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.241 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.583 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.583 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.583 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.239 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.239 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.239 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.750 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.750 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.750 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.750 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.256 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.256 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.256 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.667 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.667 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.351 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.351 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.351 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.434 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.434 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.434 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.833 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.833 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.461 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.461 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0/bia | | | | +Epoch 4 || s | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.461 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0.917 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.917 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.486 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072] | 0.486 | 0.004 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0/bia | | | | +Epoch 4 || s | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.486 | 2.775 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768] | 0 | 0.001 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0/bias | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 | +Epoch 4 |Statistics of the movement-sparsity algorithm: +Epoch 4 |+----------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+==================================+=======+ +Epoch 4 || Mask Importance Threshold | 0 | +Epoch 4 |+----------------------------------+-------+ +Epoch 4 || Importance Regularization Factor | 0.050 | +Epoch 4 |+----------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics of the quantization algorithm: +Epoch 4 |+--------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+================================+=======+ +Epoch 4 || Ratio of enabled quantizations | 100 | +Epoch 4 |+--------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics of the quantization share: +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+==================================+======================+ +Epoch 4 || Symmetric WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Signed WQs / All placed WQs | 100.00 % (77 / 77) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Unsigned WQs / All placed WQs | 0.00 % (0 / 77) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Per-tensor WQs / All placed WQs | 3.90 % (3 / 77) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Per-channel WQs / All placed WQs | 96.10 % (74 / 77) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Placed WQs / Potential WQs | 75.49 % (77 / 102) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Symmetric AQs / All placed AQs | 23.76 % (24 / 101) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Asymmetric AQs / All placed AQs | 76.24 % (77 / 101) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Signed AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Unsigned AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Per-tensor AQs / All placed AQs | 100.00 % (101 / 101) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 || Per-channel AQs / All placed AQs | 0.00 % (0 / 101) | +Epoch 4 |+----------------------------------+----------------------+ +Epoch 4 | +Epoch 4 |Statistics of the bitwidth distribution: +Epoch 4 |+--------------+---------------------+--------------------+--------------------+ +Epoch 4 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 4 || | WQs | Placed AQs | Qs | +Epoch 4 |+==============+=====================+====================+====================+ +Epoch 4 || 8 | 100.00 % (77 / 77) | 100.00 % (101 / | 100.00 % (178 / | +Epoch 4 || | | 101) | 178) | +Epoch 4 |+--------------+---------------------+--------------------+--------------------+