adrianeboyd committed on
Commit
49f366e
1 Parent(s): 6b01c7e

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,13 +14,13 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8443037975
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8389937107
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8416403785
24
  - task:
25
  name: TAG
26
  type: token-classification
@@ -34,7 +34,7 @@ model-index:
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.9806393516
38
  - task:
39
  name: MORPH
40
  type: token-classification
@@ -55,21 +55,21 @@ model-index:
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
- value: 0.9318325079
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
- value: 0.9202448465
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
- value: 0.9794319295
73
  ---
74
  ### Details: https://spacy.io/models/ja#ja_core_news_trf
75
 
@@ -78,8 +78,8 @@ Japanese transformer pipeline (cl-tohoku/bert-base-japanese-char-v2). Components
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `ja_core_news_trf` |
81
- | **Version** | `3.4.0` |
82
- | **spaCy** | `>=3.4.0,<3.5.0` |
83
  | **Default Pipeline** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `ner` |
84
  | **Components** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `ner` |
85
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -105,22 +105,22 @@ Japanese transformer pipeline (cl-tohoku/bert-base-japanese-char-v2). Components
105
 
106
  | Type | Score |
107
  | --- | --- |
108
- | `TOKEN_ACC` | 99.69 |
109
  | `TOKEN_P` | 97.65 |
110
  | `TOKEN_R` | 97.90 |
111
  | `TOKEN_F` | 97.77 |
112
- | `POS_ACC` | 98.06 |
113
  | `MORPH_ACC` | 0.00 |
114
- | `MORPH_MICRO_P` | 34.01 |
115
- | `MORPH_MICRO_R` | 98.04 |
116
- | `MORPH_MICRO_F` | 50.51 |
117
- | `SENTS_P` | 97.28 |
118
- | `SENTS_R` | 98.62 |
119
- | `SENTS_F` | 97.94 |
120
- | `DEP_UAS` | 93.18 |
121
- | `DEP_LAS` | 92.02 |
122
  | `TAG_ACC` | 97.12 |
123
  | `LEMMA_ACC` | 96.71 |
124
- | `ENTS_P` | 84.43 |
125
- | `ENTS_R` | 83.90 |
126
- | `ENTS_F` | 84.16 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.8298969072
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.8100628931
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8198599618
24
  - task:
25
  name: TAG
26
  type: token-classification
 
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.9798207196
38
  - task:
39
  name: MORPH
40
  type: token-classification
 
55
  metrics:
56
  - name: Unlabeled Attachment Score (UAS)
57
  type: f_score
58
+ value: 0.9355666622
59
  - task:
60
  name: LABELED_DEPENDENCIES
61
  type: token-classification
62
  metrics:
63
  - name: Labeled Attachment Score (LAS)
64
  type: f_score
65
+ value: 0.9241776538
66
  - task:
67
  name: SENTS
68
  type: token-classification
69
  metrics:
70
  - name: Sentences F-Score
71
  type: f_score
72
+ value: 0.9755142018
73
  ---
74
  ### Details: https://spacy.io/models/ja#ja_core_news_trf
75
 
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `ja_core_news_trf` |
81
+ | **Version** | `3.5.0` |
82
+ | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `ner` |
84
  | **Components** | `transformer`, `morphologizer`, `parser`, `attribute_ruler`, `ner` |
85
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
105
 
106
  | Type | Score |
107
  | --- | --- |
108
+ | `TOKEN_ACC` | 99.37 |
109
  | `TOKEN_P` | 97.65 |
110
  | `TOKEN_R` | 97.90 |
111
  | `TOKEN_F` | 97.77 |
112
+ | `POS_ACC` | 97.98 |
113
  | `MORPH_ACC` | 0.00 |
114
+ | `MORPH_MICRO_P` | 33.56 |
115
+ | `MORPH_MICRO_R` | 96.08 |
116
+ | `MORPH_MICRO_F` | 49.75 |
117
+ | `SENTS_P` | 96.89 |
118
+ | `SENTS_R` | 98.22 |
119
+ | `SENTS_F` | 97.55 |
120
+ | `DEP_UAS` | 93.56 |
121
+ | `DEP_LAS` | 92.42 |
122
  | `TAG_ACC` | 97.12 |
123
  | `LEMMA_ACC` | 96.71 |
124
+ | `ENTS_P` | 82.99 |
125
+ | `ENTS_R` | 81.01 |
126
+ | `ENTS_F` | 81.99 |
accuracy.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "token_acc": 0.9968649485,
3
  "token_p": 0.9764591282,
4
  "token_r": 0.9790021974,
5
  "token_f": 0.9777290092,
6
- "pos_acc": 0.9806393516,
7
  "morph_acc": 0.0,
8
- "morph_micro_p": 0.3401360544,
9
- "morph_micro_r": 0.9803921569,
10
- "morph_micro_f": 0.5050505051,
11
  "morph_per_feat": {
12
  "Polarity": {
13
  "p": 1.0,
14
- "r": 0.9803921569,
15
- "f": 0.9900990099
16
  },
17
  "Inflection": {
18
  "p": 0.0,
@@ -25,106 +25,106 @@
25
  "f": 0.0
26
  }
27
  },
28
- "sents_p": 0.9727626459,
29
- "sents_r": 0.9861932939,
30
- "sents_f": 0.9794319295,
31
- "dep_uas": 0.9318325079,
32
- "dep_las": 0.9202448465,
33
  "dep_las_per_type": {
34
  "cc": {
35
- "p": 0.8958333333,
36
- "r": 0.8958333333,
37
- "f": 0.8958333333
38
  },
39
  "compound": {
40
- "p": 0.9542755344,
41
- "r": 0.9058624577,
42
- "f": 0.9294389821
43
  },
44
  "obl": {
45
- "p": 0.8513513514,
46
- "r": 0.8651685393,
47
- "f": 0.8582043344
48
  },
49
  "case": {
50
- "p": 0.9873756695,
51
- "r": 0.9806231003,
52
- "f": 0.9839878002
53
  },
54
  "dislocated": {
55
- "p": 0.7,
56
- "r": 0.5384615385,
57
- "f": 0.6086956522
58
  },
59
  "nsubj": {
60
- "p": 0.8549323017,
61
- "r": 0.8483685221,
62
- "f": 0.8516377649
63
  },
64
  "nmod": {
65
- "p": 0.9195979899,
66
- "r": 0.8561403509,
67
- "f": 0.8867353119
68
  },
69
  "root": {
70
- "p": 0.9486166008,
71
- "r": 0.9467455621,
72
- "f": 0.9476801579
73
  },
74
  "aux": {
75
- "p": 0.9794776119,
76
- "r": 0.9749303621,
77
- "f": 0.9771986971
78
  },
79
  "advcl": {
80
- "p": 0.7342342342,
81
- "r": 0.7325842697,
82
- "f": 0.733408324
83
  },
84
  "mark": {
85
- "p": 0.9757575758,
86
- "r": 0.966,
87
- "f": 0.9708542714
88
  },
89
  "fixed": {
90
- "p": 0.9659498208,
91
- "r": 0.98,
92
- "f": 0.9729241877
93
  },
94
  "acl": {
95
- "p": 0.8608695652,
96
  "r": 0.8703296703,
97
- "f": 0.8655737705
98
  },
99
  "obj": {
100
- "p": 0.9386503067,
101
- "r": 0.9244712991,
102
- "f": 0.9315068493
103
  },
104
  "nummod": {
105
- "p": 0.9871794872,
106
- "r": 0.9112426036,
107
- "f": 0.9476923077
108
  },
109
  "advmod": {
110
- "p": 0.7851851852,
111
- "r": 0.7571428571,
112
- "f": 0.7709090909
113
  },
114
  "amod": {
115
- "p": 0.9666666667,
116
- "r": 0.7837837838,
117
- "f": 0.8656716418
118
  },
119
  "cop": {
120
- "p": 0.9822485207,
121
- "r": 0.9651162791,
122
- "f": 0.9736070381
123
  },
124
  "ccomp": {
125
- "p": 1.0,
126
- "r": 0.8181818182,
127
- "f": 0.9
128
  },
129
  "det": {
130
  "p": 1.0,
@@ -132,106 +132,106 @@
132
  "f": 0.9904761905
133
  },
134
  "csubj": {
135
- "p": 0.6666666667,
136
  "r": 0.8333333333,
137
- "f": 0.7407407407
138
  },
139
  "dep": {
140
- "p": 0.3,
141
- "r": 0.4285714286,
142
- "f": 0.3529411765
143
  }
144
  },
145
  "tag_acc": 0.9712488769,
146
  "lemma_acc": 0.9670526831,
147
- "ents_p": 0.8443037975,
148
- "ents_r": 0.8389937107,
149
- "ents_f": 0.8416403785,
150
  "ents_per_type": {
151
  "DATE": {
152
- "p": 0.9907407407,
153
- "r": 0.9816513761,
154
- "f": 0.9861751152
155
  },
156
  "ORG": {
157
- "p": 0.7941176471,
158
- "r": 0.7883211679,
159
- "f": 0.7912087912
160
- },
161
- "TITLE_AFFIX": {
162
- "p": 0.8695652174,
163
- "r": 0.6666666667,
164
- "f": 0.7547169811
165
  },
166
  "PERSON": {
167
- "p": 0.9007092199,
168
- "r": 0.9136690647,
169
- "f": 0.9071428571
170
  },
171
  "GPE": {
172
- "p": 0.8315789474,
173
  "r": 0.8404255319,
174
- "f": 0.835978836
 
 
 
 
 
175
  },
176
  "TIME": {
177
- "p": 1.0,
178
  "r": 1.0,
179
- "f": 1.0
180
  },
181
  "QUANTITY": {
182
- "p": 0.8533333333,
183
- "r": 0.9696969697,
184
- "f": 0.9078014184
185
  },
186
  "NORP": {
187
- "p": 0.7419354839,
188
- "r": 0.71875,
189
- "f": 0.7301587302
 
 
 
 
 
190
  },
191
  "ORDINAL": {
192
- "p": 0.6956521739,
193
- "r": 0.7272727273,
194
- "f": 0.7111111111
195
  },
196
  "WORK_OF_ART": {
197
- "p": 0.8235294118,
198
- "r": 0.8235294118,
199
- "f": 0.8235294118
200
  },
201
- "PERCENT": {
202
  "p": 1.0,
203
- "r": 0.8571428571,
204
- "f": 0.9230769231
205
  },
206
- "CARDINAL": {
207
- "p": 0.0,
208
- "r": 0.0,
209
- "f": 0.0
210
  },
211
- "EVENT": {
212
  "p": 1.0,
213
- "r": 0.9230769231,
214
- "f": 0.96
215
  },
216
- "PRODUCT": {
217
- "p": 0.6216216216,
218
- "r": 0.5476190476,
219
- "f": 0.582278481
220
  },
221
  "FAC": {
222
- "p": 0.7941176471,
223
- "r": 0.7297297297,
224
- "f": 0.7605633803
225
- },
226
- "LOC": {
227
- "p": 0.5714285714,
228
- "r": 0.8,
229
- "f": 0.6666666667
230
  },
231
  "MOVEMENT": {
232
- "p": 0.4,
233
- "r": 0.4,
234
- "f": 0.4
235
  },
236
  "LAW": {
237
  "p": 0.6666666667,
@@ -247,12 +247,7 @@
247
  "p": 1.0,
248
  "r": 1.0,
249
  "f": 1.0
250
- },
251
- "PET_NAME": {
252
- "p": 0.0,
253
- "r": 0.0,
254
- "f": 0.0
255
  }
256
  },
257
- "speed": 2829.1916919204
258
  }
 
1
  {
2
+ "token_acc": 0.9937494927,
3
  "token_p": 0.9764591282,
4
  "token_r": 0.9790021974,
5
  "token_f": 0.9777290092,
6
+ "pos_acc": 0.9798207196,
7
  "morph_acc": 0.0,
8
+ "morph_micro_p": 0.3356164384,
9
+ "morph_micro_r": 0.9607843137,
10
+ "morph_micro_f": 0.4974619289,
11
  "morph_per_feat": {
12
  "Polarity": {
13
  "p": 1.0,
14
+ "r": 0.9607843137,
15
+ "f": 0.98
16
  },
17
  "Inflection": {
18
  "p": 0.0,
 
25
  "f": 0.0
26
  }
27
  },
28
+ "sents_p": 0.9688715953,
29
+ "sents_r": 0.9822485207,
30
+ "sents_f": 0.9755142018,
31
+ "dep_uas": 0.9355666622,
32
+ "dep_las": 0.9241776538,
33
  "dep_las_per_type": {
34
  "cc": {
35
+ "p": 0.875,
36
+ "r": 0.875,
37
+ "f": 0.875
38
  },
39
  "compound": {
40
+ "p": 0.9552414605,
41
+ "r": 0.9143179256,
42
+ "f": 0.9343317972
43
  },
44
  "obl": {
45
+ "p": 0.8567901235,
46
+ "r": 0.8664169788,
47
+ "f": 0.8615766605
48
  },
49
  "case": {
50
+ "p": 0.990015361,
51
+ "r": 0.9794832827,
52
+ "f": 0.9847211612
53
  },
54
  "dislocated": {
55
+ "p": 0.8181818182,
56
+ "r": 0.6923076923,
57
+ "f": 0.75
58
  },
59
  "nsubj": {
60
+ "p": 0.875,
61
+ "r": 0.8598848369,
62
+ "f": 0.8673765731
63
  },
64
  "nmod": {
65
+ "p": 0.921641791,
66
+ "r": 0.8666666667,
67
+ "f": 0.8933092224
68
  },
69
  "root": {
70
+ "p": 0.9645669291,
71
+ "r": 0.966469428,
72
+ "f": 0.9655172414
73
  },
74
  "aux": {
75
+ "p": 0.9757462687,
76
+ "r": 0.9712163417,
77
+ "f": 0.9734760354
78
  },
79
  "advcl": {
80
+ "p": 0.7333333333,
81
+ "r": 0.7415730337,
82
+ "f": 0.7374301676
83
  },
84
  "mark": {
85
+ "p": 0.964,
86
+ "r": 0.964,
87
+ "f": 0.964
88
  },
89
  "fixed": {
90
+ "p": 0.9663716814,
91
+ "r": 0.9927272727,
92
+ "f": 0.9793721973
93
  },
94
  "acl": {
95
+ "p": 0.8780487805,
96
  "r": 0.8703296703,
97
+ "f": 0.8741721854
98
  },
99
  "obj": {
100
+ "p": 0.9726443769,
101
+ "r": 0.9667673716,
102
+ "f": 0.9696969697
103
  },
104
  "nummod": {
105
+ "p": 0.9806451613,
106
+ "r": 0.899408284,
107
+ "f": 0.9382716049
108
  },
109
  "advmod": {
110
+ "p": 0.7388059701,
111
+ "r": 0.7071428571,
112
+ "f": 0.7226277372
113
  },
114
  "amod": {
115
+ "p": 0.9333333333,
116
+ "r": 0.7567567568,
117
+ "f": 0.8358208955
118
  },
119
  "cop": {
120
+ "p": 0.9821428571,
121
+ "r": 0.9593023256,
122
+ "f": 0.9705882353
123
  },
124
  "ccomp": {
125
+ "p": 0.9444444444,
126
+ "r": 0.7727272727,
127
+ "f": 0.85
128
  },
129
  "det": {
130
  "p": 1.0,
 
132
  "f": 0.9904761905
133
  },
134
  "csubj": {
135
+ "p": 0.7692307692,
136
  "r": 0.8333333333,
137
+ "f": 0.8
138
  },
139
  "dep": {
140
+ "p": 0.2857142857,
141
+ "r": 0.2857142857,
142
+ "f": 0.2857142857
143
  }
144
  },
145
  "tag_acc": 0.9712488769,
146
  "lemma_acc": 0.9670526831,
147
+ "ents_p": 0.8298969072,
148
+ "ents_r": 0.8100628931,
149
+ "ents_f": 0.8198599618,
150
  "ents_per_type": {
151
  "DATE": {
152
+ "p": 0.9906542056,
153
+ "r": 0.9724770642,
154
+ "f": 0.9814814815
155
  },
156
  "ORG": {
157
+ "p": 0.7445255474,
158
+ "r": 0.7445255474,
159
+ "f": 0.7445255474
 
 
 
 
 
160
  },
161
  "PERSON": {
162
+ "p": 0.9516129032,
163
+ "r": 0.8489208633,
164
+ "f": 0.897338403
165
  },
166
  "GPE": {
167
+ "p": 0.79,
168
  "r": 0.8404255319,
169
+ "f": 0.8144329897
170
+ },
171
+ "PRODUCT": {
172
+ "p": 0.5945945946,
173
+ "r": 0.5238095238,
174
+ "f": 0.5569620253
175
  },
176
  "TIME": {
177
+ "p": 0.8,
178
  "r": 1.0,
179
+ "f": 0.8888888889
180
  },
181
  "QUANTITY": {
182
+ "p": 0.8904109589,
183
+ "r": 0.9848484848,
184
+ "f": 0.9352517986
185
  },
186
  "NORP": {
187
+ "p": 0.6785714286,
188
+ "r": 0.59375,
189
+ "f": 0.6333333333
190
+ },
191
+ "TITLE_AFFIX": {
192
+ "p": 0.7857142857,
193
+ "r": 0.7333333333,
194
+ "f": 0.7586206897
195
  },
196
  "ORDINAL": {
197
+ "p": 0.652173913,
198
+ "r": 0.6818181818,
199
+ "f": 0.6666666667
200
  },
201
  "WORK_OF_ART": {
202
+ "p": 0.8461538462,
203
+ "r": 0.6470588235,
204
+ "f": 0.7333333333
205
  },
206
+ "CARDINAL": {
207
  "p": 1.0,
208
+ "r": 0.5,
209
+ "f": 0.6666666667
210
  },
211
+ "LOC": {
212
+ "p": 0.6,
213
+ "r": 0.9,
214
+ "f": 0.72
215
  },
216
+ "PERCENT": {
217
  "p": 1.0,
218
+ "r": 0.7142857143,
219
+ "f": 0.8333333333
220
  },
221
+ "EVENT": {
222
+ "p": 0.9090909091,
223
+ "r": 0.7692307692,
224
+ "f": 0.8333333333
225
  },
226
  "FAC": {
227
+ "p": 0.7692307692,
228
+ "r": 0.8108108108,
229
+ "f": 0.7894736842
 
 
 
 
 
230
  },
231
  "MOVEMENT": {
232
+ "p": 0.3333333333,
233
+ "r": 0.2,
234
+ "f": 0.25
235
  },
236
  "LAW": {
237
  "p": 0.6666666667,
 
247
  "p": 1.0,
248
  "r": 1.0,
249
  "f": 1.0
 
 
 
 
 
250
  }
251
  },
252
+ "speed": 3501.8234367587
253
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -111,6 +111,7 @@ stride = 96
111
  use_fast = false
112
  word_tokenizer_type = "basic"
113
  subword_tokenizer_type = "character"
 
114
 
115
  [components.transformer.model.transformer_config]
116
 
@@ -146,6 +147,7 @@ eval_frequency = 1000
146
  frozen_components = []
147
  before_to_disk = null
148
  annotating_components = []
 
149
 
150
  [training.batcher]
151
  @batchers = "spacy.batch_by_padded.v1"
 
111
  use_fast = false
112
  word_tokenizer_type = "basic"
113
  subword_tokenizer_type = "character"
114
+ model_max_length = 512
115
 
116
  [components.transformer.model.transformer_config]
117
 
 
147
  frozen_components = []
148
  before_to_disk = null
149
  annotating_components = []
150
+ before_update = null
151
 
152
  [training.batcher]
153
  @batchers = "spacy.batch_by_padded.v1"
ja_core_news_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51f0142b124a407acdd90fa60ec9eb4a4908fa9c85c98dfe50d83b827208f3d7
3
- size 337884760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af1c8bca8b1a4de8acff4e37981769d1234e36f3cfdfac49806c402a5b88dd2c
3
+ size 337887987
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"ja",
3
  "name":"core_news_trf",
4
- "version":"3.4.0",
5
  "description":"Japanese transformer pipeline (cl-tohoku/bert-base-japanese-char-v2). Components: transformer, morphologizer, parser, ner.",
6
  "author":"Explosion",
7
  "email":"[email protected]",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 3.0",
10
- "spacy_version":">=3.4.0,<3.5.0",
11
- "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -111,20 +111,20 @@
111
 
112
  ],
113
  "performance":{
114
- "token_acc":0.9968649485,
115
  "token_p":0.9764591282,
116
  "token_r":0.9790021974,
117
  "token_f":0.9777290092,
118
- "pos_acc":0.9806393516,
119
  "morph_acc":0.0,
120
- "morph_micro_p":0.3401360544,
121
- "morph_micro_r":0.9803921569,
122
- "morph_micro_f":0.5050505051,
123
  "morph_per_feat":{
124
  "Polarity":{
125
  "p":1.0,
126
- "r":0.9803921569,
127
- "f":0.9900990099
128
  },
129
  "Inflection":{
130
  "p":0.0,
@@ -137,106 +137,106 @@
137
  "f":0.0
138
  }
139
  },
140
- "sents_p":0.9727626459,
141
- "sents_r":0.9861932939,
142
- "sents_f":0.9794319295,
143
- "dep_uas":0.9318325079,
144
- "dep_las":0.9202448465,
145
  "dep_las_per_type":{
146
  "cc":{
147
- "p":0.8958333333,
148
- "r":0.8958333333,
149
- "f":0.8958333333
150
  },
151
  "compound":{
152
- "p":0.9542755344,
153
- "r":0.9058624577,
154
- "f":0.9294389821
155
  },
156
  "obl":{
157
- "p":0.8513513514,
158
- "r":0.8651685393,
159
- "f":0.8582043344
160
  },
161
  "case":{
162
- "p":0.9873756695,
163
- "r":0.9806231003,
164
- "f":0.9839878002
165
  },
166
  "dislocated":{
167
- "p":0.7,
168
- "r":0.5384615385,
169
- "f":0.6086956522
170
  },
171
  "nsubj":{
172
- "p":0.8549323017,
173
- "r":0.8483685221,
174
- "f":0.8516377649
175
  },
176
  "nmod":{
177
- "p":0.9195979899,
178
- "r":0.8561403509,
179
- "f":0.8867353119
180
  },
181
  "root":{
182
- "p":0.9486166008,
183
- "r":0.9467455621,
184
- "f":0.9476801579
185
  },
186
  "aux":{
187
- "p":0.9794776119,
188
- "r":0.9749303621,
189
- "f":0.9771986971
190
  },
191
  "advcl":{
192
- "p":0.7342342342,
193
- "r":0.7325842697,
194
- "f":0.733408324
195
  },
196
  "mark":{
197
- "p":0.9757575758,
198
- "r":0.966,
199
- "f":0.9708542714
200
  },
201
  "fixed":{
202
- "p":0.9659498208,
203
- "r":0.98,
204
- "f":0.9729241877
205
  },
206
  "acl":{
207
- "p":0.8608695652,
208
  "r":0.8703296703,
209
- "f":0.8655737705
210
  },
211
  "obj":{
212
- "p":0.9386503067,
213
- "r":0.9244712991,
214
- "f":0.9315068493
215
  },
216
  "nummod":{
217
- "p":0.9871794872,
218
- "r":0.9112426036,
219
- "f":0.9476923077
220
  },
221
  "advmod":{
222
- "p":0.7851851852,
223
- "r":0.7571428571,
224
- "f":0.7709090909
225
  },
226
  "amod":{
227
- "p":0.9666666667,
228
- "r":0.7837837838,
229
- "f":0.8656716418
230
  },
231
  "cop":{
232
- "p":0.9822485207,
233
- "r":0.9651162791,
234
- "f":0.9736070381
235
  },
236
  "ccomp":{
237
- "p":1.0,
238
- "r":0.8181818182,
239
- "f":0.9
240
  },
241
  "det":{
242
  "p":1.0,
@@ -244,106 +244,106 @@
244
  "f":0.9904761905
245
  },
246
  "csubj":{
247
- "p":0.6666666667,
248
  "r":0.8333333333,
249
- "f":0.7407407407
250
  },
251
  "dep":{
252
- "p":0.3,
253
- "r":0.4285714286,
254
- "f":0.3529411765
255
  }
256
  },
257
  "tag_acc":0.9712488769,
258
  "lemma_acc":0.9670526831,
259
- "ents_p":0.8443037975,
260
- "ents_r":0.8389937107,
261
- "ents_f":0.8416403785,
262
  "ents_per_type":{
263
  "DATE":{
264
- "p":0.9907407407,
265
- "r":0.9816513761,
266
- "f":0.9861751152
267
  },
268
  "ORG":{
269
- "p":0.7941176471,
270
- "r":0.7883211679,
271
- "f":0.7912087912
272
- },
273
- "TITLE_AFFIX":{
274
- "p":0.8695652174,
275
- "r":0.6666666667,
276
- "f":0.7547169811
277
  },
278
  "PERSON":{
279
- "p":0.9007092199,
280
- "r":0.9136690647,
281
- "f":0.9071428571
282
  },
283
  "GPE":{
284
- "p":0.8315789474,
285
  "r":0.8404255319,
286
- "f":0.835978836
 
 
 
 
 
287
  },
288
  "TIME":{
289
- "p":1.0,
290
  "r":1.0,
291
- "f":1.0
292
  },
293
  "QUANTITY":{
294
- "p":0.8533333333,
295
- "r":0.9696969697,
296
- "f":0.9078014184
297
  },
298
  "NORP":{
299
- "p":0.7419354839,
300
- "r":0.71875,
301
- "f":0.7301587302
 
 
 
 
 
302
  },
303
  "ORDINAL":{
304
- "p":0.6956521739,
305
- "r":0.7272727273,
306
- "f":0.7111111111
307
  },
308
  "WORK_OF_ART":{
309
- "p":0.8235294118,
310
- "r":0.8235294118,
311
- "f":0.8235294118
312
  },
313
- "PERCENT":{
314
  "p":1.0,
315
- "r":0.8571428571,
316
- "f":0.9230769231
317
  },
318
- "CARDINAL":{
319
- "p":0.0,
320
- "r":0.0,
321
- "f":0.0
322
  },
323
- "EVENT":{
324
  "p":1.0,
325
- "r":0.9230769231,
326
- "f":0.96
327
  },
328
- "PRODUCT":{
329
- "p":0.6216216216,
330
- "r":0.5476190476,
331
- "f":0.582278481
332
  },
333
  "FAC":{
334
- "p":0.7941176471,
335
- "r":0.7297297297,
336
- "f":0.7605633803
337
- },
338
- "LOC":{
339
- "p":0.5714285714,
340
- "r":0.8,
341
- "f":0.6666666667
342
  },
343
  "MOVEMENT":{
344
- "p":0.4,
345
- "r":0.4,
346
- "f":0.4
347
  },
348
  "LAW":{
349
  "p":0.6666666667,
@@ -359,14 +359,9 @@
359
  "p":1.0,
360
  "r":1.0,
361
  "f":1.0
362
- },
363
- "PET_NAME":{
364
- "p":0.0,
365
- "r":0.0,
366
- "f":0.0
367
  }
368
  },
369
- "speed":2829.1916919204
370
  },
371
  "sources":[
372
  {
@@ -389,7 +384,7 @@
389
  }
390
  ],
391
  "requirements":[
392
- "spacy-transformers>=1.1.2,<1.2.0",
393
  "sudachipy>=0.5.2,!=0.6.1",
394
  "sudachidict-core>=20211220"
395
  ]
 
1
  {
2
  "lang":"ja",
3
  "name":"core_news_trf",
4
+ "version":"3.5.0",
5
  "description":"Japanese transformer pipeline (cl-tohoku/bert-base-japanese-char-v2). Components: transformer, morphologizer, parser, ner.",
6
  "author":"Explosion",
7
  "email":"[email protected]",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 3.0",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
111
 
112
  ],
113
  "performance":{
114
+ "token_acc":0.9937494927,
115
  "token_p":0.9764591282,
116
  "token_r":0.9790021974,
117
  "token_f":0.9777290092,
118
+ "pos_acc":0.9798207196,
119
  "morph_acc":0.0,
120
+ "morph_micro_p":0.3356164384,
121
+ "morph_micro_r":0.9607843137,
122
+ "morph_micro_f":0.4974619289,
123
  "morph_per_feat":{
124
  "Polarity":{
125
  "p":1.0,
126
+ "r":0.9607843137,
127
+ "f":0.98
128
  },
129
  "Inflection":{
130
  "p":0.0,
 
137
  "f":0.0
138
  }
139
  },
140
+ "sents_p":0.9688715953,
141
+ "sents_r":0.9822485207,
142
+ "sents_f":0.9755142018,
143
+ "dep_uas":0.9355666622,
144
+ "dep_las":0.9241776538,
145
  "dep_las_per_type":{
146
  "cc":{
147
+ "p":0.875,
148
+ "r":0.875,
149
+ "f":0.875
150
  },
151
  "compound":{
152
+ "p":0.9552414605,
153
+ "r":0.9143179256,
154
+ "f":0.9343317972
155
  },
156
  "obl":{
157
+ "p":0.8567901235,
158
+ "r":0.8664169788,
159
+ "f":0.8615766605
160
  },
161
  "case":{
162
+ "p":0.990015361,
163
+ "r":0.9794832827,
164
+ "f":0.9847211612
165
  },
166
  "dislocated":{
167
+ "p":0.8181818182,
168
+ "r":0.6923076923,
169
+ "f":0.75
170
  },
171
  "nsubj":{
172
+ "p":0.875,
173
+ "r":0.8598848369,
174
+ "f":0.8673765731
175
  },
176
  "nmod":{
177
+ "p":0.921641791,
178
+ "r":0.8666666667,
179
+ "f":0.8933092224
180
  },
181
  "root":{
182
+ "p":0.9645669291,
183
+ "r":0.966469428,
184
+ "f":0.9655172414
185
  },
186
  "aux":{
187
+ "p":0.9757462687,
188
+ "r":0.9712163417,
189
+ "f":0.9734760354
190
  },
191
  "advcl":{
192
+ "p":0.7333333333,
193
+ "r":0.7415730337,
194
+ "f":0.7374301676
195
  },
196
  "mark":{
197
+ "p":0.964,
198
+ "r":0.964,
199
+ "f":0.964
200
  },
201
  "fixed":{
202
+ "p":0.9663716814,
203
+ "r":0.9927272727,
204
+ "f":0.9793721973
205
  },
206
  "acl":{
207
+ "p":0.8780487805,
208
  "r":0.8703296703,
209
+ "f":0.8741721854
210
  },
211
  "obj":{
212
+ "p":0.9726443769,
213
+ "r":0.9667673716,
214
+ "f":0.9696969697
215
  },
216
  "nummod":{
217
+ "p":0.9806451613,
218
+ "r":0.899408284,
219
+ "f":0.9382716049
220
  },
221
  "advmod":{
222
+ "p":0.7388059701,
223
+ "r":0.7071428571,
224
+ "f":0.7226277372
225
  },
226
  "amod":{
227
+ "p":0.9333333333,
228
+ "r":0.7567567568,
229
+ "f":0.8358208955
230
  },
231
  "cop":{
232
+ "p":0.9821428571,
233
+ "r":0.9593023256,
234
+ "f":0.9705882353
235
  },
236
  "ccomp":{
237
+ "p":0.9444444444,
238
+ "r":0.7727272727,
239
+ "f":0.85
240
  },
241
  "det":{
242
  "p":1.0,
 
244
  "f":0.9904761905
245
  },
246
  "csubj":{
247
+ "p":0.7692307692,
248
  "r":0.8333333333,
249
+ "f":0.8
250
  },
251
  "dep":{
252
+ "p":0.2857142857,
253
+ "r":0.2857142857,
254
+ "f":0.2857142857
255
  }
256
  },
257
  "tag_acc":0.9712488769,
258
  "lemma_acc":0.9670526831,
259
+ "ents_p":0.8298969072,
260
+ "ents_r":0.8100628931,
261
+ "ents_f":0.8198599618,
262
  "ents_per_type":{
263
  "DATE":{
264
+ "p":0.9906542056,
265
+ "r":0.9724770642,
266
+ "f":0.9814814815
267
  },
268
  "ORG":{
269
+ "p":0.7445255474,
270
+ "r":0.7445255474,
271
+ "f":0.7445255474
 
 
 
 
 
272
  },
273
  "PERSON":{
274
+ "p":0.9516129032,
275
+ "r":0.8489208633,
276
+ "f":0.897338403
277
  },
278
  "GPE":{
279
+ "p":0.79,
280
  "r":0.8404255319,
281
+ "f":0.8144329897
282
+ },
283
+ "PRODUCT":{
284
+ "p":0.5945945946,
285
+ "r":0.5238095238,
286
+ "f":0.5569620253
287
  },
288
  "TIME":{
289
+ "p":0.8,
290
  "r":1.0,
291
+ "f":0.8888888889
292
  },
293
  "QUANTITY":{
294
+ "p":0.8904109589,
295
+ "r":0.9848484848,
296
+ "f":0.9352517986
297
  },
298
  "NORP":{
299
+ "p":0.6785714286,
300
+ "r":0.59375,
301
+ "f":0.6333333333
302
+ },
303
+ "TITLE_AFFIX":{
304
+ "p":0.7857142857,
305
+ "r":0.7333333333,
306
+ "f":0.7586206897
307
  },
308
  "ORDINAL":{
309
+ "p":0.652173913,
310
+ "r":0.6818181818,
311
+ "f":0.6666666667
312
  },
313
  "WORK_OF_ART":{
314
+ "p":0.8461538462,
315
+ "r":0.6470588235,
316
+ "f":0.7333333333
317
  },
318
+ "CARDINAL":{
319
  "p":1.0,
320
+ "r":0.5,
321
+ "f":0.6666666667
322
  },
323
+ "LOC":{
324
+ "p":0.6,
325
+ "r":0.9,
326
+ "f":0.72
327
  },
328
+ "PERCENT":{
329
  "p":1.0,
330
+ "r":0.7142857143,
331
+ "f":0.8333333333
332
  },
333
+ "EVENT":{
334
+ "p":0.9090909091,
335
+ "r":0.7692307692,
336
+ "f":0.8333333333
337
  },
338
  "FAC":{
339
+ "p":0.7692307692,
340
+ "r":0.8108108108,
341
+ "f":0.7894736842
 
 
 
 
 
342
  },
343
  "MOVEMENT":{
344
+ "p":0.3333333333,
345
+ "r":0.2,
346
+ "f":0.25
347
  },
348
  "LAW":{
349
  "p":0.6666666667,
 
359
  "p":1.0,
360
  "r":1.0,
361
  "f":1.0
 
 
 
 
 
362
  }
363
  },
364
+ "speed":3501.8234367587
365
  },
366
  "sources":[
367
  {
 
384
  }
385
  ],
386
  "requirements":[
387
+ "spacy-transformers>=1.2.0.dev0,<1.3.0",
388
  "sudachipy>=0.5.2,!=0.6.1",
389
  "sudachidict-core>=20211220"
390
  ]
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c2d8ebad0d05f95532763df7e037c200a53b3d967a4780ce3b200376e0b9b7e
3
  size 59084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b29b5a466d2b2e04d0ba5ecc814b1c2dd3b68779a268d1c9f2e53acc3ae3e341
3
  size 59084
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d88fa5c2720aa1ab707719d91fdd75ea8303709f648f5cd8df889235cd294ed8
3
  size 338861
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8861afdc60dc55a1be8bc7af4d9f886b7df4b01fb10a7edf4db75c1bd4eb4b6d
3
  size 338861
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c420287c381609578c2f76348243a44c556d2471fef08cbaf03c42d4b7203045
3
  size 318612
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6db32102f6c2c87ecac20d4510cd45b863de1e00a1080b029ceff5926f3fde3
3
  size 318612
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb035744014008c4e09ccf237f08798a9265a4aed19e93828f47f30270674813
3
- size 363145638
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a45ee97862b26d1b81b9896d5d5ed3b599d3b6bf80db3c2b03d6bb6e62437a
3
+ size 363145845
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8c4c085e460d965216188fcf475275929e8562fdc40ea14f82e344c84faed30
3
- size 1600684
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c0c3d498c927b78335dc65ecb0eb22e2dd05617c40665dc66c0b600e8dc6f4
3
+ size 1600681