Update README.md
Browse files
README.md
CHANGED
@@ -27,7 +27,8 @@ BERT model specijalno obučen za srpski jezik.</h4>
|
|
27 |
<img src="https://www.ai.gov.rs/img/logo_60x120-2.png" style="position:relative; left:30px; z-index:10; height:85px">
|
28 |
</div>
|
29 |
<table width=100% style="border:0px">
|
30 |
-
<tr style="background-color:#C6363C;width:100%;border:0px;height:30px"><td></td></tr>
|
|
|
31 |
<tr style="background-color:#0C4076;width:100%;border:0px;height:30px"><td></td></tr>
|
32 |
<tr style="background-color:#ffffff;width:100%;border:0px;height:30px"><td></td></tr>
|
33 |
</table>
|
@@ -57,28 +58,23 @@ kao i korpus [PDRS 1.0](https://www.clarin.si/repository/xmlui/handle/11356/1752
|
|
57 |
{'score': 0.0532902330160141, 'token': 998, 'token_str': ' rekao', 'sequence': 'Kada bi čovek znao gde će pasti on bi rekao.'}]
|
58 |
```
|
59 |
|
60 |
-
```
|
61 |
>>> from transformers import AutoTokenizer, AutoModelForMaskedLM
|
62 |
>>> from torch import LongTensor, no_grad
|
63 |
>>> from scipy import spatial
|
64 |
-
|
65 |
>>> tokenizer = AutoTokenizer.from_pretrained('bert modeli/bertovic-base')
|
66 |
>>> model = AutoModelForMaskedLM.from_pretrained('bert modeli/bertovic-base', output_hidden_states=True)
|
67 |
-
|
68 |
>>> x = " pas"
|
69 |
>>> y = " mačka"
|
70 |
>>> z = " svemir"
|
71 |
-
|
72 |
>>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
|
73 |
>>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
|
74 |
>>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
|
75 |
-
|
76 |
>>> model.eval()
|
77 |
>>> with no_grad():
|
78 |
...     vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
|
79 |
...     vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
|
80 |
...     vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
|
81 |
-
|
82 |
>>> print(spatial.distance.cosine(vektor_x, vektor_y))
|
83 |
>>> print(spatial.distance.cosine(vektor_x, vektor_z))
|
84 |
```
|
|
|
27 |
<img src="https://www.ai.gov.rs/img/logo_60x120-2.png" style="position:relative; left:30px; z-index:10; height:85px">
|
28 |
</div>
|
29 |
<table width=100% style="border:0px">
|
30 |
+
<tr style=
|
31 |
+
"background-color:#C6363C;width:100%;border:0px;height:30px"><td></td></tr>
|
32 |
<tr style="background-color:#0C4076;width:100%;border:0px;height:30px"><td></td></tr>
|
33 |
<tr style="background-color:#ffffff;width:100%;border:0px;height:30px"><td></td></tr>
|
34 |
</table>
|
|
|
58 |
{'score': 0.0532902330160141, 'token': 998, 'token_str': ' rekao', 'sequence': 'Kada bi čovek znao gde će pasti on bi rekao.'}]
|
59 |
```
|
60 |
|
61 |
+
```python
|
62 |
>>> from transformers import AutoTokenizer, AutoModelForMaskedLM
|
63 |
>>> from torch import LongTensor, no_grad
|
64 |
>>> from scipy import spatial
|
|
|
65 |
>>> tokenizer = AutoTokenizer.from_pretrained('bert modeli/bertovic-base')
|
66 |
>>> model = AutoModelForMaskedLM.from_pretrained('bert modeli/bertovic-base', output_hidden_states=True)
|
|
|
67 |
>>> x = " pas"
|
68 |
>>> y = " mačka"
|
69 |
>>> z = " svemir"
|
|
|
70 |
>>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
|
71 |
>>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
|
72 |
>>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
|
|
|
73 |
>>> model.eval()
|
74 |
>>> with no_grad():
|
75 |
...     vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
|
76 |
...     vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
|
77 |
...     vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
|
|
|
78 |
>>> print(spatial.distance.cosine(vektor_x, vektor_y))
|
79 |
>>> print(spatial.distance.cosine(vektor_x, vektor_z))
|
80 |
```
|