procesaur committed on
Commit
d6c250e
1 Parent(s): 219a345

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -7
README.md CHANGED
@@ -27,7 +27,8 @@ BERT model specijalno obučen za srpski jezik.</h4>
27
  <img src="https://www.ai.gov.rs/img/logo_60x120-2.png" style="position:relative; left:30px; z-index:10; height:85px">
28
  </div>
29
  <table width=100% style="border:0px">
30
- <tr style="background-color:#C6363C;width:100%;border:0px;height:30px"><td></td></tr>
 
31
  <tr style="background-color:#0C4076;width:100%;border:0px;height:30px"><td></td></tr>
32
  <tr style="background-color:#ffffff;width:100%;border:0px;height:30px"><td></td></tr>
33
  </table>
@@ -57,28 +58,23 @@ kao i korpus [PDRS 1.0](https://www.clarin.si/repository/xmlui/handle/11356/1752
57
  {'score': 0.0532902330160141, 'token': 998, 'token_str': ' rekao', 'sequence': 'Kada bi čovek znao gde će pasti on bi rekao.'}]
58
  ```
59
 
60
- ```
61
  >>> from transformers import AutoTokenizer, AutoModelForMaskedLM
62
  >>> from torch import LongTensor, no_grad
63
  >>> from scipy import spatial
64
-
65
  >>> tokenizer = AutoTokenizer.from_pretrained('bert modeli/bertovic-base')
66
  >>> model = AutoModelForMaskedLM.from_pretrained('bert modeli/bertovic-base', output_hidden_states=True)
67
-
68
  >>> x = " pas"
69
  >>> y = " mačka"
70
  >>> z = " svemir"
71
-
72
  >>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
73
  >>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
74
  >>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
75
-
76
  >>> model.eval()
77
  >>> with no_grad():
78
  ...     vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
79
  ...     vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
80
  ...     vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
81
-
82
  >>> print(spatial.distance.cosine(vektor_x, vektor_y))
83
  >>> print(spatial.distance.cosine(vektor_x, vektor_z))
84
  ```
 
27
  <img src="https://www.ai.gov.rs/img/logo_60x120-2.png" style="position:relative; left:30px; z-index:10; height:85px">
28
  </div>
29
  <table width=100% style="border:0px">
30
+ <tr style=
31
+ "background-color:#C6363C;width:100%;border:0px;height:30px"><td></td></tr>
32
  <tr style="background-color:#0C4076;width:100%;border:0px;height:30px"><td></td></tr>
33
  <tr style="background-color:#ffffff;width:100%;border:0px;height:30px"><td></td></tr>
34
  </table>
 
58
  {'score': 0.0532902330160141, 'token': 998, 'token_str': ' rekao', 'sequence': 'Kada bi čovek znao gde će pasti on bi rekao.'}]
59
  ```
60
 
61
+ ```python
62
  >>> from transformers import AutoTokenizer, AutoModelForMaskedLM
63
  >>> from torch import LongTensor, no_grad
64
  >>> from scipy import spatial
 
65
  >>> tokenizer = AutoTokenizer.from_pretrained('bert modeli/bertovic-base')
66
  >>> model = AutoModelForMaskedLM.from_pretrained('bert modeli/bertovic-base', output_hidden_states=True)
 
67
  >>> x = " pas"
68
  >>> y = " mačka"
69
  >>> z = " svemir"
 
70
  >>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
71
  >>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
72
  >>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
 
73
  >>> model.eval()
74
  >>> with no_grad():
75
  ...     vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
76
  ...     vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
77
  ...     vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
 
78
  >>> print(spatial.distance.cosine(vektor_x, vektor_y))
79
  >>> print(spatial.distance.cosine(vektor_x, vektor_z))
80
  ```