It only translates to English

#10
by Krille47 - opened

No matter what I set as the target language, the output is always English. Hmm?

Can you please provide your sample?

Why can it only translate into English?
from transformers import M2M100ForConditionalGeneration
from LLM.small100.tokenization_small100 import SMALL100Tokenizer

# Base directory of the local checkpoints. The trailing slash matters: the
# checkpoint name is appended below by plain string concatenation.
LLM_path = 'path/LLM/'
version_small100 = 'small100'

model_small100 = M2M100ForConditionalGeneration.from_pretrained(LLM_path + version_small100)
# Load the tokenizer with SMALL100Tokenizer (imported above) rather than
# AutoTokenizer: AutoTokenizer was never imported in this snippet, and the
# SMALL-100 model card instructs using this custom tokenizer class, whose
# `tgt_lang` attribute selects the output language.
model_small100_tokenizer = SMALL100Tokenizer.from_pretrained(LLM_path + version_small100)
def t2t_samll100(text, source_language=None, target_language=None, version='small100'):
    """Translate *text* into *target_language* using the module-level SMALL-100 model.

    Args:
        text: Input handed unchanged to ``model_small100_tokenizer``.
        source_language: Accepted for call compatibility; not used by this
            function (only the target language is set on the tokenizer).
        target_language: Language code assigned to the tokenizer's
            ``tgt_lang`` (e.g. ``"de"``). Required.
        version: Accepted for call compatibility; not used, because the
            model and tokenizer are loaded once at module level.

    Returns:
        The list of decoded strings produced by ``batch_decode``.

    Raises:
        ValueError: If *target_language* is ``None`` or empty.
    """
    if not target_language:
        # Without an explicit target the tokenizer keeps whatever language it
        # was last configured with, which silently yields the wrong output.
        raise ValueError("target_language is required, e.g. target_language='de'")

    # The target language must be set *before* tokenizing the input.
    model_small100_tokenizer.tgt_lang = target_language
    encoded_text = model_small100_tokenizer(text, return_tensors="pt")
    generated_tokens = model_small100.generate(**encoded_text)
    return model_small100_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

# Demo: translate an English sample sentence into German and print the result.
print(
    t2t_samll100(
        "'Even listening to technology welcome you, even listening to technology is a new company located in Beijing’s Qingdao district.come on, let’s go!'",
        "en",
        "de",
    )
)

Don't you need to choose the source language?

I have updated the tokenizer, please try again.

Sign up or log in to comment