-
Notifications
You must be signed in to change notification settings - Fork 8
Χρήση Tokenizer
Nina Gial edited this page Jan 8, 2024
·
1 revision
from transformers import AutoModelForMaskedLM, PreTrainedTokenizerFast

# Build the fast tokenizer from its serialized tokenizer file.
tokenizer = PreTrainedTokenizerFast(tokenizer_file="greek_tokenizer.json")

# NOTE(review): BASE_MODEL is not defined in this snippet; it must be set
# beforehand to whatever `from_pretrained` should load — confirm the value.
model = AutoModelForMaskedLM.from_pretrained(BASE_MODEL)

# Register the special-token strings on the tokenizer so that attributes such
# as `mask_token_id` (read below) resolve to ids.
# NOTE(review): if this call adds tokens that were not already in the
# vocabulary, the model's embedding matrix may need
# `model.resize_token_embeddings(len(tokenizer))` — confirm against the
# vocabulary of BASE_MODEL.
tokenizer.add_special_tokens({'pad_token': '<pad>', 'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'mask_token': '<mask>'})

# Original note: as is, the evaluation examples have to be gsubbed.
# Presumably this means the mask placeholder used in the evaluation examples
# must be substituted with the '<mask>' token string — TODO confirm.
mask_token_id = tokenizer.mask_token_id