Skip to content

Commit

Permalink
fix AttributeError: 'NagisaBertTokenizer' object has no attribute 'vocab' in tokenization_nagisa_bert.py
Browse files Browse the repository at this point in the history
  • Loading branch information
taishi-i committed Dec 23, 2023
1 parent 20bb708 commit 8b6d096
Showing 1 changed file with 18 additions and 19 deletions.
37 changes: 18 additions & 19 deletions nagisa_bert/tokenization_nagisa_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@
# https://github.com/WorksApplications/SudachiTra/blob/dbcaf5c851fe817bead20acf3958e50c93b0118c/sudachitra/tokenization_bert_sudachipy.py
"""Tokenization classes for nagisa BERT."""

import os
import copy

import os
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import nagisa

from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.models.bert.tokenization_bert import WordpieceTokenizer

from transformers.tokenization_utils import PreTrainedTokenizer

VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}

Expand Down Expand Up @@ -117,20 +114,7 @@ def __init__(
nagisa_kwargs=None,
**kwargs,
):
super().__init__(
do_lower_case=do_lower_case,
do_word_tokenize=do_word_tokenize,
do_subword_tokenize=do_subword_tokenize,
word_tokenizer_type=word_tokenizer_type,
subword_tokenizer_type=subword_tokenizer_type,
unk_token=unk_token,
sep_token=sep_token,
pad_token=pad_token,
cls_token=cls_token,
mask_token=mask_token,
nagisa_kwargs=nagisa_kwargs,
**kwargs,
)
self._unk_token = unk_token

if not os.path.isfile(vocab_file):
raise ValueError(f"Can't find a vocabulary file at path '{vocab_file}'.")
Expand Down Expand Up @@ -170,6 +154,21 @@ def __init__(
f"Invalid subword_tokenizer_type '{subword_tokenizer_type}' is specified."
)

super().__init__(
do_lower_case=do_lower_case,
do_word_tokenize=do_word_tokenize,
do_subword_tokenize=do_subword_tokenize,
word_tokenizer_type=word_tokenizer_type,
subword_tokenizer_type=subword_tokenizer_type,
unk_token=unk_token,
sep_token=sep_token,
pad_token=pad_token,
cls_token=cls_token,
mask_token=mask_token,
nagisa_kwargs=nagisa_kwargs,
**kwargs,
)

@property
def do_lower_case(self):
return self.lower_case
Expand Down

0 comments on commit 8b6d096

Please sign in to comment.