Skip to content

Commit

Permalink
optional tokenizer and fix some doc (#3046)
Browse files: browse the repository at this point in the history
  • Loading branch information
zxcd authored Mar 15, 2023
1 parent 1bf1c3a commit 4d1787d
Show file tree
Hide file tree
Showing 6 changed files with 6 additions and 4 deletions.
Empty file modified examples/librispeech/asr3/local/data.sh
100644 → 100755
Empty file.
Empty file modified examples/librispeech/asr3/local/test.sh
100644 → 100755
Empty file.
Empty file modified examples/librispeech/asr3/local/test_wav.sh
100644 → 100755
Empty file.
Empty file modified examples/librispeech/asr3/local/train.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion examples/librispeech/asr3/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ set -e

gpus=0
stage=0
-stop_stage=0
+stop_stage=4
conf_path=conf/wav2vec2ASR.yaml
ips= #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
Expand Down
8 changes: 5 additions & 3 deletions paddlespeech/s2t/exps/wav2vec2/bin/test_wav.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
import paddle
import soundfile
from paddlenlp.transformers import AutoTokenizer
from yacs.config import CfgNode

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.wav2vec2.wav2vec2_ASR import Wav2vec2ASR
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig
from yacs.config import CfgNode
logger = Log(__name__).getlog()


Expand All @@ -33,8 +34,9 @@ def __init__(self, config, args):
self.args = args
self.config = config
self.audio_file = args.audio_file
+self.tokenizer = config.get("tokenizer", None)

-if self.config.tokenizer:
+if self.tokenizer:
self.text_feature = AutoTokenizer.from_pretrained(
self.config.tokenizer)
else:
Expand Down Expand Up @@ -71,7 +73,7 @@ def run(self):
text_feature=self.text_feature,
decoding_method=decode_config.decoding_method,
beam_size=decode_config.beam_size,
-tokenizer=self.config.tokenizer, )
+tokenizer=self.tokenizer, )
rsl = result_transcripts[0]
utt = Path(self.audio_file).name
logger.info(f"hyp: {utt} {rsl}")
Expand Down

0 comments on commit 4d1787d

Please sign in to comment.