3 files changed: +2 -11 lines changed
lines changed Original file line number Diff line number Diff line change @@ -5,7 +5,6 @@ import { TokenizerConfigDecoderMetaspace } from "@static/tokenizer";
55 * MetaspaceDecoder class extends the Decoder class and decodes Metaspace tokenization.
66 */
77class Metaspace extends Decoder {
8- add_prefix_space ?: boolean ;
98 replacement : string ;
109
1110 /**
@@ -15,15 +14,14 @@ class Metaspace extends Decoder {
1514 constructor ( config : TokenizerConfigDecoderMetaspace ) {
1615 super ( config ) ;
1716
18- this . add_prefix_space = config . add_prefix_space ;
1917 this . replacement = config . replacement ?? "▁" ;
2018 }
2119
2220 decode_chain ( tokens : string [ ] ) : string [ ] {
2321 const result = [ ] ;
2422 for ( let i = 0 ; i < tokens . length ; ++ i ) {
2523 let normalized = tokens [ i ] . replaceAll ( this . replacement , " " ) ;
26- if ( this . add_prefix_space && i == 0 && normalized . startsWith ( " " ) ) {
24+ if ( i == 0 && normalized . startsWith ( " " ) ) {
2725 normalized = normalized . substring ( 1 ) ;
2826 }
2927 result . push ( normalized ) ;
Original file line number Diff line number Diff line change @@ -11,8 +11,6 @@ import type {
1111 * and returns a list of tokens.
1212 */
1313class Metaspace extends PreTokenizer {
14- /** Whether to add a prefix space to the first token. */
15- add_prefix_space : boolean ;
1614 /** The character to replace spaces with. */
1715 replacement : string ;
1816 /** An optional string representation of the replacement character. */
@@ -26,7 +24,6 @@ class Metaspace extends PreTokenizer {
2624 constructor ( config : TokenizerConfigPreTokenizerMetaspace ) {
2725 super ( ) ;
2826
29- this . add_prefix_space = config . add_prefix_space ?? false ;
3027 this . replacement = config . replacement ?? "▁" ;
3128 this . str_rep = config . str_rep || this . replacement ;
3229 this . prepend_scheme = config . prepend_scheme ?? "always" ;
@@ -45,9 +42,7 @@ class Metaspace extends PreTokenizer {
4542
4643 if (
4744 // We add a prefix space if:
48- // (1) The add_prefix_space option is enabled and the normalized
49- // token does not already start with the replacement character.
50- this . add_prefix_space &&
45+ // (1) The normalized token does not already start with the replacement character.
5146 ! normalized . startsWith ( this . replacement ) &&
5247 // and (2) either:
5348 // (a) prepend_scheme is 'always'
Original file line number Diff line number Diff line change @@ -205,7 +205,6 @@ export interface TokenizerConfigPreTokenizerMetaspace {
205205 type : "Metaspace" ;
206206 replacement ?: string ;
207207 str_rep ?: string ;
208- add_prefix_space ?: boolean ;
209208 prepend_scheme ?: PrependScheme ;
210209}
211210
@@ -355,7 +354,6 @@ export interface TokenizerConfigDecoderWordPiece {
355354export interface TokenizerConfigDecoderMetaspace {
356355 type : "Metaspace" ;
357356 replacement ?: string ;
358- add_prefix_space ?: boolean ;
359357 prepend_scheme ?: "always" | "never" | "first" ;
360358}
361359
You can’t perform that action at this time.
0 commit comments