Skip to content

Commit 6fc4ec8

Browse files
authored
Remove Metaspace add_prefix_space logic (#14)
Always true, since other logic is already handled by prepend_scheme
1 parent 717d6e3 commit 6fc4ec8

File tree

3 files changed

+2
-11
lines changed

3 files changed

+2
-11
lines changed

src/core/decoder/Metaspace.ts

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ import { TokenizerConfigDecoderMetaspace } from "@static/tokenizer";
55
* MetaspaceDecoder class extends the Decoder class and decodes Metaspace tokenization.
66
*/
77
class Metaspace extends Decoder {
8-
add_prefix_space?: boolean;
98
replacement: string;
109

1110
/**
@@ -15,15 +14,14 @@ class Metaspace extends Decoder {
1514
constructor(config: TokenizerConfigDecoderMetaspace) {
1615
super(config);
1716

18-
this.add_prefix_space = config.add_prefix_space;
1917
this.replacement = config.replacement ?? "▁";
2018
}
2119

2220
decode_chain(tokens: string[]): string[] {
2321
const result = [];
2422
for (let i = 0; i < tokens.length; ++i) {
2523
let normalized = tokens[i].replaceAll(this.replacement, " ");
26-
if (this.add_prefix_space && i == 0 && normalized.startsWith(" ")) {
24+
if (i == 0 && normalized.startsWith(" ")) {
2725
normalized = normalized.substring(1);
2826
}
2927
result.push(normalized);

src/core/preTokenizer/Metaspace.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ import type {
1111
* and returns a list of tokens.
1212
*/
1313
class Metaspace extends PreTokenizer {
14-
/** Whether to add a prefix space to the first token. */
15-
add_prefix_space: boolean;
1614
/** The character to replace spaces with. */
1715
replacement: string;
1816
/** An optional string representation of the replacement character. */
@@ -26,7 +24,6 @@ class Metaspace extends PreTokenizer {
2624
constructor(config: TokenizerConfigPreTokenizerMetaspace) {
2725
super();
2826

29-
this.add_prefix_space = config.add_prefix_space ?? false;
3027
this.replacement = config.replacement ?? "▁";
3128
this.str_rep = config.str_rep || this.replacement;
3229
this.prepend_scheme = config.prepend_scheme ?? "always";
@@ -45,9 +42,7 @@ class Metaspace extends PreTokenizer {
4542

4643
if (
4744
// We add a prefix space if:
48-
// (1) The add_prefix_space option is enabled and the normalized
49-
// token does not already start with the replacement character.
50-
this.add_prefix_space &&
45+
// (1) The normalized token does not already start with the replacement character.
5146
!normalized.startsWith(this.replacement) &&
5247
// and (2) either:
5348
// (a) prepend_scheme is 'always'

src/static/tokenizer.d.ts

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,6 @@ export interface TokenizerConfigPreTokenizerMetaspace {
205205
type: "Metaspace";
206206
replacement?: string;
207207
str_rep?: string;
208-
add_prefix_space?: boolean;
209208
prepend_scheme?: PrependScheme;
210209
}
211210

@@ -355,7 +354,6 @@ export interface TokenizerConfigDecoderWordPiece {
355354
export interface TokenizerConfigDecoderMetaspace {
356355
type: "Metaspace";
357356
replacement?: string;
358-
add_prefix_space?: boolean;
359357
prepend_scheme?: "always" | "never" | "first";
360358
}
361359

0 commit comments

Comments
 (0)