Skip to content

Commit

Permalink
add validation for supported charsets (#14)
Browse files Browse the repository at this point in the history
Co-authored-by: Melissa Wang <[email protected]>
  • Loading branch information
melw66 and Melissa Wang authored Nov 4, 2024
1 parent a2898b7 commit a8befc0
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import java.io.Serializable;
import java.nio.charset.Charset;
+ import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
Expand All @@ -51,6 +52,7 @@
public class EmbeddingConfiguration implements Serializable {
public static final int DEFAULT_EMBEDDING_ASYNC_TIMEOUT = 15000;
public static final TimeUnit DEFAULT_EMBEDDING_ASYNC_TIMEOUT_UNIT = TimeUnit.MILLISECONDS;
+ public static final List<String> SUPPORTED_EMBEDDING_CHARSETS = List.of("UTF-8", "US-ASCII", "ISO-8859-1");
public static final int DEFAULT_EMBEDDING_ASYNC_MAX_IO = 1000;
public static final String DEFAULT_EMBEDDING_CHARSET = "UTF-8";
public static final ChunkingType DEFAULT_EMBEDDING_CHUNKING_TYPE = ChunkingType.SPLIT_BY_WORD;
Expand Down Expand Up @@ -119,8 +121,9 @@ public void validate() {
if (isEmpty(this.charset)) {
throw new MissingOrIncorrectConfigurationException("Input stream Charset is required.");
}
- if (!Charset.isSupported(this.charset)) {
- throw new MissingOrIncorrectConfigurationException("Input stream Charset is not supported.");
+ if (!Charset.isSupported(this.charset) || !SUPPORTED_EMBEDDING_CHARSETS.contains(this.charset)) {
+ throw new MissingOrIncorrectConfigurationException("Input stream Charset is not supported. "
+ + "Supported Charsets are: " + String.join(", ", SUPPORTED_EMBEDDING_CHARSETS));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ public static Collection<Object[]> data() {
, PROPERTY_EMBEDDING_CHARSET, "xyz")
).getProperties())
.build(),
- "Input stream Charset is not supported."}};
+ "Input stream Charset is not supported. Supported Charsets are: "
+ + String.join(", ", EmbeddingConfiguration.SUPPORTED_EMBEDDING_CHARSETS)}};
return Arrays.asList(data);
}

Expand Down

0 comments on commit a8befc0

Please sign in to comment.