Skip to content

Commit 35a703d

Browse files
committed
Checkstyle fixes for lang ident model
1 parent 4b90b1b commit 35a703d

File tree

9 files changed, with 26 additions and 50 deletions (+26 / -50 lines changed).

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/InferenceProcessor.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
import org.elasticsearch.ingest.IngestDocument;
1212
import org.elasticsearch.ingest.Processor;
1313

14-
import java.util.HashMap;
1514
import java.util.Map;
1615
import java.util.concurrent.ConcurrentHashMap;
1716
import java.util.function.BiConsumer;

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/EmbeddingNetwork.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66

77
package org.elasticsearch.xpack.ml.inference.langident;
88

9-
import java.util.*;
9+
10+
import java.util.ArrayList;
11+
import java.util.Arrays;
1012

1113
/**
1214
* Classifier using a hand-coded feed-forward neural network.

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/EmbeddingNetworkParams.java

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,6 @@
66

77
package org.elasticsearch.xpack.ml.inference.langident;
88

9-
/**
10-
*
11-
*/
129
public abstract class EmbeddingNetworkParams {
1310
public enum QuantizationType {
1411
NONE,

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/FeatureUtils.java

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,7 @@
66

77
package org.elasticsearch.xpack.ml.inference.langident;
88

9-
import org.apache.logging.log4j.message.ParameterizedMessage;
10-
import org.apache.logging.log4j.util.Supplier;
11-
129
import java.io.UnsupportedEncodingException;
13-
import java.nio.charset.StandardCharsets;
1410
import java.util.Locale;
1511

1612
/**
@@ -195,7 +191,8 @@ public static String toUTF8ByteString(String text) {
195191
boolean isIdeographic = Character.isIdeographic(codepoint);
196192
Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codepoint);
197193
198-
sb.append(String.format("%d|%b,%b,%b,%b,%s|", codepoint, isLetter, isSupplementaryCodePoint, isSpaceChar, isIdeographic, unicodeBlock.toString()));
194+
sb.append(String.format("%d|%b,%b,%b,%b,%s|", codepoint, isLetter, isSupplementaryCodePoint,
195+
isSpaceChar, isIdeographic, unicodeBlock.toString()));
199196
200197
offset += Character.charCount(codepoint);
201198
}
@@ -204,11 +201,11 @@ public static String toUTF8ByteString(String text) {
204201
byte[] bytes = text.getBytes("UTF-8");
205202
StringBuilder sb = new StringBuilder();
206203
for (byte b : bytes) {
207-
sb.append(String.format("%02X", b));
204+
sb.append(String.format(Locale.ROOT, "%02X", b));
208205
}
209206
return sb.toString();
210207
} catch (UnsupportedEncodingException e) {
211-
e.printStackTrace();
208+
// e.printStackTrace();
212209
}
213210
return "";
214211
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/FeatureVector.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,16 +54,16 @@ private class Element {
5454
private final FeatureType featureType;
5555
private final long featureValue;
5656

57-
public Element(FeatureType featureType, long featureValue) {
57+
Element(FeatureType featureType, long featureValue) {
5858
this.featureType = featureType;
5959
this.featureValue = featureValue;
6060
}
6161

62-
public FeatureType getFeatureType() {
62+
FeatureType getFeatureType() {
6363
return featureType;
6464
}
6565

66-
public long getFeatureValue() {
66+
long getFeatureValue() {
6767
return featureValue;
6868
}
6969
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/LangIdentModel.java

Lines changed: 2 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ final class LangIdentModel implements Model {
2929
private final String targetIsReliableField;
3030
private final boolean ignoreMissing;
3131

32-
public LangIdentModel(String field,
32+
LangIdentModel(String field,
3333
String targetLanguageField, String targetProbabilityField,
3434
String targetTopProbabilitiesField, String targetIsReliableField,
3535
boolean ignoreMissing) {
@@ -71,7 +71,7 @@ public void infer(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exce
7171
}
7272
if (targetTopProbabilitiesField != null) {
7373
ingestDocument.setFieldValue(targetTopProbabilitiesField, result.getTopProbabilities().toString());
74-
}+
74+
}
7575
if (targetIsReliableField != null) {
7676
ingestDocument.setFieldValue(targetIsReliableField, result.isReliable());
7777
}
@@ -82,23 +82,4 @@ public void infer(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exce
8282

8383
handler.accept(ingestDocument, null);
8484
}
85-
86-
public NNetLanguageIdentifier.Result identifyLanguage(String text) {
87-
NNetLanguageIdentifier.Result result = null;
88-
89-
if (languageIdentifier == null) {
90-
logger.error("Language identifier model not initialised - unknown language returned");
91-
return new NNetLanguageIdentifier.Result();
92-
}
93-
94-
try {
95-
result = languageIdentifier.findLanguage(text);
96-
} catch (Exception e) {
97-
logger.error((Supplier<?>) () -> new ParameterizedMessage(
98-
"Error identifjying language [{}]", text), e);
99-
result = new NNetLanguageIdentifier.Result();
100-
}
101-
102-
return result;
103-
}
10485
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/LangIdentParams.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
package org.elasticsearch.xpack.ml.inference.langident;
88

99
public class LangIdentParams {
10-
public final static String kLanguageNames[] = {
10+
public static final String kLanguageNames[] = {
1111
"eo", "co", "eu", "ta", "de", "mt", "ps", "te", "su", "uz", "zh-Latn", "ne",
1212
"nl", "sw", "sq", "hmn", "ja", "no", "mn", "so", "ko", "kk", "sl", "ig",
1313
"mr", "th", "zu", "ml", "hr", "bs", "lo", "sd", "cy", "hy", "uk", "pt",

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/NNetLanguageIdentifier.java

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,11 @@
77
package org.elasticsearch.xpack.ml.inference.langident;
88

99
import java.io.FileNotFoundException;
10-
import java.util.*;
10+
import java.util.AbstractMap;
11+
import java.util.ArrayList;
12+
import java.util.Collections;
13+
import java.util.List;
14+
import java.util.Map;
1115

1216
public class NNetLanguageIdentifier {
1317
public static final int kMaxNumInputBytesToConsider = 10000;
@@ -50,7 +54,7 @@ public ArrayList<FeatureVector> getFeatures(String text) throws Exception {
5054

5155
return features;
5256
}
53-
57+
/*
5458
private void dumpFeatures(String text, ArrayList<FeatureVector> featureVectors) {
5559
System.out.println("'" + text + "'" + text.length() + "{");
5660
System.out.println("features:" + featureVectors.size());
@@ -67,6 +71,7 @@ private void dumpFeatures(String text, ArrayList<FeatureVector> featureVectors)
6771
}
6872
System.out.println("}");
6973
}
74+
*/
7075

7176
private Result findLanguageOfValidUTF8(String text) throws Exception {
7277
// Create the feature vector with it actually a list
@@ -123,7 +128,7 @@ boolean resultIsReliable(String language, float probability) {
123128
}
124129

125130
public static class Result {
126-
public final static int TOP_N = 5;
131+
public static final int TOP_N = 5;
127132

128133
private final String language;
129134
private final float probability;

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/langident/NNetParams.java

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,11 @@
66

77
package org.elasticsearch.xpack.ml.inference.langident;
88

9-
import java.io.File;
9+
import org.apache.commons.codec.Charsets;
10+
1011
import java.io.FileNotFoundException;
11-
import java.io.InputStream;
12-
import java.util.InputMismatchException;
1312
import java.util.Scanner;
1413

15-
/**
16-
*
17-
*/
1814
public class NNetParams extends EmbeddingNetworkParams {
1915
private static final int EMBEDDINGS_SIZE = 6;
2016
private static final int EMBEDDING_NUM_FEATURES_SIZE = 6;
@@ -111,7 +107,7 @@ public NNetParams() throws FileNotFoundException {
111107
}
112108

113109
private static short[] readFileToShortArray(String path) throws FileNotFoundException {
114-
Scanner s = new Scanner(NNetParams.class.getResourceAsStream(path));
110+
Scanner s = new Scanner(NNetParams.class.getResourceAsStream(path), Charsets.UTF_8);
115111
// Read first entry (array size) as integer
116112
short[] array = new short[s.nextInt()];
117113
for (int i = 0; i < array.length; i++) {
@@ -121,7 +117,7 @@ private static short[] readFileToShortArray(String path) throws FileNotFoundExce
121117
}
122118

123119
private static char[] readFileToCharArray(String path) throws FileNotFoundException {
124-
Scanner s = new Scanner(NNetParams.class.getResourceAsStream(path));
120+
Scanner s = new Scanner(NNetParams.class.getResourceAsStream(path), Charsets.UTF_8);
125121
// Read first entry (array size) as integer
126122
char[] array = new char[s.nextInt()];
127123
for (int i = 0; i < array.length; i++) {
@@ -132,7 +128,7 @@ private static char[] readFileToCharArray(String path) throws FileNotFoundExcept
132128
}
133129

134130
private static float[] readFileToFloatArray(String path) throws FileNotFoundException {
135-
Scanner s = new Scanner(NNetParams.class.getResourceAsStream(path));
131+
Scanner s = new Scanner(NNetParams.class.getResourceAsStream(path), Charsets.UTF_8);
136132
// Read first entry (array size) as integer
137133
float[] array = new float[s.nextInt()];
138134
for (int i = 0; i < array.length; i++) {
@@ -143,7 +139,6 @@ private static float[] readFileToFloatArray(String path) throws FileNotFoundExce
143139
array[i] = f;
144140
} catch (Exception e) {
145141
// TODO resolve error
146-
System.out.println("Can not parse to float '" + text + "'" + e.toString());
147142
throw e;
148143
}
149144
}

0 commit comments

Comments
 (0)