Skip to content
This repository was archived by the owner on Dec 18, 2021. It is now read-only.

Commit a9536d5

Browse files
committed
add 'language_to' parameter
1 parent fd5f00f commit a9536d5

File tree

4 files changed

+192
-58
lines changed

4 files changed

+192
-58
lines changed

src/main/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMapper.java

Lines changed: 134 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
import org.xbib.elasticsearch.common.langdetect.LanguageDetectionException;
2020

2121
import java.io.IOException;
22-
import java.nio.charset.Charset;
22+
import java.nio.charset.StandardCharsets;
23+
import java.util.Collections;
2324
import java.util.Iterator;
25+
import java.util.LinkedHashMap;
2426
import java.util.List;
2527
import java.util.Map;
2628

@@ -48,6 +50,8 @@ public static class Builder extends FieldMapper.Builder<Builder, StringFieldMapp
4850

4951
protected int positionIncrementGap = -1;
5052

53+
protected LanguageTo languageTo = LanguageTo.builder().build();
54+
5155
protected Settings.Builder settingsBuilder = Settings.settingsBuilder();
5256

5357
public Builder(String name) {
@@ -139,6 +143,11 @@ public Builder profile(String profile) {
139143
return this;
140144
}
141145

146+
/**
 * Sets the language-to-field routing that this builder passes on to the
 * mapper it creates.
 *
 * @param languageTo the routing to use; it is stored as given, no null check
 *                   or copy is made here
 * @return this builder, so calls can be chained
 */
public Builder languageTo(LanguageTo languageTo) {
147+
this.languageTo = languageTo;
148+
return this;
149+
}
150+
142151
@Override
143152
public LangdetectMapper build(BuilderContext context) {
144153
if (positionIncrementGap != -1) {
@@ -159,7 +168,8 @@ public LangdetectMapper build(BuilderContext context) {
159168
setupFieldType(context);
160169
LangdetectService service = new LangdetectService(settingsBuilder.build());
161170
return new LangdetectMapper(name, fieldType, defaultFieldType, 100, -1,
162-
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo, service);
171+
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo,
172+
languageTo, service);
163173
}
164174
}
165175

@@ -175,21 +185,19 @@ public static class TypeParser implements Mapper.TypeParser {
175185
String fieldName = entry.getKey();
176186
Object fieldNode = entry.getValue();
177187
switch (fieldName) {
178-
case "analyzer" : {
188+
case "analyzer" :
179189
// "_keyword" - we do ignore this, it's our internal analyzer
180190
iterator.remove();
181191
break;
182-
}
183-
case "search_quote_analyzer": {
192+
case "search_quote_analyzer":
184193
NamedAnalyzer analyzer = parserContext.analysisService().analyzer(fieldNode.toString());
185194
if (analyzer == null) {
186195
throw new MapperParsingException("Analyzer [" + fieldNode.toString() + "] not found for field [" + name + "]");
187196
}
188197
builder.searchQuotedAnalyzer(analyzer);
189198
iterator.remove();
190199
break;
191-
}
192-
case "position_increment_gap": {
200+
case "position_increment_gap":
193201
int newPositionIncrementGap = XContentMapValues.nodeIntegerValue(fieldNode, -1);
194202
if (newPositionIncrementGap < 0) {
195203
throw new MapperParsingException("position_increment_gap less than 0 aren't allowed.");
@@ -206,78 +214,72 @@ public static class TypeParser implements Mapper.TypeParser {
206214
}
207215
iterator.remove();
208216
break;
209-
}
210-
case "store" : {
217+
case "store" :
211218
builder.store(parseStore(fieldName, fieldNode.toString()));
212219
iterator.remove();
213220
break;
214-
}
215-
case "number_of_trials": {
221+
case "number_of_trials":
216222
builder.ntrials(XContentMapValues.nodeIntegerValue(fieldNode));
217223
iterator.remove();
218224
break;
219-
}
220-
case "alpha": {
225+
case "alpha":
221226
builder.alpha(XContentMapValues.nodeDoubleValue(fieldNode));
222227
iterator.remove();
223228
break;
224-
}
225-
case "alpha_width": {
229+
case "alpha_width":
226230
builder.alphaWidth(XContentMapValues.nodeDoubleValue(fieldNode));
227231
iterator.remove();
228232
break;
229-
}
230-
case "iteration_limit": {
233+
case "iteration_limit":
231234
builder.iterationLimit(XContentMapValues.nodeIntegerValue(fieldNode));
232235
iterator.remove();
233236
break;
234-
}
235-
case "prob_threshold": {
237+
case "prob_threshold":
236238
builder.probThreshold(XContentMapValues.nodeDoubleValue(fieldNode));
237239
iterator.remove();
238240
break;
239-
}
240-
case "conv_threshold": {
241+
case "conv_threshold":
241242
builder.convThreshold(XContentMapValues.nodeDoubleValue(fieldNode));
242243
iterator.remove();
243244
break;
244-
}
245-
case "base_freq": {
245+
case "base_freq":
246246
builder.baseFreq(XContentMapValues.nodeIntegerValue(fieldNode));
247247
iterator.remove();
248248
break;
249-
}
250-
case "pattern": {
249+
case "pattern":
251250
builder.pattern(XContentMapValues.nodeStringValue(fieldNode, null));
252251
iterator.remove();
253252
break;
254-
}
255-
case "max": {
253+
case "max":
256254
builder.max(XContentMapValues.nodeIntegerValue(fieldNode));
257255
iterator.remove();
258256
break;
259-
}
260-
case "binary": {
257+
case "binary":
261258
boolean b = XContentMapValues.nodeBooleanValue(fieldNode);
262259
builder.binary(b);
263260
iterator.remove();
264261
break;
265-
}
266-
case "map" : {
262+
case "map" :
267263
builder.map(XContentMapValues.nodeMapValue(fieldNode, "map"));
268264
iterator.remove();
269265
break;
270-
}
271-
case "languages" : {
266+
case "languages" :
272267
builder.languages(XContentMapValues.nodeStringArrayValue(fieldNode));
273268
iterator.remove();
274269
break;
275-
}
276-
case "profile" : {
270+
case "profile" :
277271
builder.profile(XContentMapValues.nodeStringValue(fieldNode, null));
278272
iterator.remove();
279273
break;
280-
}
274+
case "language_to" :
275+
Map<String, Object> map = XContentMapValues.nodeMapValue(fieldNode, null);
276+
LanguageTo.Builder languageToBuilder = LanguageTo.builder();
277+
languageToBuilder.add(map);
278+
builder.languageTo(languageToBuilder.build());
279+
iterator.remove();
280+
break;
281+
default:
282+
break;
281283
}
282284
}
283285
return builder;
@@ -288,17 +290,21 @@ public static class TypeParser implements Mapper.TypeParser {
288290

289291
private final int positionIncrementGap;
290292

293+
private final LanguageTo languageTo;
294+
291295
/**
 * Creates the mapper.
 *
 * The name, both field types, the position increment gap, {@code ignoreAbove},
 * the index settings, the multi-fields and the copy-to configuration are
 * forwarded unchanged to the superclass constructor. The language routing,
 * the detection service and the position increment gap are additionally kept
 * in fields of this mapper.
 *
 * @param simpleName           forwarded to the superclass
 * @param fieldType            forwarded to the superclass
 * @param defaultFieldType     forwarded to the superclass
 * @param positionIncrementGap forwarded to the superclass and also stored here
 * @param ignoreAbove          forwarded to the superclass
 * @param indexSettings        forwarded to the superclass
 * @param multiFields          forwarded to the superclass
 * @param copyTo               forwarded to the superclass
 * @param languageTo           language-to-field routing, stored as given
 * @param langdetectService    language detection service, stored as given
 */
public LangdetectMapper(String simpleName,
292-
MappedFieldType fieldType,
293-
MappedFieldType defaultFieldType,
294-
int positionIncrementGap,
295-
int ignoreAbove,
296-
Settings indexSettings,
297-
MultiFields multiFields,
298-
CopyTo copyTo,
299-
LangdetectService langdetectService) {
296+
MappedFieldType fieldType,
297+
MappedFieldType defaultFieldType,
298+
int positionIncrementGap,
299+
int ignoreAbove,
300+
Settings indexSettings,
301+
MultiFields multiFields,
302+
CopyTo copyTo,
303+
LanguageTo languageTo,
304+
LangdetectService langdetectService) {
300305
super(simpleName, fieldType, defaultFieldType,
301306
positionIncrementGap, ignoreAbove, indexSettings, multiFields, copyTo);
307+
this.languageTo = languageTo;
302308
this.langdetectService = langdetectService;
303309
this.positionIncrementGap = positionIncrementGap;
304310
}
@@ -337,7 +343,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
337343
try {
338344
byte[] b = parser.binaryValue();
339345
if (b != null && b.length > 0) {
340-
value = new String(b, Charset.forName("UTF-8"));
346+
value = new String(b, StandardCharsets.UTF_8);
341347
}
342348
} catch (Exception e) {
343349
// ignore
@@ -348,6 +354,9 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
348354
for (Language lang : langs) {
349355
Field field = new Field(fieldType().names().indexName(), lang.getLanguage(), fieldType());
350356
fields.add(field);
357+
if (languageTo.languageToFields().containsKey(lang.getLanguage())) {
358+
parseLanguageToFields(context, languageTo.languageToFields().get(lang.getLanguage()));
359+
}
351360
}
352361
} catch (LanguageDetectionException e) {
353362
context.createExternalValueContext("unknown");
@@ -377,5 +386,84 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
377386
for (String key : map.keySet()) {
378387
builder.field(key, map.get(key));
379388
}
389+
languageTo.toXContent(builder, params);
380390
}
381-
}
391+
392+
@SuppressWarnings("unchecked")
393+
private static void parseLanguageToFields(ParseContext originalContext, Object languageToFields) throws IOException {
394+
List<Object> fieldList = languageToFields instanceof List ?
395+
(List<Object>)languageToFields : Collections.singletonList(languageToFields);
396+
ParseContext context = originalContext.createCopyToContext();
397+
for (Object field : fieldList) {
398+
ParseContext.Document targetDoc = null;
399+
for (ParseContext.Document doc = context.doc(); doc != null; doc = doc.getParent()) {
400+
if (field.toString().startsWith(doc.getPrefix())) {
401+
targetDoc = doc;
402+
break;
403+
}
404+
}
405+
if (targetDoc == null) {
406+
throw new IllegalArgumentException("target doc is null");
407+
}
408+
final ParseContext copyToContext;
409+
if (targetDoc == context.doc()) {
410+
copyToContext = context;
411+
} else {
412+
copyToContext = context.switchDoc(targetDoc);
413+
}
414+
FieldMapper fieldMapper = copyToContext.docMapper().mappers().getMapper(field.toString());
415+
if (fieldMapper != null) {
416+
fieldMapper.parse(copyToContext);
417+
} else {
418+
throw new MapperParsingException("attempt to copy value to non-existing field [" + field + "]");
419+
}
420+
}
421+
}
422+
423+
public static class LanguageTo {
424+
425+
private final Map<String, Object> languageToFields;
426+
427+
private LanguageTo(Map<String, Object> languageToFields) {
428+
this.languageToFields = languageToFields;
429+
}
430+
431+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
432+
if (!languageToFields.isEmpty()) {
433+
builder.startObject("language_to");
434+
for (Map.Entry<String, Object> field : languageToFields.entrySet()) {
435+
builder.field(field.getKey(), field.getValue());
436+
}
437+
builder.endObject();
438+
}
439+
return builder;
440+
}
441+
442+
public static Builder builder() {
443+
return new Builder();
444+
}
445+
446+
public static class Builder {
447+
private final Map<String, Object> languageToBuilders = new LinkedHashMap<>();
448+
449+
public LanguageTo.Builder add(String language, String field) {
450+
languageToBuilders.put(language, field);
451+
return this;
452+
}
453+
454+
public LanguageTo.Builder add(Map<String, Object> map) {
455+
languageToBuilders.putAll(map);
456+
return this;
457+
}
458+
459+
public LanguageTo build() {
460+
return new LanguageTo(Collections.unmodifiableMap(languageToBuilders));
461+
}
462+
}
463+
464+
public Map<String, Object> languageToFields() {
465+
return languageToFields;
466+
}
467+
}
468+
469+
}

src/test/java/org/xbib/elasticsearch/index/mapper/langdetect/LangdetectMappingTest.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import java.io.IOException;
1414
import java.io.InputStreamReader;
15+
import java.nio.charset.StandardCharsets;
1516

1617
import static org.elasticsearch.common.io.Streams.copyToString;
1718
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@@ -115,7 +116,30 @@ public void testShortTextProfile() throws Exception {
115116
assertEquals("en", doc.getFields("someField")[0].stringValue());
116117
}
117118

119+
@Test
120+
public void testToFields() throws Exception {
121+
String mapping = copyToStringFromClasspath("mapping-to-fields.json");
122+
DocumentMapper docMapper = MapperTestUtils.newDocumentMapperParser().parse("someType", new CompressedXContent(mapping));
123+
String sampleText = copyToStringFromClasspath("english.txt");
124+
BytesReference json = jsonBuilder().startObject().field("someField", sampleText).endObject().bytes();
125+
ParseContext.Document doc = docMapper.parse("someIndex", "someType", "1", json).rootDoc();
126+
assertEquals(1, doc.getFields("someField").length);
127+
assertEquals("en", doc.getFields("someField")[0].stringValue());
128+
// re-parse it
129+
String builtMapping = docMapper.mappingSource().string();
130+
docMapper = MapperTestUtils.newDocumentMapperParser().parse("someType", new CompressedXContent(builtMapping));
131+
json = jsonBuilder().startObject().field("someField", sampleText).endObject().bytes();
132+
doc = docMapper.parse("someIndex", "someType", "1", json).rootDoc();
133+
//for (IndexableField field : doc.getFields()) {
134+
// System.err.println(field.name() + " = " + field.stringValue());
135+
//}
136+
assertEquals(1, doc.getFields("someField").length);
137+
assertEquals("en", doc.getFields("someField")[0].stringValue());
138+
assertEquals(1, doc.getFields("english_field").length);
139+
assertEquals("This is a very small example of a text", doc.getFields("english_field")[0].stringValue());
140+
}
141+
118142
/**
 * Reads a resource, located with {@code getClass().getResource(path)}, into a
 * string, decoding its bytes as UTF-8.
 *
 * @param path resource name handed to {@link Class#getResource(String)}
 * @return the complete resource content
 * @throws IOException if the resource cannot be opened or read
 */
// NOTE(review): the getResource result is dereferenced without a null check,
// so a missing resource presumably fails with a NullPointerException - confirm.
public String copyToStringFromClasspath(String path) throws IOException {
119-
return copyToString(new InputStreamReader(getClass().getResource(path).openStream(), "UTF-8"));
143+
return copyToString(new InputStreamReader(getClass().getResource(path).openStream(), StandardCharsets.UTF_8));
120144
}
121145
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"someType" : {
3+
"properties" : {
4+
"someField":{
5+
"type" : "langdetect",
6+
"languages" : [ "de", "en", "fr", "nl", "it" ],
7+
"language_to" : {
8+
"de": "german_field",
9+
"en": "english_field"
10+
}
11+
},
12+
"german_field" : {
13+
"analyzer" : "german",
14+
"type": "string"
15+
},
16+
"english_field" : {
17+
"analyzer" : "english",
18+
"type" : "string"
19+
}
20+
}
21+
}
22+
}
Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
2-
"someType" : {
3-
"properties" : {
4-
"someField":{
5-
"type" : "langdetect",
6-
"languages" : [ "de", "en", "fr", "nl", "it" ],
7-
"map" : {
8-
"de" : "Deutsch"
9-
}
10-
}
11-
}
2+
"someType" : {
3+
"properties" : {
4+
"someField":{
5+
"type" : "langdetect",
6+
"languages" : [ "de", "en", "fr", "nl", "it" ],
7+
"map" : {
8+
"de" : "Deutsch"
129
}
13-
}
10+
}
11+
}
12+
}
13+
}

0 commit comments

Comments
 (0)