Skip to content

Commit

Permalink
multilingual rule #192
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Aug 23, 2024
1 parent feb88e7 commit e88cd78
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>de.gwdg.metadataqa</groupId>
<artifactId>metadata-qa-api</artifactId>
<packaging>jar</packaging>
<version>0.9.5</version>
<version>0.9.6-SNAPSHOT</version>
<name>Metadata Quality Assurance Framework API</name>
<description>
A metadata quality assurance framework. It checks some metrics of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public class Rule implements Serializable {
private Boolean skip = Boolean.FALSE;
private Boolean debug = Boolean.FALSE;
private Boolean allowEmptyInstances = Boolean.FALSE;
private Boolean multilingual;
private Boolean hasLanguageTag;

public String getId() {
return id;
Expand Down Expand Up @@ -533,6 +535,32 @@ public Rule withAllowEmptyInstances(Boolean allowEmptyInstances) {
return this;
}

/**
 * Returns the value of the multilingual rule setting.
 *
 * @return the stored flag; may be {@code null}, because the backing field is declared
 *         without an initial value
 */
public Boolean getMultilingual() {
return multilingual;
}

/**
 * Stores the multilingual rule setting.
 *
 * <p>NOTE(review): the "Is" prefix differs from the getter name; presumably it mirrors the
 * {@code isMultilingual} key used in the schema configuration files so that bean-style
 * binding can find it. Confirm before renaming.
 *
 * @param multilingual the new value; stored as is, {@code null} included
 */
public void setIsMultilingual(Boolean multilingual) {
this.multilingual = multilingual;
}

/**
 * Fluent counterpart of {@link #setIsMultilingual(Boolean)}.
 *
 * @param multilingual the value to store; {@code null} is stored as is
 * @return this rule, so that further {@code with...} calls can be chained
 */
public Rule withMultilingual(Boolean multilingual) {
  setIsMultilingual(multilingual);
  return this;
}

/**
 * Returns the value of the hasLanguageTag rule setting.
 *
 * @return the stored flag; may be {@code null}, because the backing field is declared
 *         without an initial value
 */
public Boolean getHasLanguageTag() {
return hasLanguageTag;
}

/**
 * Stores the hasLanguageTag rule setting.
 *
 * @param hasLanguageTag the new value; stored as is, {@code null} included
 */
public void setHasLanguageTag(Boolean hasLanguageTag) {
this.hasLanguageTag = hasLanguageTag;
}

/**
 * Fluent counterpart of {@link #setHasLanguageTag(Boolean)}.
 *
 * @param hasLanguageTag the value to store; {@code null} is stored as is
 * @return this rule, so that further {@code with...} calls can be chained
 */
public Rule withHasLanguageTag(Boolean hasLanguageTag) {
  setHasLanguageTag(hasLanguageTag);
  return this;
}

@JsonIgnore
public List<String> getRulenames() {
List<String> excludeFromComparision = List.of("serialVersionUID", "id", "description",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package de.gwdg.metadataqa.api.rule.singlefieldchecker;

import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.json.DataElement;
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
import de.gwdg.metadataqa.api.model.selector.Selector;
import de.gwdg.metadataqa.api.rule.RuleCheckerOutput;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputType;
import de.gwdg.metadataqa.api.uniqueness.UniquenessExtractor;

import java.util.List;

/**
 * Single-field rule checker that reports whether the instances of a field carry a
 * language tag.
 *
 * <p>Flags handed over to {@code addOutput}:
 * <ul>
 *   <li>{@code isNA = true} - the selector returned no instances for the field;</li>
 *   <li>{@code allPassed = true} - at least one instance has a language tag;</li>
 *   <li>{@code allPassed = false} - instances exist, but none of them has a language tag.</li>
 * </ul>
 *
 * <p>NOTE(review): instances are counted regardless of whether they hold a value; confirm
 * whether valueless instances should be ignored, as the rule's "allowEmptyInstances"
 * setting might imply.
 */
public class LanguageTagChecker extends SingleFieldChecker {
  private static final long serialVersionUID = 7236047216814906713L;
  public static final String PREFIX = "languageTag";

  /**
   * Creates a checker whose header is the label of the field.
   *
   * @param field the data element to check
   */
  public LanguageTagChecker(DataElement field) {
    this(field, field.getLabel());
  }

  /**
   * Creates a checker with an explicit header.
   *
   * @param field the data element to check
   * @param header the header handed over to the parent checker
   */
  public LanguageTagChecker(DataElement field, String header) {
    super(field, header);
  }

  /**
   * Evaluates the rule against the instances of the field and registers the outcome.
   *
   * @param cache the selector the field instances are read from
   * @param results the counter the outcome is added to
   * @param outputType the output type forwarded to {@code addOutput}
   */
  @Override
  public void update(Selector cache, FieldCounter<RuleCheckerOutput> results, RuleCheckingOutputType outputType) {
    if (isDebug()) {
      LOGGER.info(this.getClass() + " " + this.id);
    }

    List<XmlFieldInstance> instances = cache.get(field);
    // Without any instance there is nothing to evaluate: the rule is not applicable.
    boolean isNA = (instances == null || instances.isEmpty());
    boolean allPassed = true;
    if (!isNA) {
      // Previously this flag was never cleared, so the rule could not fail: a field
      // without any language tag was reported as NA instead of failed.
      allPassed = false;
      for (XmlFieldInstance instance : instances) {
        if (instance.hasLanguage()) {
          allPassed = true;
          if (isDebug()) {
            LOGGER.info("language tag: " + instance.hasLanguage());
          }
          // One tagged instance is enough, no need to look further.
          break;
        }
      }
    }

    addOutput(results, isNA, allPassed, outputType);
    if (isDebug()) {
      LOGGER.info(this.getClass().getSimpleName() + " " + this.id + ") result: " + RuleCheckingOutputStatus.create(isNA, allPassed));
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package de.gwdg.metadataqa.api.rule.singlefieldchecker;

import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.json.DataElement;
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
import de.gwdg.metadataqa.api.model.selector.Selector;
import de.gwdg.metadataqa.api.rule.RuleCheckerOutput;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputType;
import de.gwdg.metadataqa.api.uniqueness.UniquenessExtractor;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Single-field rule checker that collects the distinct language tags found on the
 * instances of a field. The check passes when at least two distinct languages are
 * present; when no instance has a language tag, the {@code isNA} flag passed to
 * {@code addOutput} is {@code true}.
 */
public class MultilingualChecker extends SingleFieldChecker {
  private static final long serialVersionUID = 3911175767180059821L;
  public static final String PREFIX = "multilingual";

  /**
   * Creates a checker whose header is the label of the field.
   *
   * @param field the data element to check
   */
  public MultilingualChecker(DataElement field) {
    this(field, field.getLabel());
  }

  /**
   * Creates a checker with an explicit header.
   *
   * @param field the data element to check
   * @param header the header handed over to the parent checker
   */
  public MultilingualChecker(DataElement field, String header) {
    super(field, header);
  }

  /**
   * Evaluates the rule against the instances of the field and registers the outcome.
   *
   * @param cache the selector the field instances are read from
   * @param results the counter the outcome is added to
   * @param outputType the output type forwarded to {@code addOutput}
   */
  @Override
  public void update(Selector cache, FieldCounter<RuleCheckerOutput> results, RuleCheckingOutputType outputType) {
    if (isDebug()) {
      LOGGER.info(this.getClass() + " " + this.id);
    }

    Set<String> distinctLanguages = new HashSet<>();
    List<XmlFieldInstance> fieldInstances = cache.get(field);
    if (fieldInstances != null) {
      for (XmlFieldInstance fieldInstance : fieldInstances) {
        if (!fieldInstance.hasLanguage()) {
          continue;
        }
        distinctLanguages.add(fieldInstance.getLanguage());
      }
    }

    // The set receives an entry exactly when an instance has a language tag,
    // so an empty set means that no tagged instance was seen.
    boolean notApplicable = distinctLanguages.isEmpty();
    boolean passed = distinctLanguages.size() >= 2;

    addOutput(results, notApplicable, passed, outputType);
    if (isDebug()) {
      LOGGER.info(this.getClass().getSimpleName() + " " + this.id + ") result: " + RuleCheckingOutputStatus.create(notApplicable, passed));
    }
  }
}
10 changes: 10 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
import de.gwdg.metadataqa.api.rule.pairchecker.EqualityChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.HasValueChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.ImageDimensionChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.LanguageTagChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MaxCountChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MaxLengthChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MaxWordsChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MinCountChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MinLengthChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MinWordsChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.MultilingualChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.NumericValueChecker;
import de.gwdg.metadataqa.api.rule.singlefieldchecker.PatternChecker;
import de.gwdg.metadataqa.api.rule.RuleChecker;
Expand Down Expand Up @@ -126,6 +128,14 @@ private static List<RuleChecker> processRule(Schema schema, DataElement dataElem
if (rule.getUnique() != null && rule.getUnique().equals(Boolean.TRUE))
ruleCheckers.add(new UniquenessChecker(dataElement));

// TODO
if (rule.getMultilingual() != null && rule.getMultilingual().equals(Boolean.TRUE))
ruleCheckers.add(new MultilingualChecker(dataElement));

// TODO
if (rule.getHasLanguageTag() != null && rule.getHasLanguageTag().equals(Boolean.TRUE))
ruleCheckers.add(new LanguageTagChecker(dataElement));

if (rule.getLessThan() != null)
pair(schema, ruleCheckers, dataElement, rule.getLessThan(), "LessThan");

Expand Down
2 changes: 1 addition & 1 deletion src/test/java/de/gwdg/metadataqa/api/cli/VersionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

public class VersionTest {

private final String EXPECTED_VERSION = "0.9.5";
private final String EXPECTED_VERSION = "0.9.6-SNAPSHOT";

@Test
public void getVersion() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,20 @@ public void yaml_contentType() throws FileNotFoundException {
}

@Test
public void yaml_ssLanguageTagged() throws FileNotFoundException {
public void yaml_asLanguageTagged() throws FileNotFoundException {
Schema schema = ConfigurationReader.readSchemaYaml("src/test/resources/configuration/schema/asLanguageTagged.yaml").asSchema();
assertEquals(true, schema.getPathByLabel("description").isAsLanguageTagged());
}

// Reads the isMultilingual.yaml fixture and checks that the first rule of the
// "description" field has the multilingual flag set.
@Test
public void yaml_isMultilingual() throws FileNotFoundException {
  String yamlPath = "src/test/resources/configuration/schema/rules/isMultilingual.yaml";
  Schema schema = ConfigurationReader.readSchemaYaml(yamlPath).asSchema();

  Boolean multilingual = schema.getPathByLabel("description").getRules().get(0).getMultilingual();

  assertEquals(true, multilingual);
}

// Reads the hasLanguageTag.yaml fixture and checks that the first rule of the
// "description" field has the hasLanguageTag flag set.
// NOTE(review): the method name contains a typo ("Languagge"); kept unchanged here.
@Test
public void yaml_hasLanguaggeTag() throws FileNotFoundException {
  String yamlPath = "src/test/resources/configuration/schema/rules/hasLanguageTag.yaml";
  Schema schema = ConfigurationReader.readSchemaYaml(yamlPath).asSchema();

  Boolean hasLanguageTag = schema.getPathByLabel("description").getRules().get(0).getHasLanguageTag();

  assertEquals(true, hasLanguageTag);
}
}
1 change: 1 addition & 0 deletions src/test/resources/configuration/multilingual.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"1","description":[{"@lang":"de","#value":"Portr\u00e4t"},{"@lang":"zh","#value":"\u8096\u50cf"}]}
11 changes: 11 additions & 0 deletions src/test/resources/configuration/schema/rules/hasLanguageTag.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
format: json
fields:
- name: about
path: $.['about']
rules:
- lessThanOrEquals: description
- name: description
path: $.['description']
asLanguageTagged: true
rules:
- hasLanguageTag: true
11 changes: 11 additions & 0 deletions src/test/resources/configuration/schema/rules/isMultilingual.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
format: json
fields:
- name: about
path: $.['about']
rules:
- lessThanOrEquals: description
- name: description
path: $.['description']
asLanguageTagged: true
rules:
- isMultilingual: true

0 comments on commit e88cd78

Please sign in to comment.