From 961e9a86f5ba40a07a319b1e6ec88f6558e97cfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20D=C3=B6ring?= <mdoering@gbif.org>
Date: Tue, 7 Jan 2025 05:49:39 +0100
Subject: [PATCH] Test normalisation of monomial names, see
 https://github.com/CatalogueOfLife/data/issues/892

---
 .../catalogue/api/model/FormattableName.java  |  6 +-
 .../life/catalogue/api/model/IndexName.java   | 40 +++++------
 .../life/catalogue/common/io/TabReader.java   |  8 +++
 .../common/tax/SciNameNormalizer.java         | 53 ++++++--------
 .../common/tax/SciNameNormalizerTest.java     | 71 ++++++++++---------
 .../java/life/catalogue/es/NameStrings.java   | 10 +--
 .../life/catalogue/es/nu/FuzzyMatcher.java    | 21 +++---
 .../es/nu/NameUsageWrapperConverter.java      | 14 +---
 .../life/catalogue/es/nu/SimpleMatcher.java   |  8 +--
 .../es/nu/search/NameUsageHighlighter.java    | 13 ++--
 .../nidx/NameIndexChronicleStore.java         |  5 +-
 .../matching/nidx/NameIndexImpl.java          |  5 +-
 .../matching/nidx/NameIndexKryoPool.java      |  2 +
 .../matching/nidx/NamesIndexConfig.java       | 11 ++-
 .../resources/life/catalogue/db/dbschema.md   |  4 ++
 .../resources/life/catalogue/db/dbschema.sql  |  1 +
 .../catalogue/db/mapper/NamesIndexMapper.xml  |  2 +
 .../es/nu/NameUsageWrapperConverterTest.java  | 36 +++++-----
 .../catalogue/matching/NameIndexImplTest.java | 65 +++++++++++++++--
 .../catalogue/assembly/SectorSyncMergeIT.java |  1 +
 .../life/catalogue/importer/PgImportIT.java   |  2 +-
 .../txtree/ismaridae/artsnaven.txtree         |  7 ++
 .../resources/txtree/ismaridae/dyntaxa.txtree | 10 +++
 .../txtree/ismaridae/expected.txtree          |  9 +++
 .../resources/txtree/ismaridae/project.txtree |  8 +++
 .../test/resources/txtree/ismaridae/readme.md |  2 +
 .../resources/txtree/ismaridae/taxref.txtree  | 20 ++++++
 27 files changed, 276 insertions(+), 158 deletions(-)
 create mode 100644 webservice/src/test/resources/txtree/ismaridae/artsnaven.txtree
 create mode 100644 webservice/src/test/resources/txtree/ismaridae/dyntaxa.txtree
 create mode 100644 webservice/src/test/resources/txtree/ismaridae/expected.txtree
 create mode 100644 webservice/src/test/resources/txtree/ismaridae/project.txtree
 create mode 100644 webservice/src/test/resources/txtree/ismaridae/readme.md
 create mode 100644 webservice/src/test/resources/txtree/ismaridae/taxref.txtree
diff --git a/api/src/main/java/life/catalogue/api/model/FormattableName.java b/api/src/main/java/life/catalogue/api/model/FormattableName.java
index a1980b8c74..f51d7d328e 100644
--- a/api/src/main/java/life/catalogue/api/model/FormattableName.java
+++ b/api/src/main/java/life/catalogue/api/model/FormattableName.java
@@ -14,6 +14,8 @@
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.collect.Lists;
 
+import org.gbif.nameparser.api.NameType;
+
 /**
  * Most of the Name class with all getters needed to format a Name using the NameFormatter.
  */
@@ -34,6 +36,8 @@ default Boolean isOriginalSpelling() {
 
   String getUnparsed();
 
+  NameType getType();
+
   /**
    * @return the terminal epithet. Infraspecific epithet if existing, the species epithet or null
    */
@@ -123,6 +127,6 @@ default List<String> nameParts() {
    */
   @JsonIgnore
   default String getScientificNameNormalized() {
-    return SciNameNormalizer.normalize(getScientificName());
+    return SciNameNormalizer.normalize(getScientificName(), getType());
   }
 }
diff --git a/api/src/main/java/life/catalogue/api/model/IndexName.java b/api/src/main/java/life/catalogue/api/model/IndexName.java
index 823e648270..975082f919 100644
--- a/api/src/main/java/life/catalogue/api/model/IndexName.java
+++ b/api/src/main/java/life/catalogue/api/model/IndexName.java
@@ -4,10 +4,7 @@
 import life.catalogue.common.tax.NameFormatter;
 import life.catalogue.common.text.StringUtils;
 
-import org.gbif.nameparser.api.Authorship;
-import org.gbif.nameparser.api.NamePart;
-import org.gbif.nameparser.api.NomCode;
-import org.gbif.nameparser.api.Rank;
+import org.gbif.nameparser.api.*;
 
 import java.util.Objects;
 
@@ -38,6 +35,7 @@ public class IndexName extends DataEntity<Integer> implements FormattableName {
   private String authorship;
   @Nonnull
   private Rank rank;
+  private NameType type;
   private String uninomial;
   private String genus;
   private String infragenericEpithet; // we only use this for true infrageneric names, not bi-/trinomials!
@@ -60,6 +58,7 @@ public IndexName(IndexName other) {
     this.scientificName = other.scientificName;
     this.authorship = other.authorship;
     this.rank = other.rank;
+    this.type = other.type;
     this.uninomial = other.uninomial;
     this.genus = other.genus;
     this.infragenericEpithet = other.infragenericEpithet;
@@ -83,6 +82,7 @@ public IndexName(Name n) {
     this.scientificName = n.getScientificName();
     this.authorship = n.getAuthorship();
     setRank(n.getRank());
+    this.type = n.getType();
     this.uninomial = n.getUninomial();
     this.genus = n.getGenus();
     this.infragenericEpithet = n.getInfragenericEpithet();
@@ -112,6 +112,7 @@ public IndexName(Name n, int key) {
   public static IndexName newCanonical(IndexName n) {
     IndexName cn = new IndexName();
     cn.setRank(CANONICAL_RANK);
+    cn.setType(n.getType());
     // we keep a canonical infrageneric name in uninomial and ignore its genus placement!
     if (n.getInfragenericEpithet() != null && n.isInfrageneric()) {
       cn.setUninomial(n.getInfragenericEpithet());
@@ -293,6 +294,15 @@ public String getUnparsed() {
     return null;
   }
 
+  @Override
+  public NameType getType() {
+    return type;
+  }
+
+  public void setType(NameType type) {
+    this.type = type;
+  }
+
   public void setCultivarEpithet(String cultivarEpithet) {
     this.cultivarEpithet = cultivarEpithet;
   }
@@ -377,29 +387,15 @@ String scientificNameHtml(){
 
   @Override
   public boolean equals(Object o) {
-    if (this == o) return true;
-    if (!(o instanceof IndexName)) return false;
+    if (o == null || getClass() != o.getClass()) return false;
     if (!super.equals(o)) return false;
     IndexName indexName = (IndexName) o;
-    return Objects.equals(key, indexName.key) &&
-      Objects.equals(canonicalId, indexName.canonicalId) &&
-      scientificName.equals(indexName.scientificName) &&
-      Objects.equals(authorship, indexName.authorship) &&
-      rank == indexName.rank &&
-      Objects.equals(uninomial, indexName.uninomial) &&
-      Objects.equals(genus, indexName.genus) &&
-      Objects.equals(infragenericEpithet, indexName.infragenericEpithet) &&
-      Objects.equals(specificEpithet, indexName.specificEpithet) &&
-      Objects.equals(infraspecificEpithet, indexName.infraspecificEpithet) &&
-      Objects.equals(cultivarEpithet, indexName.cultivarEpithet) &&
-      Objects.equals(combinationAuthorship, indexName.combinationAuthorship) &&
-      Objects.equals(basionymAuthorship, indexName.basionymAuthorship) &&
-      Objects.equals(sanctioningAuthor, indexName.sanctioningAuthor);
+    return Objects.equals(key, indexName.key) && Objects.equals(canonicalId, indexName.canonicalId) && Objects.equals(scientificName, indexName.scientificName) && Objects.equals(authorship, indexName.authorship) && rank == indexName.rank && type == indexName.type && Objects.equals(uninomial, indexName.uninomial) && Objects.equals(genus, indexName.genus) && Objects.equals(infragenericEpithet, indexName.infragenericEpithet) && Objects.equals(specificEpithet, indexName.specificEpithet) && Objects.equals(infraspecificEpithet, indexName.infraspecificEpithet) && Objects.equals(cultivarEpithet, indexName.cultivarEpithet) && Objects.equals(combinationAuthorship, indexName.combinationAuthorship) && Objects.equals(basionymAuthorship, indexName.basionymAuthorship) && Objects.equals(sanctioningAuthor, indexName.sanctioningAuthor);
   }
 
   @Override
   public int hashCode() {
-    return Objects.hash(super.hashCode(), key, canonicalId, scientificName, authorship, rank, uninomial, genus, infragenericEpithet, specificEpithet, infraspecificEpithet, cultivarEpithet, combinationAuthorship, basionymAuthorship, sanctioningAuthor);
+    return Objects.hash(super.hashCode(), key, canonicalId, scientificName, authorship, rank, type, uninomial, genus, infragenericEpithet, specificEpithet, infraspecificEpithet, cultivarEpithet, combinationAuthorship, basionymAuthorship, sanctioningAuthor);
   }
 
   @Override
@@ -413,6 +409,8 @@ public String toString() {
       sb.append(" [CANONICAL]");
     } else {
       sb.append(getLabelWithRank());
+      sb.append(" cid=");
+      sb.append(getCanonicalId());
     }
     return sb.toString();
   }
diff --git a/api/src/main/java/life/catalogue/common/io/TabReader.java b/api/src/main/java/life/catalogue/common/io/TabReader.java
index 71aa6a265c..163ea1c480 100644
--- a/api/src/main/java/life/catalogue/common/io/TabReader.java
+++ b/api/src/main/java/life/catalogue/common/io/TabReader.java
@@ -26,6 +26,10 @@ public static TabReader csv(Reader reader, int skip) throws IOException  {
     return csv(new ReaderInputStream(reader, StandardCharsets.UTF_8), StandardCharsets.UTF_8, skip, 2);
   }
 
+  public static TabReader csv(File file, int skip) throws IOException  {
+    return csv(file, StandardCharsets.UTF_8, skip, 2);
+  }
+
   public static TabReader csv(File file, Charset charset, int skip) throws IOException  {
     return csv(file, charset, skip, 2);
   }
@@ -42,6 +46,10 @@ public static TabReader tab(Reader reader, int skip) throws IOException  {
     return tab(new ReaderInputStream(reader, StandardCharsets.UTF_8), StandardCharsets.UTF_8, skip, 2);
   }
 
+  public static TabReader tab(File file, int skip) throws IOException  {
+    return tab(file, StandardCharsets.UTF_8, skip, 2);
+  }
+
   public static TabReader tab(File file, Charset charset, int skip) throws IOException  {
     return tab(file, charset, skip, 2);
   }
diff --git a/api/src/main/java/life/catalogue/common/tax/SciNameNormalizer.java b/api/src/main/java/life/catalogue/common/tax/SciNameNormalizer.java
index 5f0966c1ff..acc9526746 100644
--- a/api/src/main/java/life/catalogue/common/tax/SciNameNormalizer.java
+++ b/api/src/main/java/life/catalogue/common/tax/SciNameNormalizer.java
@@ -1,5 +1,7 @@
 package life.catalogue.common.tax;
 
+import org.gbif.nameparser.api.NameType;
+
 import java.util.regex.Pattern;
 
 import static org.apache.commons.lang3.StringUtils.trimToNull;
@@ -96,50 +98,35 @@ public static String normalizeWhitespaceAndPunctuation(String s) {
    * The return will be a strictly ASCII encoded string.
    */
   public static String normalize(String s) {
-    return normalize(s, false, true);
+    return normalize(s, null, true);
   }
-  
-  /**
-   * Normalizes an entire name string including monomials and genus parts of a name.
-   */
-  public static String normalizeAll(String s) {
-    return normalize(s, true, true);
+
+  public static String normalize(String s, NameType type) {
+    return normalize(s, type, true);
   }
-  
-  private static String normalize(String s, boolean normMonomials, boolean stemming) {
+
+  private static String normalize(String s, NameType type, boolean stemEpithets) {
     if (!hasContent(s)) return "";
     
     s = normalizedAscii(s);
     
     // Remove a hybrid cross, or a likely hybrid cross.
     s = removeHybridMarker(s);
-    
-    // Only for bi/trinomials, otherwise we mix up ranks.
-    if (normMonomials) {
-      s = normStrongly(s, stemming);
-      
-    } else if (s.indexOf(' ') > 2) {
-      String[] parts = s.split(" +");
-      StringBuilder sb = new StringBuilder();
-      sb.append(parts[0]);
-      for (int i = 1; i < parts.length; i++) {
-        sb.append(" ");
-        if (Character.isLowerCase(parts[i].charAt(0))) {
-          sb.append(normStrongly(parts[i], stemming));
-        } else {
-          sb.append(parts[i]);
-        }
+
+    // correct common misspellings
+    if (type != null && type.isParsable()) {
+      s = normSpellings(s);
+
+      // apply stemming only for epithets, never monomials!
+      if (stemEpithets && s.indexOf(' ') > 2) {
+        s = stemEpithet(s);
       }
-      s = sb.toString();
     }
-    
+
     return s.trim();
   }
-  
-  private static String normStrongly(String s, boolean stemming) {
-    if (stemming) {
-      s = stemEpithet(s);
-    }
+
+  private static String normSpellings(String s) {
     // normalize frequent variations of i
     s = i.matcher(s).replaceAll("i");
     // remove repeated letters→leters in binomials
@@ -152,7 +139,7 @@ private static String normStrongly(String s, boolean stemming) {
    * Stems and normalizes some few, but frequent misspellings
    */
   public static String normalizeEpithet(String epithet) {
-    return normStrongly(epithet, true);
+    return stemEpithet(normSpellings(epithet));
   }
 
   /**
diff --git a/api/src/test/java/life/catalogue/common/tax/SciNameNormalizerTest.java b/api/src/test/java/life/catalogue/common/tax/SciNameNormalizerTest.java
index f5fb7f2966..3f352e7f93 100644
--- a/api/src/test/java/life/catalogue/common/tax/SciNameNormalizerTest.java
+++ b/api/src/test/java/life/catalogue/common/tax/SciNameNormalizerTest.java
@@ -1,11 +1,16 @@
 package life.catalogue.common.tax;
 
+import life.catalogue.common.io.TabReader;
+
 import org.junit.Test;
 
+import java.io.*;
+
 import static org.junit.Assert.assertEquals;
 
 
 public class SciNameNormalizerTest {
+
   @Test
   public void removeHybridMarker() throws Exception {
     assertEquals("Abies", SciNameNormalizer.removeHybridMarker("Abies"));
@@ -18,7 +23,7 @@ public void removeHybridMarker() throws Exception {
   public void testNormalize() throws Exception {
     assertEquals("", SciNameNormalizer.normalize(""));
     assertEquals("Abies", SciNameNormalizer.normalize("Abies "));
-    assertEquals("Abiies", SciNameNormalizer.normalize("Abiies "));
+    assertEquals("Abies", SciNameNormalizer.normalize("Abiies "));
     assertEquals("Abyes", SciNameNormalizer.normalize("Abyes "));
     assertEquals("Abyes alb", SciNameNormalizer.normalize("Abyes  albus"));
     assertEquals("Abyes albiet", SciNameNormalizer.normalize("Abyes albieta"));
@@ -63,41 +68,41 @@ public void testNormalize() throws Exception {
 
   @Test
   public void testNormalizeAll() throws Exception {
-    assertEquals("", SciNameNormalizer.normalizeAll(""));
-    assertEquals("Abies", SciNameNormalizer.normalizeAll("Abies "));
-    assertEquals("Abies", SciNameNormalizer.normalizeAll("Abiies "));
-    assertEquals("Abies", SciNameNormalizer.normalizeAll("Abyes "));
-    assertEquals("Abies alb", SciNameNormalizer.normalizeAll("Abyes  albus"));
-    assertEquals("Abies albiet", SciNameNormalizer.normalizeAll("Abyes albieta"));
-    assertEquals("Abies albiet", SciNameNormalizer.normalizeAll("Abies albijeta"));
-    assertEquals("Abies albiet", SciNameNormalizer.normalizeAll("Abies albyeta"));
-    assertEquals("Abies alb", SciNameNormalizer.normalizeAll(" \txAbies × ållbbus\t"));
-
-    assertEquals("Abies alb", SciNameNormalizer.normalizeAll(" \txAbies × ållbbus\t"));
-    assertEquals("Rachis takt", SciNameNormalizer.normalizeAll("Rhachis taktos"));
-
-    assertEquals("Hieracium sabaud", SciNameNormalizer.normalizeAll("Hieracium sabaudum"));
-    assertEquals("Hieracium scorzoneraefoli", SciNameNormalizer.normalizeAll("Hieracium scorzoneræfolium"));
-    assertEquals("Hieracium scorzonerifoli", SciNameNormalizer.normalizeAll("Hieracium scorzonerifolium"));
-    assertEquals("Macrozamia platirach", SciNameNormalizer.normalizeAll("Macrozamia platyrachis"));
-    assertEquals("Macrozamia platirach", SciNameNormalizer.normalizeAll("Macrozamia platyrhachis"));
-    assertEquals("Cicas circinal", SciNameNormalizer.normalizeAll("Cycas circinalis"));
-    assertEquals("Cicas circinal", SciNameNormalizer.normalizeAll("Cycas circinnalis"));
-    assertEquals("Isolona perier", SciNameNormalizer.normalizeAll("Isolona perieri"));
-    assertEquals("Isolona perier", SciNameNormalizer.normalizeAll("Isolona perrieri"));
-    assertEquals("Isolona perier", SciNameNormalizer.normalizeAll("Isolona perrierii"));
-
-    assertEquals("Carex caiouet", SciNameNormalizer.normalizeAll("Carex ×cayouettei"));
-    assertEquals("Platanus hispanic", SciNameNormalizer.normalizeAll("Platanus x hispanica"));
+    assertEquals("", SciNameNormalizer.normalize(""));
+    assertEquals("Abies", SciNameNormalizer.normalize("Abies "));
+    assertEquals("Abies", SciNameNormalizer.normalize("Abiies "));
+    assertEquals("Abies", SciNameNormalizer.normalize("Abyes "));
+    assertEquals("Abies alb", SciNameNormalizer.normalize("Abyes  albus"));
+    assertEquals("Abies albiet", SciNameNormalizer.normalize("Abyes albieta"));
+    assertEquals("Abies albiet", SciNameNormalizer.normalize("Abies albijeta"));
+    assertEquals("Abies albiet", SciNameNormalizer.normalize("Abies albyeta"));
+    assertEquals("Abies alb", SciNameNormalizer.normalize(" \txAbies × ållbbus\t"));
+
+    assertEquals("Abies alb", SciNameNormalizer.normalize(" \txAbies × ållbbus\t"));
+    assertEquals("Rachis takt", SciNameNormalizer.normalize("Rhachis taktos"));
+
+    assertEquals("Hieracium sabaud", SciNameNormalizer.normalize("Hieracium sabaudum"));
+    assertEquals("Hieracium scorzoneraefoli", SciNameNormalizer.normalize("Hieracium scorzoneræfolium"));
+    assertEquals("Hieracium scorzonerifoli", SciNameNormalizer.normalize("Hieracium scorzonerifolium"));
+    assertEquals("Macrozamia platirach", SciNameNormalizer.normalize("Macrozamia platyrachis"));
+    assertEquals("Macrozamia platirach", SciNameNormalizer.normalize("Macrozamia platyrhachis"));
+    assertEquals("Cicas circinal", SciNameNormalizer.normalize("Cycas circinalis"));
+    assertEquals("Cicas circinal", SciNameNormalizer.normalize("Cycas circinnalis"));
+    assertEquals("Isolona perier", SciNameNormalizer.normalize("Isolona perieri"));
+    assertEquals("Isolona perier", SciNameNormalizer.normalize("Isolona perrieri"));
+    assertEquals("Isolona perier", SciNameNormalizer.normalize("Isolona perrierii"));
+
+    assertEquals("Carex caiouet", SciNameNormalizer.normalize("Carex ×cayouettei"));
+    assertEquals("Platanus hispanic", SciNameNormalizer.normalize("Platanus x hispanica"));
     // https://github.com/gbif/checklistbank/issues/7
-    assertEquals("Eragrostis brown", SciNameNormalizer.normalizeAll("Eragrostis brownii"));
-    assertEquals("Eragrostis brown", SciNameNormalizer.normalizeAll("Eragrostis brownei"));
+    assertEquals("Eragrostis brown", SciNameNormalizer.normalize("Eragrostis brownii"));
+    assertEquals("Eragrostis brown", SciNameNormalizer.normalize("Eragrostis brownei"));
   }
 
   @Test
   public void testHybridCross() throws Exception {
-    assertEquals("xcayouettei", SciNameNormalizer.normalize("xcayouettei"));
-    assertEquals("cayouettei", SciNameNormalizer.normalize("×cayouettei"));
+    assertEquals("xcaiouetei", SciNameNormalizer.normalize("xcayouettei"));
+    assertEquals("caiouetei", SciNameNormalizer.normalize("×cayouettei"));
 
     assertEquals("Carex xcaiouet", SciNameNormalizer.normalize("Carex xcayouettei"));
     assertEquals("Carex caiouet", SciNameNormalizer.normalize("Carex ×cayouettei"));
@@ -113,8 +118,8 @@ public void testHybridCross() throws Exception {
 
   @Test
   public void testNonAscii() throws Exception {
-    assertEquals("Cem Andrexi", SciNameNormalizer.normalize("Çem Ándrexï"));
-    assertEquals("SOEZsoezY¥µAAAAAAAECEEEEIIIIDNOOOOOOUUUUYssaaaaaaaeceeeeiiiidnoooooouuuuyy", SciNameNormalizer.normalize("ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ"));
+    assertEquals("CemAndrexi", SciNameNormalizer.normalize("Çem_Ándrexï"));
+    assertEquals("SOEZsoezY¥µAECEIDNOUYsaeceidnoui", SciNameNormalizer.normalize("ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ"));
   }
 
   @Test
diff --git a/dao/src/main/java/life/catalogue/es/NameStrings.java b/dao/src/main/java/life/catalogue/es/NameStrings.java
index 89120aa986..e5c6594763 100644
--- a/dao/src/main/java/life/catalogue/es/NameStrings.java
+++ b/dao/src/main/java/life/catalogue/es/NameStrings.java
@@ -10,7 +10,7 @@
 
 import static life.catalogue.es.ddl.Analyzer.SCINAME_AUTO_COMPLETE;
 import static life.catalogue.es.ddl.Analyzer.SCINAME_IGNORE_CASE;
-import static life.catalogue.es.nu.NameUsageWrapperConverter.normalizeWeakly;
+import static life.catalogue.es.nu.NameUsageWrapperConverter.normalize;
 
 /**
  * An object embedded within the name usage document solely aimed at optimizing searchability. The name strings within this class do not
@@ -44,17 +44,17 @@ public NameStrings(Name name) {
     }
     if (!StringUtils.isBlank(name.getGenus())) {
       genusLetter = Character.toLowerCase(name.getGenus().charAt(0));
-      genusOrMonomial = getStrings(name.getGenus(), normalizeWeakly(name.getGenus()));
+      genusOrMonomial = getStrings(name.getGenus(), normalize(name.getGenus()));
     } else if (!StringUtils.isBlank(name.getUninomial())) {
-      genusOrMonomial = getStrings(name.getUninomial().toLowerCase(), normalizeWeakly(name.getUninomial()));
+      genusOrMonomial = getStrings(name.getUninomial().toLowerCase(), normalize(name.getUninomial()));
     }
     // we used to use the strong normaliser to index species/infraspecific epithets...
     // But that caused more problems than it helped...
     if (!StringUtils.isBlank(name.getSpecificEpithet())) {
-      specificEpithet = getStrings(name.getSpecificEpithet().toLowerCase(), normalizeWeakly(name.getSpecificEpithet()));
+      specificEpithet = getStrings(name.getSpecificEpithet().toLowerCase(), normalize(name.getSpecificEpithet()));
     }
     if (!StringUtils.isBlank(name.getInfraspecificEpithet())) {
-      infraspecificEpithet = getStrings(name.getInfraspecificEpithet().toLowerCase(), normalizeWeakly(name.getInfraspecificEpithet()));
+      infraspecificEpithet = getStrings(name.getInfraspecificEpithet().toLowerCase(), normalize(name.getInfraspecificEpithet()));
     }
   }
 
diff --git a/dao/src/main/java/life/catalogue/es/nu/FuzzyMatcher.java b/dao/src/main/java/life/catalogue/es/nu/FuzzyMatcher.java
index 3c26e9983e..b62acf711d 100644
--- a/dao/src/main/java/life/catalogue/es/nu/FuzzyMatcher.java
+++ b/dao/src/main/java/life/catalogue/es/nu/FuzzyMatcher.java
@@ -4,8 +4,7 @@
 import life.catalogue.es.query.BoolQuery;
 import life.catalogue.es.query.Query;
 
-import static life.catalogue.es.nu.NameUsageWrapperConverter.normalizeStrongly;
-import static life.catalogue.es.nu.NameUsageWrapperConverter.normalizeWeakly;
+import static life.catalogue.es.nu.NameUsageWrapperConverter.normalize;
 
 /**
  * Abstract base class for fuzzy matching.
@@ -20,10 +19,10 @@ abstract class FuzzyMatcher extends QMatcher implements MatcherMixIn {
   @Override
   Query matchAsMonomial() {
     String[] terms = request.getSciNameSearchTerms();
-    String termWN = normalizeWeakly(terms[0]);
+    String termWN = normalize(terms[0]);
     // we used to use the strongly normalised terms to index/query species/infraspecific epithets.
     // But that caused more problems than it helped...
-    String termSN = normalizeStrongly(terms[0]);
+    String termSN = NameUsageWrapperConverter.normalize(terms[0]);
     return sciNameBaseQuery()
         .subquery(new BoolQuery() // Prefer subspecies over species and species over genera
             .should(matchAsEpithet(FLD_SUBSPECIES, termWN).withBoost(1.2))
@@ -34,10 +33,10 @@ Query matchAsMonomial() {
   @Override
   Query matchAsBinomial() {
     String[] terms = request.getSciNameSearchTerms();
-    String term0WN = normalizeWeakly(terms[0]);
-    String term0SN = normalizeStrongly(terms[0]);
-    String term1WN = normalizeWeakly(terms[1]);
-    String term1SN = normalizeStrongly(terms[1]);
+    String term0WN = normalize(terms[0]);
+    String term0SN = NameUsageWrapperConverter.normalize(terms[0]);
+    String term1WN = normalize(terms[1]);
+    String term1SN = NameUsageWrapperConverter.normalize(terms[1]);
     return sciNameBaseQuery()
         .subquery(new BoolQuery()
             .must(matchAsGenericEpithet(term0WN))
@@ -64,9 +63,9 @@ Query matchAsBinomial() {
   @Override
   Query matchAsTrinomial() {
     String[] terms = request.getSciNameSearchTerms();
-    String term0WN = normalizeWeakly(terms[0]);
-    String term1SN = normalizeStrongly(terms[1]);
-    String term2SN = normalizeStrongly(terms[2]);
+    String term0WN = normalize(terms[0]);
+    String term1SN = NameUsageWrapperConverter.normalize(terms[1]);
+    String term2SN = NameUsageWrapperConverter.normalize(terms[2]);
     return sciNameBaseQuery()
         .subquery(new BoolQuery()
             .must(matchAsGenericEpithet(term0WN))
diff --git a/dao/src/main/java/life/catalogue/es/nu/NameUsageWrapperConverter.java b/dao/src/main/java/life/catalogue/es/nu/NameUsageWrapperConverter.java
index 55e6b2cfa0..43a3b995e7 100644
--- a/dao/src/main/java/life/catalogue/es/nu/NameUsageWrapperConverter.java
+++ b/dao/src/main/java/life/catalogue/es/nu/NameUsageWrapperConverter.java
@@ -67,25 +67,15 @@ public static NameUsageWrapper decode(String payload) throws IOException {
   }
 
   /**
-   * Provides a weakly normalized version of the provided string. Used to index generic epithets. See {@link NameStrings}.
+   * Provides a normalized version of the provided string. Used to index generic epithets. See {@link NameStrings}.
    */
-  public static String normalizeWeakly(String s) {
+  public static String normalize(String s) {
     if (s == null) {
       return null;
     }
     return SciNameNormalizer.normalize(s.toLowerCase());
   }
 
-  /**
-   * Provides a strongly normalized version of the provided string. Used to index specific epithets and infraspecific epithets.
-   */
-  public static String normalizeStrongly(String s) {
-    if (s == null) {
-      return null;
-    }
-    return SciNameNormalizer.normalizeAll(s.toLowerCase());
-  }
-
   /**
    * Extracts the classification from the provided document.
    * 
diff --git a/dao/src/main/java/life/catalogue/es/nu/SimpleMatcher.java b/dao/src/main/java/life/catalogue/es/nu/SimpleMatcher.java
index dc3123a5c4..48c4849f50 100644
--- a/dao/src/main/java/life/catalogue/es/nu/SimpleMatcher.java
+++ b/dao/src/main/java/life/catalogue/es/nu/SimpleMatcher.java
@@ -5,7 +5,7 @@
 import life.catalogue.es.query.BoolQuery;
 import life.catalogue.es.query.Query;
 
-import static life.catalogue.es.nu.NameUsageWrapperConverter.normalizeWeakly;
+import static life.catalogue.es.nu.NameUsageWrapperConverter.normalize;
 
 /**
  * Abstract base class for non-fuzzy matching. Search terms are not normalized, so they can only hit the non-normalized versions of the
@@ -22,7 +22,7 @@ abstract class SimpleMatcher extends QMatcher implements MatcherMixIn {
   @Override
   Query matchAsMonomial() {
     String[] terms = request.getSciNameSearchTerms();
-    String term0 = normalizeWeakly(terms[0]);
+    String term0 = normalize(terms[0]);
     return sciNameBaseQuery()
         .subquery(new BoolQuery() // Prefer genus over species over subspecies
             .should(matchAsEpithet(FLD_SUBSPECIES, term0).withBoost(1.0))
@@ -33,8 +33,8 @@ Query matchAsMonomial() {
   @Override
   Query matchAsBinomial() {
     String[] terms = request.getSciNameSearchTerms();
-    String term0 = normalizeWeakly(terms[0]);
-    String term1 = normalizeWeakly(terms[1]);
+    String term0 = normalize(terms[0]);
+    String term1 = normalize(terms[1]);
     return sciNameBaseQuery()
         .subquery(new BoolQuery()
             .must(matchAsGenericEpithet(term0))
diff --git a/dao/src/main/java/life/catalogue/es/nu/search/NameUsageHighlighter.java b/dao/src/main/java/life/catalogue/es/nu/search/NameUsageHighlighter.java
index dfd0276887..ab0acd7a4f 100644
--- a/dao/src/main/java/life/catalogue/es/nu/search/NameUsageHighlighter.java
+++ b/dao/src/main/java/life/catalogue/es/nu/search/NameUsageHighlighter.java
@@ -5,6 +5,8 @@
 import life.catalogue.api.search.NameUsageSearchResponse;
 import life.catalogue.api.search.NameUsageWrapper;
 
+import life.catalogue.es.nu.NameUsageWrapperConverter;
+
 import org.gbif.nameparser.api.Authorship;
 
 import java.util.Set;
@@ -17,8 +19,7 @@
 import static life.catalogue.api.search.NameUsageSearchRequest.SearchContent.AUTHORSHIP;
 import static life.catalogue.api.search.NameUsageSearchRequest.SearchContent.SCIENTIFIC_NAME;
 import static life.catalogue.common.collection.CollectionUtils.isEmpty;
-import static life.catalogue.es.nu.NameUsageWrapperConverter.normalizeStrongly;
-import static life.catalogue.es.nu.NameUsageWrapperConverter.normalizeWeakly;
+import static life.catalogue.es.nu.NameUsageWrapperConverter.normalize;
 
 /*
  * A DIY highlighter we use in stead of Elasticsearch's highlight capabilities.
@@ -54,8 +55,8 @@ class NameUsageHighlighter {
       pattern = Pattern.compile(Pattern.quote(request.getQ().toLowerCase()));
     }
     if (sc.contains(SCIENTIFIC_NAME)) {
-      String qWN = normalizeWeakly(request.getQ());
-      String qSN = normalizeStrongly(request.getQ());
+      String qWN = normalize(request.getQ());
+      String qSN = NameUsageWrapperConverter.normalize(request.getQ());
       patternWN = Pattern.compile(Pattern.quote(qWN));
       patternSN = qWN.equals(qSN) ? null : Pattern.compile(Pattern.quote(qSN));
     }
@@ -89,11 +90,11 @@ private void highlightAuthorShip(NameUsageWrapper nuw) {
 
   private void highlightScientificName(NameUsageWrapper nuw) {
     String original = nuw.getUsage().getName().getScientificName();
-    Matcher matcher = patternWN.matcher(normalizeWeakly(original));
+    Matcher matcher = patternWN.matcher(normalize(original));
     String highlighted = highlight(original, matcher);
     if (highlighted.length() == original.length() && patternSN != null) {
       // Then no highlighting took place; let's try with the strongly normalized name
-      matcher = patternSN.matcher(normalizeStrongly(original));
+      matcher = patternSN.matcher(NameUsageWrapperConverter.normalize(original));
       highlighted = highlight(original, matcher);
     }
     nuw.getUsage().getName().setScientificName(highlighted);
diff --git a/dao/src/main/java/life/catalogue/matching/nidx/NameIndexChronicleStore.java b/dao/src/main/java/life/catalogue/matching/nidx/NameIndexChronicleStore.java
index 50c65aa05b..07d45e78dd 100644
--- a/dao/src/main/java/life/catalogue/matching/nidx/NameIndexChronicleStore.java
+++ b/dao/src/main/java/life/catalogue/matching/nidx/NameIndexChronicleStore.java
@@ -317,9 +317,8 @@ public IndexName read(Bytes in, @Nullable IndexName using) {
         int size = in.readInt();
         byte[] bytes = new byte[size];
         in.read(bytes);
-        if (using != null) {
-          System.out.println("WARN: IndexName instance existing: " + using);
-        }
+        // kryo creates a new instance anyway, so we always return a freshly deserialized object.
+        // Reusing the passed-in instance (always present in memory mode) would take considerably more effort.
         return kryo.readObject(new Input(bytes), IndexName.class);
       } finally {
         if (kryo != null) {
diff --git a/dao/src/main/java/life/catalogue/matching/nidx/NameIndexImpl.java b/dao/src/main/java/life/catalogue/matching/nidx/NameIndexImpl.java
index 37f6db7db2..3b230995e4 100644
--- a/dao/src/main/java/life/catalogue/matching/nidx/NameIndexImpl.java
+++ b/dao/src/main/java/life/catalogue/matching/nidx/NameIndexImpl.java
@@ -108,7 +108,8 @@ public NameMatch match(Name name, boolean allowInserts, boolean verbose) throws
       if (name.getRank() == null) {
         name.setRank(IndexName.CANONICAL_RANK);
       }
-      List<IndexName> candidates = store.get(key(name));
+      var key = key(name);
+      List<IndexName> candidates = store.get(key);
       if (candidates != null && !candidates.isEmpty()) {
         m = matchCandidates(name, candidates);
         if (verbose) {
@@ -493,7 +494,7 @@ private void createCanonical(NamesIndexMapper nim, String key, IndexName cn){
    */
   private static String key(FormattableName n) {
     String origName = NameFormatter.canonicalName(n);
-    return UnicodeUtils.replaceNonAscii(SciNameNormalizer.normalize(UnicodeUtils.decompose(origName)).toLowerCase(), '*');
+    return UnicodeUtils.replaceNonAscii(SciNameNormalizer.normalize(UnicodeUtils.decompose(origName), n.getType()).toLowerCase(), '*');
   }
   
   /**
diff --git a/dao/src/main/java/life/catalogue/matching/nidx/NameIndexKryoPool.java b/dao/src/main/java/life/catalogue/matching/nidx/NameIndexKryoPool.java
index d3e9ea5e1f..45b5bdd5ac 100644
--- a/dao/src/main/java/life/catalogue/matching/nidx/NameIndexKryoPool.java
+++ b/dao/src/main/java/life/catalogue/matching/nidx/NameIndexKryoPool.java
@@ -9,6 +9,7 @@
 import life.catalogue.common.kryo.FastUtilsSerializers;
 
 import org.gbif.nameparser.api.Authorship;
+import org.gbif.nameparser.api.NameType;
 import org.gbif.nameparser.api.Rank;
 
 import java.time.LocalDateTime;
@@ -33,6 +34,7 @@ public Kryo create() {
     kryo.register(IndexName.class);
     kryo.register(Authorship.class);
     kryo.register(Rank.class);
+    kryo.register(NameType.class);
     kryo.register(LocalDateTime.class);
     kryo.register(ArrayList.class);
     kryo.register(HashMap.class);
diff --git a/dao/src/main/java/life/catalogue/matching/nidx/NamesIndexConfig.java b/dao/src/main/java/life/catalogue/matching/nidx/NamesIndexConfig.java
index c2ea7a2f37..f21187ae00 100644
--- a/dao/src/main/java/life/catalogue/matching/nidx/NamesIndexConfig.java
+++ b/dao/src/main/java/life/catalogue/matching/nidx/NamesIndexConfig.java
@@ -12,9 +12,16 @@ public class NamesIndexConfig {
   public enum Store {MAPDB, CHRONICLE}
 
   public static NamesIndexConfig memory(int poolsize){
+    return memory(poolsize, -1);
+  }
+
+  public static NamesIndexConfig memory(int poolsize, int maxEntries){
     var cfg = new NamesIndexConfig();
     cfg.file = null;
     cfg.kryoPoolSize = poolsize;
+    if (maxEntries > 0) {
+      cfg.maxEntries = maxEntries;
+    }
     return cfg;
   }
 
@@ -42,12 +49,12 @@ public static NamesIndexConfig file(File location, int poolsize){
   public int kryoPoolSize = 1024;
 
   @NotNull
-  public Store type = Store.MAPDB;
+  public Store type = Store.CHRONICLE;
 
   /**
    * Maximum numbers of names index entries supported by a chronicle store
    */
   @Min(1_000)
-  public int maxEntries = 1_000;
+  public int maxEntries = 10_000;
 
 }
diff --git a/dao/src/main/resources/life/catalogue/db/dbschema.md b/dao/src/main/resources/life/catalogue/db/dbschema.md
index 3cf93ad475..6f2729b250 100644
--- a/dao/src/main/resources/life/catalogue/db/dbschema.md
+++ b/dao/src/main/resources/life/catalogue/db/dbschema.md
@@ -14,6 +14,10 @@ and done it manually. So we can as well log changes here.
 
 ### PROD changes
 
+#### 2025-01-07 names index name type
+```
+ALTER TABLE names_index ADD COLUMN type NAMETYPE;
+```
 
 #### 2024-12-05 taxon metrics
 ```
diff --git a/dao/src/main/resources/life/catalogue/db/dbschema.sql b/dao/src/main/resources/life/catalogue/db/dbschema.sql
index 6680af07b4..5993c29bd8 100644
--- a/dao/src/main/resources/life/catalogue/db/dbschema.sql
+++ b/dao/src/main/resources/life/catalogue/db/dbschema.sql
@@ -1106,6 +1106,7 @@ CREATE TABLE names_index (
   id SERIAL PRIMARY KEY,
   canonical_id INTEGER NOT NULL REFERENCES names_index,
   rank RANK NOT NULL,
+  type NAMETYPE,
   created TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW(),
   modified TIMESTAMP WITHOUT TIME ZONE DEFAULT NOW(),
   scientific_name TEXT NOT NULL,
diff --git a/dao/src/main/resources/life/catalogue/db/mapper/NamesIndexMapper.xml b/dao/src/main/resources/life/catalogue/db/mapper/NamesIndexMapper.xml
index f0fb3b4a0f..38c8a95156 100644
--- a/dao/src/main/resources/life/catalogue/db/mapper/NamesIndexMapper.xml
+++ b/dao/src/main/resources/life/catalogue/db/mapper/NamesIndexMapper.xml
@@ -12,6 +12,7 @@
     scientific_name,
     authorship,
     rank,
+    type,
     uninomial,
     genus,
     infrageneric_epithet,
@@ -31,6 +32,7 @@
     #{scientificName},
     #{authorship},
     #{rank}::RANK,
+    #{type}::NAMETYPE,
     #{uninomial},
     #{genus},
     #{infragenericEpithet},
diff --git a/dao/src/test/java/life/catalogue/es/nu/NameUsageWrapperConverterTest.java b/dao/src/test/java/life/catalogue/es/nu/NameUsageWrapperConverterTest.java
index 84a2fd668a..17fea46b1e 100644
--- a/dao/src/test/java/life/catalogue/es/nu/NameUsageWrapperConverterTest.java
+++ b/dao/src/test/java/life/catalogue/es/nu/NameUsageWrapperConverterTest.java
@@ -53,58 +53,58 @@ public void roundtripPayload() throws IOException {
   }
 
   @Test
-  public void testNormalizeWeakly1() {
-    String s = NameUsageWrapperConverter.normalizeWeakly("Larus");
+  public void testNormalize1() {
+    String s = NameUsageWrapperConverter.normalize("Larus");
     assertEquals("larus", s);
   }
 
   @Test
-  public void testNormalizeWeakly2() {
-    String s = NameUsageWrapperConverter.normalizeWeakly("等待");
+  public void testNormalize2() {
+    String s = NameUsageWrapperConverter.normalize("等待");
     assertEquals("等待", s);
   }
 
   @Test
-  public void testNormalizeWeakly3() {
-    String s = NameUsageWrapperConverter.normalizeWeakly("sérieux");
+  public void testNormalize3() {
+    String s = NameUsageWrapperConverter.normalize("sérieux");
     assertEquals("serieux", s);
   }
 
   @Test
-  public void testNormalizeStrongly1a() {
-    String s = NameUsageWrapperConverter.normalizeStrongly("Larus");
+  public void testNormalize1A() {
+    String s = NameUsageWrapperConverter.normalize("Larus");
     System.out.println(s);
     assertEquals("lar", s);
   }
 
   @Test
-  public void testNormalizeStrongly1b() {
-    String s = NameUsageWrapperConverter.normalizeStrongly("Larus fuscus");
+  public void testNormalize1B() {
+    String s = NameUsageWrapperConverter.normalize("Larus fuscus");
     assertEquals("larus fusc", s);
   }
 
   @Test
-  public void testNormalizeStrongly1c() {
-    String s = NameUsageWrapperConverter.normalizeStrongly("Larus fuscus fuscus");
+  public void testNormalize1C() {
+    String s = NameUsageWrapperConverter.normalize("Larus fuscus fuscus");
     System.out.println(s);
     assertEquals("larus fuscus fusc", s);
   }
 
   @Test
-  public void testNormalizeStrongly2() {
-    String s = NameUsageWrapperConverter.normalizeStrongly("等待");
+  public void testNormalize2b() {
+    String s = NameUsageWrapperConverter.normalize("等待");
     assertEquals("等待", s);
   }
 
   @Test
-  public void testNormalizeStrongly3() {
-    String s = NameUsageWrapperConverter.normalizeStrongly("sérieux");
+  public void testNormalize3b() {
+    String s = NameUsageWrapperConverter.normalize("sérieux");
     assertEquals("serieux", s);
   }
 
   @Test
-  public void testNormalizeStrongly4() {
-    String s = NameUsageWrapperConverter.normalizeStrongly("sylvestris");
+  public void testNormalize4() {
+    String s = NameUsageWrapperConverter.normalize("sylvestris");
     System.out.println(s);
     assertEquals("silvestr", s);
   }
diff --git a/dao/src/test/java/life/catalogue/matching/NameIndexImplTest.java b/dao/src/test/java/life/catalogue/matching/NameIndexImplTest.java
index 952f5fcc8c..dcf321bd2f 100644
--- a/dao/src/test/java/life/catalogue/matching/NameIndexImplTest.java
+++ b/dao/src/test/java/life/catalogue/matching/NameIndexImplTest.java
@@ -1,13 +1,12 @@
 package life.catalogue.matching;
 
 import life.catalogue.api.TestEntityGenerator;
-import life.catalogue.api.model.IndexName;
-import life.catalogue.api.model.Name;
-import life.catalogue.api.model.NameMatch;
-import life.catalogue.api.model.VerbatimRecord;
+import life.catalogue.api.model.*;
 import life.catalogue.api.vocab.MatchType;
 import life.catalogue.api.vocab.Origin;
+import life.catalogue.common.io.TabReader;
 import life.catalogue.common.tax.AuthorshipNormalizer;
+import life.catalogue.common.tax.SciNameNormalizer;
 import life.catalogue.db.mapper.NamesIndexMapper;
 import life.catalogue.matching.nidx.NameIndex;
 import life.catalogue.matching.nidx.NameIndexFactory;
@@ -15,8 +14,14 @@
 import life.catalogue.matching.nidx.NamesIndexConfig;
 import life.catalogue.parser.NameParser;
 
+import life.catalogue.parser.RankParser;
+
+import life.catalogue.parser.UnparsableException;
+
 import org.gbif.nameparser.api.*;
 
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -28,6 +33,9 @@
 import org.apache.ibatis.cursor.Cursor;
 import org.apache.ibatis.session.SqlSession;
 import org.apache.ibatis.session.SqlSessionFactory;
+
+import org.gbif.nameparser.util.RankUtils;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -64,7 +72,7 @@ public IndexName answer(InvocationOnMock invocation) {
       }}
     ).when(mapper).create(any());
 
-    ni = NameIndexFactory.build(NamesIndexConfig.memory(512), factory, aNormalizer).started();
+    ni = NameIndexFactory.build(NamesIndexConfig.memory(512, 10_000_000), factory, aNormalizer).started();
     assertEquals(0, ni.size());
   }
 
@@ -834,5 +842,50 @@ private NameMatch match(String name, Rank rank) throws InterruptedException {
     NameMatch m = ni.match(name(name, rank), false, true);
     return m;
   }
-  
+
+
+  /**
+   * Manual utility: reads a simple ColDP NameUsage file and adds 4 more columns to it:
+   *  1) normalised name
+   *  2) name type
+   *  3) nidx id
+   *  4) nidx canonical id
+   * Skipped when the developer-local input file does not exist, e.g. on CI.
+   */
+  @Test
+  public void procColDP() throws Exception {
+    File dir = new File("/Users/markus/Downloads/col12");
+    File fni = new File(dir, "NameUsage.tsv");
+    File fno = new File(dir, "NameUsageOut.tsv");
+    org.junit.Assume.assumeTrue("Missing local input file " + fni, fni.exists());
+    // Files.newBufferedWriter encodes as UTF-8, independent of the platform default charset
+    try (TabReader reader = TabReader.tab(fni, 1);
+         var fw = java.nio.file.Files.newBufferedWriter(fno.toPath())) {
+      for (var row : reader) {
+        fw.write(processLine(row) + "\n");
+      }
+    }
+    // report nidx
+    System.out.println("\n\n+++++ NIDX +++++");
+    System.out.println("Total records: " + ni.store().count());
+    // avoid large dumps
+    ni.close();
+    ni = null;
+  }
+
+  private String processLine(String[] row) throws UnparsableException, InterruptedException {
+    // col:ID	col:parentID	col:status	col:rank	col:scientificName	col:authorship
+    String[] extra = {"", "", "", ""}; // pre-filled: String.join would render null entries as the text "null"
+    Rank rank = RankParser.PARSER.parse(row[3]).get();
+    Name n = TestEntityGenerator.setUserDate(NameParser.PARSER.parse(row[4], row[5], rank, null, VerbatimRecord.VOID).get().getName());
+    n.setRank(rank);
+    extra[0] = SciNameNormalizer.normalize(row[4], n.getType());
+    var nm = ni.match(n, true, false);
+    extra[1] = n.getType().toString();
+    if (nm.hasMatch()) {
+      extra[2] = nm.getNameKey().toString();
+      extra[3] = nm.getCanonicalNameKey().toString();
+    }
+    return String.join("\t", row) + "\t" + String.join("\t", extra);
+  }
 }
\ No newline at end of file
diff --git a/webservice/src/test/java/life/catalogue/assembly/SectorSyncMergeIT.java b/webservice/src/test/java/life/catalogue/assembly/SectorSyncMergeIT.java
index 5e372ce60e..7a94b1e458 100644
--- a/webservice/src/test/java/life/catalogue/assembly/SectorSyncMergeIT.java
+++ b/webservice/src/test/java/life/catalogue/assembly/SectorSyncMergeIT.java
@@ -83,6 +83,7 @@ public class SectorSyncMergeIT extends SectorSyncTestBase {
   @Parameterized.Parameters
   public static Collection<Object[]> data() {
     return Arrays.asList(new Object[][] {
+      {"ismaridae", List.of("taxref", "dyntaxa", "artsnaven")},
       {"bolyeriidae", List.of("itis", "reptiledb", "uksi", "pbdb")},
       {"myosotis", List.of("taxref", "uksi", "pbdb", "bavaria")},
       {"tetralobus", List.of("wfo", "bouchard", "plazi")},
diff --git a/webservice/src/test/java/life/catalogue/importer/PgImportIT.java b/webservice/src/test/java/life/catalogue/importer/PgImportIT.java
index f421d6536a..ea4748f472 100644
--- a/webservice/src/test/java/life/catalogue/importer/PgImportIT.java
+++ b/webservice/src/test/java/life/catalogue/importer/PgImportIT.java
@@ -620,7 +620,7 @@ public void coldpProperties() throws Exception {
   public void testExternalManually() throws Exception {
     dataset.setType(DatasetType.TAXONOMIC);
 
-    normalizeAndImportFolder(new File("/Users/markus/Downloads/dataset-53133"), DWCA);
+    normalizeAndImportFolder(new File("/Users/markus/Downloads/dataset-307245"), COLDP);
     //normalizeAndImport(URI.create("https://bdj.pensoft.net/lib/ajax_srv/archive_download.php?archive_type=2&document_id=80487"), DWCA);
     //normalizeAndImport(URI.create("https://tb.plazi.org/GgServer/dwca/CB7EFFE7FFD3FFB3E551FFBDFF9C916F.zip"), DWCA);
     //normalizeAndImport(URI.create("https://github.com/mdoering/data-ina/archive/master.zip"), COLDP);
diff --git a/webservice/src/test/resources/txtree/ismaridae/artsnaven.txtree b/webservice/src/test/resources/txtree/ismaridae/artsnaven.txtree
new file mode 100644
index 0000000000..eab0f014a9
--- /dev/null
+++ b/webservice/src/test/resources/txtree/ismaridae/artsnaven.txtree
@@ -0,0 +1,7 @@
+Animalia [kingdom]
+  Arthropoda [phylum]
+    Insecta [class]
+      Hymenoptera [order]
+        Apocrita [suborder]
+          Diaprioidea [superfamily]
+            Ismariidae [family]
\ No newline at end of file
diff --git a/webservice/src/test/resources/txtree/ismaridae/dyntaxa.txtree b/webservice/src/test/resources/txtree/ismaridae/dyntaxa.txtree
new file mode 100644
index 0000000000..7c384dc734
--- /dev/null
+++ b/webservice/src/test/resources/txtree/ismaridae/dyntaxa.txtree
@@ -0,0 +1,10 @@
+Animalia [kingdom]
+  Arthropoda [phylum]
+    Hexapoda [subphylum]
+      Insecta Linnaeus, 1758 [class]
+        Hymenoptera Linnaeus, 1758 [order]
+          Apocrita Gerstaecker, 1867 [suborder]
+            Parasitica [infraorder]
+              Diaprioidea Sharkey, 2007 [superfamily]
+                Ismaridae Thomson, 1858 [family]
+                  =Ismarinae Thomson, 1858 [family]
\ No newline at end of file
diff --git a/webservice/src/test/resources/txtree/ismaridae/expected.txtree b/webservice/src/test/resources/txtree/ismaridae/expected.txtree
new file mode 100644
index 0000000000..f333fd4338
--- /dev/null
+++ b/webservice/src/test/resources/txtree/ismaridae/expected.txtree
@@ -0,0 +1,9 @@
+Biota [unranked]
+  Animalia [kingdom]
+    Arthropoda [phylum]
+      Hexapoda [subphylum]
+        Insecta [class]
+          Hymenoptera [order]
+            Diaprioidea [superfamily]
+              Ismaridae Thomson, 1858 [family]
+                =Ismarinae Thomson, 1858 [family]
diff --git a/webservice/src/test/resources/txtree/ismaridae/project.txtree b/webservice/src/test/resources/txtree/ismaridae/project.txtree
new file mode 100644
index 0000000000..1411b3424f
--- /dev/null
+++ b/webservice/src/test/resources/txtree/ismaridae/project.txtree
@@ -0,0 +1,8 @@
+Biota [unranked]
+  Animalia [kingdom]
+    Arthropoda [phylum]
+      Hexapoda [subphylum]
+        Insecta [class]
+          Hymenoptera [order]
+            Diaprioidea [superfamily]
+              Ismaridae [family]
\ No newline at end of file
diff --git a/webservice/src/test/resources/txtree/ismaridae/readme.md b/webservice/src/test/resources/txtree/ismaridae/readme.md
new file mode 100644
index 0000000000..4a91b83a94
--- /dev/null
+++ b/webservice/src/test/resources/txtree/ismaridae/readme.md
@@ -0,0 +1,2 @@
+Family misspellings of Ismariidae that should be caught by the nidx alone
+https://github.com/CatalogueOfLife/data/issues/889
\ No newline at end of file
diff --git a/webservice/src/test/resources/txtree/ismaridae/taxref.txtree b/webservice/src/test/resources/txtree/ismaridae/taxref.txtree
new file mode 100644
index 0000000000..7cdb8f6a72
--- /dev/null
+++ b/webservice/src/test/resources/txtree/ismaridae/taxref.txtree
@@ -0,0 +1,20 @@
+Biota Endl. (D.Don) [domain]
+  Animalia Linnaeus, 1758 [kingdom]
+    Eumetazoa Bütschli, 1910 [subkingdom]
+      Bilateria Haeckel, 1874 [unranked]
+        Protostomia Grobben, 1908 [infrakingdom]
+          Cuticulata [unranked]
+            Ecdysozoa Aguinaldo, Turbeville, Linford, Rivera, Garey, Raff & Lake, 1997 [unranked]
+              Panarthropoda Nielsen, 1995 [unranked]
+                Arthropoda Latreille, 1829 [phylum]
+                  Pancrustacea Zrzavý & Štys, 1997 [subphylum]
+                    Altocrustacea Regier, Schultz, Zwick, Hussey, Ball, Wetzer, Martin & Cunningham, 2010 [infraphylum]
+                      Hexapoda Blainville, 1816 [superclass]
+                        Insecta Linnaeus, 1758 [class]
+                          Dicondylia Hennig, 1953 [infraclass]
+                            Pterygota Brauer, 1885 [infraclass]
+                              Neoptera Martynov, 1923 [unranked]
+                                Hymenoptera Linnaeus, 1758 [order]
+                                  Apocrita Gerstäcker, 1867 [suborder]
+                                    Diaprioidea Haliday, 1833 [superfamily]
+                                      Ismaridae Thomson, 1858 [family]
\ No newline at end of file