Skip to content

Commit

Permalink
proper affiliation coordinates
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Dec 13, 2023
1 parent 8ade3c0 commit d7b2ff1
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 8 deletions.
22 changes: 21 additions & 1 deletion grobid-core/src/main/java/org/grobid/core/data/Affiliation.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.utilities.LayoutTokensUtil;

import java.util.*;

Expand Down Expand Up @@ -355,7 +357,7 @@ public void setLayoutTokens(List<LayoutToken> tokens) {

/**
 * Appends the given layout tokens to the tokens already attached to this
 * affiliation.
 *
 * The backing list is created only when it does not exist yet, so tokens
 * added by previous calls are kept.
 *
 * @param tokens layout tokens to add; a {@code null} argument is ignored
 */
public void appendLayoutTokens(List<LayoutToken> tokens) {
    if (tokens == null) {
        // Nothing to append; avoid the NullPointerException addAll(null) would raise.
        return;
    }
    // Lazily create the list exactly once. Re-creating it unconditionally would
    // drop every token appended by earlier calls.
    if (this.layoutTokens == null) {
        this.layoutTokens = new ArrayList<>();
    }
    this.layoutTokens.addAll(tokens);
}

Expand Down Expand Up @@ -572,11 +574,29 @@ public String toTEI() {
}*/

/**
 * Serializes an affiliation to TEI without any analysis configuration.
 *
 * Convenience overload that forwards to
 * {@code toTEI(Affiliation, int, GrobidAnalysisConfig)} with a {@code null}
 * configuration.
 *
 * @param aff   the affiliation to serialize
 * @param nbTag indentation level passed through to the three-argument overload
 * @return the TEI string produced by the three-argument overload
 */
public static String toTEI(Affiliation aff, int nbTag) {
    final GrobidAnalysisConfig noConfig = null;
    return toTEI(aff, nbTag, noConfig);
}

public static String toTEI(Affiliation aff, int nbTag, GrobidAnalysisConfig config) {
StringBuffer tei = new StringBuffer();
TextUtilities.appendN(tei, '\t', nbTag + 1);

boolean withAffCoords = (config != null) &&
(config.getGenerateTeiCoordinates() != null) &&
(config.getGenerateTeiCoordinates().contains("affiliation"));
boolean orgNameCoords = (config != null) &&
(config.getGenerateTeiCoordinates() != null) &&
(config.getGenerateTeiCoordinates().contains("orgName"));

tei.append("<affiliation");
if (aff.getKey() != null)
tei.append(" key=\"").append(aff.getKey()).append("\"");
if (withAffCoords) {
String coords = LayoutTokensUtil.getCoordsString(aff.getLayoutTokens());
if (coords != null && coords.length()>0) {
tei.append(" coord=\"" + coords + "\"");
}
}
tei.append(">\n");

if (aff.getDepartments() != null) {
Expand Down
15 changes: 9 additions & 6 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -3779,17 +3779,20 @@ private void appendAffiliation(
GrobidAnalysisConfig config,
Lexicon lexicon
) {
boolean affiliationWithCoords = (config.getGenerateTeiCoordinates() != null) && (config.getGenerateTeiCoordinates().contains("affiliation"));
boolean orgnameWithCoords = (config.getGenerateTeiCoordinates() != null) && (config.getGenerateTeiCoordinates().contains("orgName"));
boolean affiliationWithCoords = (config != null) &&
(config.getGenerateTeiCoordinates() != null) &&
(config.getGenerateTeiCoordinates().contains("affiliation"));
boolean orgnameWithCoords = (config != null) &&
(config.getGenerateTeiCoordinates() != null) &&
(config.getGenerateTeiCoordinates().contains("orgName"));

TextUtilities.appendN(tei, '\t', nbTag);
tei.append("<affiliation");
if (aff.getKey() != null)
tei.append(" key=\"").append(aff.getKey()).append("\"");
if (affiliationWithCoords) {
// we serialize the coordinates for the whole affiliation block
List<LayoutToken> affTokens = aff.getLayoutTokens();
String coords = LayoutTokensUtil.getCoordsString(affTokens);
String coords = LayoutTokensUtil.getCoordsString(aff.getLayoutTokens());
if (coords != null && coords.length()>0) {
tei.append(" coord=\"" + coords + "\"");
}
Expand Down Expand Up @@ -3824,8 +3827,8 @@ private void appendAffiliation(
int q = 1;
for (String depa : aff.getDepartments()) {
TextUtilities.appendN(tei, '\t', nbTag + 1);
tei.append("<orgName type=\"department\" key=\"dep" + q + "\">" +
TextUtilities.HTMLEncode(depa) + "</orgName>\n");
tei.append("<orgName type=\"department\" key=\"dep" + q + "\"");
tei.append(">" +TextUtilities.HTMLEncode(depa) + "</orgName>\n");
q++;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,10 @@ protected List<Affiliation> resultExtractionLayoutTokens(String result,
}
affiliation.addLabeledResult(TaggingLabels.AFFILIATION_ADDRESSLINE, tokens);
}

if (!clusterLabel.equals(TaggingLabels.OTHER) && affiliation.isNotNull()) {
affiliation.appendLayoutTokens(tokens);
}
}

// last affiliation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,8 @@ public class TaggingLabels {
// Tagging labels bound to the GrobidModels.AFFILIATION_ADDRESS model, one per address field label.
public static final TaggingLabel AFFILIATION_REGION = new TaggingLabelImpl(GrobidModels.AFFILIATION_ADDRESS, REGION_LABEL);
public static final TaggingLabel AFFILIATION_SETTLEMENT = new TaggingLabelImpl(GrobidModels.AFFILIATION_ADDRESS, SETTLEMENT_LABEL);
public static final TaggingLabel AFFILIATION_ADDRESSLINE = new TaggingLabelImpl(GrobidModels.AFFILIATION_ADDRESS, ADDRESSLINE_LABEL);

// Tagging label pairing the same GrobidModels.AFFILIATION_ADDRESS model with OTHER_LABEL.
public static final TaggingLabel AFFILIATION_OTHER = new TaggingLabelImpl(GrobidModels.AFFILIATION_ADDRESS, OTHER_LABEL);

/**
 * Registers a tagging label in the shared {@code cache}.
 *
 * The cache key is a {@code Pair} built from the label's model
 * ({@code getGrobidModel()}) and its label string ({@code getLabel()}).
 * {@code putIfAbsent} is used, so (assuming {@code cache} follows the
 * {@code java.util.Map} contract - its declaration is not visible here)
 * an entry already stored under the same key is left untouched.
 *
 * @param label the tagging label to register
 */
protected static void register(TaggingLabel label) {
cache.putIfAbsent(new Pair<>(label.getGrobidModel(), label.getLabel()), label);
}
Expand Down Expand Up @@ -455,6 +456,7 @@ protected static void register(TaggingLabel label) {
register(AFFILIATION_REGION);
register(AFFILIATION_SETTLEMENT);
register(AFFILIATION_ADDRESSLINE);
register(AFFILIATION_OTHER);
}

protected TaggingLabels() {
Expand Down

0 comments on commit d7b2ff1

Please sign in to comment.