Skip to content

Commit

Permalink
fix for training data generation with updated affiliation-address parser
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Dec 21, 2023
1 parent a9b6826 commit e235c88
Showing 1 changed file with 8 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -1364,9 +1364,15 @@ public Document createTraining(File inputFile,
}

// buffer for the affiliation+address block
-List<LayoutToken> tokenizationsAffiliation = resHeader.getLayoutTokens(TaggingLabels.HEADER_AFFILIATION);
+
+List<List<LayoutToken>> tokenizationsAffiliation = resHeader.getAffiliationAddresslabeledTokens();
+//List<LayoutToken> tokenizationsAffiliation = resHeader.getLayoutTokens(TaggingLabels.HEADER_AFFILIATION);
+List<LayoutToken> tokenizationAffiliation = new ArrayList<>();
+for (List<LayoutToken> tokenization : tokenizationsAffiliation) {
+    tokenizationAffiliation.addAll(tokenization);
+}
 StringBuilder bufferAffiliation =
-    parsers.getAffiliationAddressParser().trainingExtraction(tokenizationsAffiliation);
+    parsers.getAffiliationAddressParser().trainingExtraction(tokenizationAffiliation);

// buffer for the date block
StringBuilder bufferDate = null;
Expand Down

0 comments on commit e235c88

Please sign in to comment.