From c8302316c9007a03edaf77b9e20d2f6c623877b2 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Sat, 7 Dec 2024 07:11:00 +0000 Subject: [PATCH] fix wrong upper indexes, cleanup --- .../grobid/core/engines/FullTextParser.java | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java index 8a1f342712..0523d98c23 100755 --- a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java @@ -374,18 +374,6 @@ private static String revertResultsForBadItems(List badItems, List rawLayoutTokenTable = badItem.getLayoutTokens(); LayoutToken firstLayoutTokenItem = rawLayoutTokenTable.get(0); -// final List documentTokenization = layoutTokenization.getTokenization(); - -// int tokenIndex = IntStream.range(0, documentTokenization.size()) -// .filter(i -> { -// LayoutToken l = documentTokenization.get(i); -// return l.getText().equals(firstLayoutTokenTable.getText()) -// && l.getPage() == firstLayoutTokenTable.getPage() -// && l.getOffset() == firstLayoutTokenTable.getOffset(); -// }) -// .findFirst() -// .orElse(-1); - List candidateIndexes = IntStream.range(0, splitResult.size()) .filter(i -> splitResult.get(i).get(0).equals(firstLayoutTokenItem.getText()) && Iterables.getLast(splitResult.get(i)).equals("I-"+itemLabel)) @@ -435,7 +423,7 @@ private static String revertResultsForBadItems(List badItems, if (resultIndexCandidate > -1) { boolean first = true; - for (int i = resultIndexCandidate;i < resultIndexCandidate + tokensNoSpace.size(); i++) { + for (int i = resultIndexCandidate;i < Math.min(resultIndexCandidate + tokensNoSpace.size(), splitResult.size()); i++) { List line = splitResult.get(i); String label = Iterables.getLast(line); if (first) { @@ -450,12 +438,6 @@ private static String revertResultsForBadItems(List badItems, } else { LOGGER.warn("Cannot find the result index candidate."); } -// List> badTableResult = Arrays.stream(badTable.getRawLayoutTokens().stream() -// .map(LayoutToken::getText) -// .toArray(String[]::new)) -// .map(l -> Arrays.stream(l.split("\t")).collect(Collectors.toList())) -// .collect(Collectors.toList()); -// } String resultBody2 = splitResult.stream()