Skip to content

Commit

Permalink
remove unnecessary layout tokens list, renaming stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Dec 6, 2024
1 parent 7780804 commit e0e217d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 16 deletions.
10 changes: 0 additions & 10 deletions grobid-core/src/main/java/org/grobid/core/data/Figure.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,6 @@ public boolean apply(GraphicObject graphicObject) {
private List<BoundingBox> textArea;
private List<LayoutToken> layoutTokens;

// Contains the raw layoutTokens from the fulltext model
protected List<LayoutToken> rawLayoutTokens = new ArrayList<>();

// coordinates
private int page = -1;
private double y = 0.0;
Expand Down Expand Up @@ -576,11 +573,4 @@ public void setUri(URI uri) {
this.uri = uri;
}

/**
 * Returns the raw layout tokens held by this object.
 *
 * <p>The backing field is returned directly, without a defensive copy.
 *
 * @return the current value of the {@code rawLayoutTokens} field
 */
public List<LayoutToken> getRawLayoutTokens() {
return rawLayoutTokens;
}

/**
 * Replaces the raw layout tokens held by this object.
 *
 * <p>The given list reference is stored as-is; no copy is made.
 *
 * @param rawLayoutTokens the list to assign to the {@code rawLayoutTokens} field
 */
public void setRawLayoutTokens(List<LayoutToken> rawLayoutTokens) {
this.rawLayoutTokens = rawLayoutTokens;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,10 @@ private static String revertResultsForBadItems(List<? extends Figure> badItems,
.map(l -> Arrays.stream(l.split("\t")).collect(Collectors.toList()))
.collect(Collectors.toList());

for (Figure badTable : badItems) {
for (Figure badItem : badItems) {
// Find the index of the first layoutToken of the bad item (figure or table) in the tokenization
List<LayoutToken> rawLayoutTokenTable = badTable.getRawLayoutTokens();
LayoutToken firstLayoutTokenTable = rawLayoutTokenTable.get(0);
List<LayoutToken> rawLayoutTokenTable = badItem.getLayoutTokens();
LayoutToken firstLayoutTokenItem = rawLayoutTokenTable.get(0);

// final List<LayoutToken> documentTokenization = layoutTokenization.getTokenization();

Expand All @@ -387,14 +387,14 @@ private static String revertResultsForBadItems(List<? extends Figure> badItems,
// .orElse(-1);

List<Integer> candidateIndexes = IntStream.range(0, splitResult.size())
.filter(i -> splitResult.get(i).get(0).equals(firstLayoutTokenTable.getText())
.filter(i -> splitResult.get(i).get(0).equals(firstLayoutTokenItem.getText())
&& Iterables.getLast(splitResult.get(i)).equals("I-"+itemLabel))
.boxed()
.collect(Collectors.toList());

if (candidateIndexes.isEmpty()) {
candidateIndexes = IntStream.range(0, splitResult.size())
.filter(i -> splitResult.get(i).get(0).equals(firstLayoutTokenTable.getText())
.filter(i -> splitResult.get(i).get(0).equals(firstLayoutTokenItem.getText())
&& Iterables.getLast(splitResult.get(i)).equals(itemLabel))
.boxed()
.collect(Collectors.toList());
Expand Down Expand Up @@ -2289,7 +2289,7 @@ protected List<Table> processTables(String rese,

for (Table result : localResults) {
List<LayoutToken> localTokenizationTable = result.getLayoutTokens();
result.setRawLayoutTokens(tokenizationTable);
// result.setRawLayoutTokens(tokenizationTable);

// block setting: we restrict to the tokenization of this particular table
SortedSet<Integer> blockPtrs = new TreeSet<>();
Expand Down

0 comments on commit e0e217d

Please sign in to comment.