Skip to content

Commit

Permalink
fix a missing serialization case; add option includeRawCopyrights in …
Browse files Browse the repository at this point in the history
…service
  • Loading branch information
kermitt2 committed Feb 4, 2024
1 parent 4359b40 commit af5a20b
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,8 @@ public StringBuilder toTEIHeader(BiblioItem biblio,

if ((biblio.getPublisher() != null) ||
(biblio.getPublicationDate() != null) ||
(biblio.getNormalizedPublicationDate() != null)) {
(biblio.getNormalizedPublicationDate() != null) ||
biblio.getCopyrightsLicense() != null) {
tei.append("\t\t\t<publicationStmt>\n");

CopyrightsLicense copyrightsLicense = biblio.getCopyrightsLicense();
Expand All @@ -273,7 +274,7 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
tei.append("\t\t\t\t<availability ");

boolean addCopyrightsComment = false;
if (copyrightsLicense.getCopyrightsOwner() != null) {
if (copyrightsLicense.getCopyrightsOwner() != null && copyrightsLicense.getCopyrightsOwner() != License.UNDECIDED) {
tei.append("resp=\""+ copyrightsLicense.getCopyrightsOwner().getName() +"\" ");
addCopyrightsComment = true;
}
Expand All @@ -292,21 +293,31 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
tei.append("\t\t\t\t\t<licence/>\n");
}

/*tei.append("<p>Copyright ");
//if (biblio.getPublicationDate() != null)
tei.append(TextUtilities.HTMLEncode(biblio.getPublisher()) + "</p>\n");*/
// Optionally emit the raw copyright string (escaped with TextUtilities.HTMLEncode)
// as a <p type="raw"> child of <availability>; skipped when the option is off
// or the string is null/empty.
if (config.getIncludeRawCopyrights() && biblio.getCopyright() != null && biblio.getCopyright().length()>0) {
    tei.append("\t\t\t\t\t<p type=\"raw\">");
    tei.append(TextUtilities.HTMLEncode(biblio.getCopyright()));
    // The closing tag must match the <p> opened above; a mismatched closing
    // tag makes the generated TEI header malformed XML.
    tei.append("</p>\n");
}

tei.append("\t\t\t\t</availability>\n");
} else {
tei.append("\t\t\t\t<availability ");

tei.append(" status=\"unknown\">\n");
tei.append("\t\t\t\t\t<licence/>\n");

if (defaultPublicationStatement == null) {
tei.append(" status=\"unknown\"><licence/></availability>");
} else {
tei.append(" status=\"unknown\"><p>" +
TextUtilities.HTMLEncode(defaultPublicationStatement) + "</p></availability>");
if (defaultPublicationStatement != null) {
tei.append("\t\t\t\t\t<p>" +
TextUtilities.HTMLEncode(defaultPublicationStatement) + "</p>\n");
}
tei.append("\n");

// Optionally emit the raw copyright string (escaped with TextUtilities.HTMLEncode)
// as a <p type="raw"> child of the status="unknown" <availability> element;
// skipped when the option is off or the string is null/empty.
if (config.getIncludeRawCopyrights() && biblio.getCopyright() != null && biblio.getCopyright().length()>0) {
    tei.append("\t\t\t\t\t<p type=\"raw\">");
    tei.append(TextUtilities.HTMLEncode(biblio.getCopyright()));
    // The closing tag must match the <p> opened above; a mismatched closing
    // tag makes the generated TEI header malformed XML.
    tei.append("</p>\n");
}

tei.append("\t\t\t\t</availability>\n");
}

if (biblio.getNormalizedPublicationDate() != null) {
Expand Down
12 changes: 10 additions & 2 deletions grobid-core/src/main/java/org/grobid/core/engines/Engine.java
Original file line number Diff line number Diff line change
Expand Up @@ -350,13 +350,15 @@ public String processHeader(
String inputFile,
int consolidate,
boolean includeRawAffiliations,
boolean includeRawCopyrights,
BiblioItem result
) {
GrobidAnalysisConfig config = new GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
.startPage(0)
.endPage(2)
.consolidateHeader(consolidate)
.includeRawAffiliations(includeRawAffiliations)
.includeRawCopyrights(includeRawCopyrights)
.build();
return processHeader(inputFile, null, config, result);
}
Expand All @@ -380,12 +382,14 @@ public String processHeaderFunding(
File inputFile,
int consolidateHeader,
int consolidateFunders,
boolean includeRawAffiliations
boolean includeRawAffiliations,
boolean includeRawCopyrights
) throws Exception {
GrobidAnalysisConfig config = new GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
.consolidateHeader(consolidateHeader)
.consolidateFunders(consolidateFunders)
.includeRawAffiliations(includeRawAffiliations)
.includeRawCopyrights(includeRawCopyrights)
.build();
return processHeaderFunding(inputFile, null, config);
}
Expand All @@ -408,13 +412,15 @@ public String processHeader(
String md5Str,
int consolidate,
boolean includeRawAffiliations,
boolean includeRawCopyrights,
BiblioItem result
) {
GrobidAnalysisConfig config = new GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
.startPage(0)
.endPage(2)
.consolidateHeader(consolidate)
.includeRawAffiliations(includeRawAffiliations)
.includeRawCopyrights(includeRawCopyrights)
.build();
return processHeader(inputFile, md5Str, config, result);
}
Expand All @@ -440,12 +446,14 @@ public String processHeaderFunding(
String md5Str,
int consolidateHeader,
int consolidateFunders,
boolean includeRawAffiliations
boolean includeRawAffiliations,
boolean includeRawCopyrights
) throws Exception {
GrobidAnalysisConfig config = new GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
.consolidateHeader(consolidateHeader)
.consolidateFunders(consolidateFunders)
.includeRawAffiliations(includeRawAffiliations)
.includeRawCopyrights(includeRawCopyrights)
.build();
return processHeaderFunding(inputFile, md5Str, config);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ private GrobidAnalysisConfig() {
// if the raw bibliographical string should be included in the parsed results
private boolean includeRawCitations = false;

// if the raw copyrights/license string should be included in the parsed results
private boolean includeRawCopyrights = false;

/// === TEI-specific settings ==

// if true, generate random attribute id on the textual elements of
Expand Down Expand Up @@ -131,6 +134,11 @@ public GrobidAnalysisConfigBuilder includeRawCitations(boolean rawCitations) {
return this;
}

/**
 * Sets whether the raw copyrights/license string should be included in the parsed results.
 *
 * @param rawCopyrights value stored in the configuration's includeRawCopyrights flag
 * @return this builder, so calls can be chained
 */
public GrobidAnalysisConfigBuilder includeRawCopyrights(boolean rawCopyrights) {
config.includeRawCopyrights = rawCopyrights;
return this;
}

public GrobidAnalysisConfigBuilder startPage(int p) {
config.startPage = p;
return this;
Expand Down Expand Up @@ -238,6 +246,10 @@ public boolean getIncludeRawCitations() {
return includeRawCitations;
}

/**
 * Returns the includeRawCopyrights flag of this configuration.
 *
 * @return the current value of the includeRawCopyrights field
 */
public boolean getIncludeRawCopyrights() {
return includeRawCopyrights;
}

public boolean isGenerateTeiIds() {
return generateTeiIds;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class GrobidRestService implements GrobidPaths {
public static final String CONSOLIDATE_FUNDERS = "consolidateFunders";
public static final String INCLUDE_RAW_AFFILIATIONS = "includeRawAffiliations";
public static final String INCLUDE_RAW_CITATIONS = "includeRawCitations";
public static final String INCLUDE_RAW_COPYRIGHTS = "includeRawCopyrights";
public static final String INCLUDE_FIGURES_TABLES = "includeFiguresTables";

@Inject
Expand Down Expand Up @@ -156,11 +157,13 @@ public Response getAdmin_htmlGet(@QueryParam(SHA1) String sha1) {
public Response processHeaderDocumentReturnXml_post(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights) {
int consol = validateConsolidationParam(consolidate);
return restProcessFiles.processStatelessHeaderDocument(
inputStream, consol,
validateIncludeRawParam(includeRawAffiliations),
validateIncludeRawParam(includeRawCopyrights),
ExpectedResponseType.XML
);
}
Expand All @@ -173,12 +176,13 @@ public Response processHeaderFundingDocumentReturnXml_post(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidateHeader,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights) {
int consolHeader = validateConsolidationParam(consolidateHeader);
int consolFunders = validateConsolidationParam(consolidateFunders);
return restProcessFiles.processStatelessHeaderFundingDocument(
inputStream, consolHeader, consolFunders,
validateIncludeRawParam(includeRawAffiliations)
validateIncludeRawParam(includeRawAffiliations), validateIncludeRawParam(includeRawCopyrights)
);
}

Expand All @@ -190,8 +194,9 @@ public Response processHeaderFundingDocumentReturnXml_post(
public Response processStatelessHeaderDocumentReturnXml(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
return processHeaderDocumentReturnXml_post(inputStream, consolidate, includeRawAffiliations);
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights) {
return processHeaderDocumentReturnXml_post(inputStream, consolidate, includeRawAffiliations, includeRawCopyrights);
}

@Path(PATH_HEADER)
Expand All @@ -201,11 +206,13 @@ public Response processStatelessHeaderDocumentReturnXml(
public Response processHeaderDocumentReturnBibTeX_post(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights) {
int consol = validateConsolidationParam(consolidate);
return restProcessFiles.processStatelessHeaderDocument(
inputStream, consol,
validateIncludeRawParam(includeRawAffiliations),
validateIncludeRawParam(includeRawCopyrights),
ExpectedResponseType.BIBTEX
);
}
Expand All @@ -217,8 +224,9 @@ public Response processHeaderDocumentReturnBibTeX_post(
public Response processStatelessHeaderDocumentReturnBibTeX(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidate,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations) {
return processHeaderDocumentReturnBibTeX_post(inputStream, consolidate, includeRawAffiliations);
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights) {
return processHeaderDocumentReturnBibTeX_post(inputStream, consolidate, includeRawAffiliations, includeRawCopyrights);
}

@Path(PATH_FULL_TEXT)
Expand All @@ -231,6 +239,7 @@ public Response processFulltextDocument_post(
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
@DefaultValue("-1") @FormDataParam("end") int endPage,
Expand All @@ -239,7 +248,7 @@ public Response processFulltextDocument_post(
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processFulltext(
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
includeRawAffiliations, includeRawCitations, includeRawCopyrights,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
}
Expand All @@ -254,6 +263,7 @@ public Response processFulltextDocument(
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
@DefaultValue("-1") @FormDataParam("end") int endPage,
Expand All @@ -262,7 +272,7 @@ public Response processFulltextDocument(
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processFulltext(
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
includeRawAffiliations, includeRawCitations, includeRawCopyrights,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
}
Expand All @@ -273,6 +283,7 @@ private Response processFulltext(InputStream inputStream,
String consolidateFunders,
String includeRawAffiliations,
String includeRawCitations,
String includeRawCopyrights,
int startPage,
int endPage,
String generateIDs,
Expand All @@ -291,7 +302,7 @@ private Response processFulltext(InputStream inputStream,
return restProcessFiles.processFulltextDocument(
inputStream, consolHeader, consolCitations, consolFunders,
validateIncludeRawParam(includeRawAffiliations),
includeRaw,
includeRaw, validateIncludeRawParam(includeRawCopyrights),
startPage, endPage, generate, segment, teiCoordinates
);
}
Expand Down Expand Up @@ -341,6 +352,7 @@ public Response processFulltextAssetDocument_post(
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
@DefaultValue("-1") @FormDataParam("end") int endPage,
Expand All @@ -349,7 +361,7 @@ public Response processFulltextAssetDocument_post(
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processStatelessFulltextAssetHelper(
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
includeRawAffiliations, includeRawCitations, includeRawCopyrights,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
}
Expand All @@ -364,6 +376,7 @@ public Response processStatelessFulltextAssetDocument(
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_COPYRIGHTS) String includeRawCopyrights,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
@DefaultValue("-1") @FormDataParam("end") int endPage,
Expand All @@ -372,7 +385,7 @@ public Response processStatelessFulltextAssetDocument(
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processStatelessFulltextAssetHelper(
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
includeRawAffiliations, includeRawCitations, includeRawCopyrights,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
}
Expand All @@ -382,6 +395,7 @@ private Response processStatelessFulltextAssetHelper(InputStream inputStream,
String consolidateCitations,
String consolidateFunders,
// Parameter order must match the call sites, which pass
// (includeRawAffiliations, includeRawCitations, includeRawCopyrights);
// declaring copyrights before citations silently swaps the two flags.
String includeRawAffiliations,
String includeRawCitations,
String includeRawCopyrights,
int startPage,
int endPage,
Expand All @@ -400,7 +414,7 @@ private Response processStatelessFulltextAssetHelper(InputStream inputStream,
return restProcessFiles.processStatelessFulltextAssetDocument(
inputStream, consolHeader, consolCitations, consolFunders,
validateIncludeRawParam(includeRawAffiliations),
includeRaw,
includeRaw, validateIncludeRawParam(includeRawCopyrights),
startPage, endPage, generate, segment, teiCoordinates
);
}
Expand Down
Loading

0 comments on commit af5a20b

Please sign in to comment.