Skip to content

Commit

Permalink
update XML schema
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Feb 9, 2024
1 parent 86b03e8 commit 16b9abb
Show file tree
Hide file tree
Showing 9 changed files with 26,321 additions and 12,087 deletions.
4,754 changes: 814 additions & 3,940 deletions grobid-home/schemas/doc/Grobid_doc.html

Large diffs are not rendered by default.

295 changes: 208 additions & 87 deletions grobid-home/schemas/dtd/Grobid.dtd

Large diffs are not rendered by default.

19,285 changes: 19,285 additions & 0 deletions grobid-home/schemas/odd/Grobid.compiled.odd

Large diffs are not rendered by default.

76 changes: 40 additions & 36 deletions grobid-home/schemas/odd/Grobid.odd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0"?>
<?xml version="1.0" encoding="UTF-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:rng="http://relaxng.org/ns/structure/1.0" xml:lang="en">
<teiHeader>
<fileDesc>
Expand All @@ -15,7 +15,7 @@
<sourceDesc>
<p>created on Sunday 05th October 2014 06:25:09 AM</p>
</sourceDesc>
</fileDesc>
</fileDesc><encodingDesc><appInfo><application ident="RomaJS" version="1.0.0" when="2024-02-08T19:56:37.127Z"><desc>File edited with <ref target="https://github.com/TEIC/romajs">RomaJS</ref></desc></application></appInfo></encodingDesc>
</teiHeader>
<text>
<front>
Expand All @@ -24,37 +24,23 @@
<body>
<p>Schema for structured documents resulting from the automatic extraction and restructuring realized by
Grobid.</p>
<schemaSpec ident="Grobid" docLang="en" prefix="tei_" xml:lang="en">
<schemaSpec ident="Grobid" docLang="en" prefix="tei_" xml:lang="en" targetLang="en">
<!--moduleRef
url="http://www.tei-c.org/release/xml/tei/custom/schema/relaxng/mathml2-main.rng"/-->
<moduleRef key="core"
except="abbr add addName binaryObject cb choice cit citedRange corr del distinct divGen emph expan
foreign gap gb gloss headItem headLabel index l lb lg measure measureGrp media mentioned milestone num orig pb q
quote reg relatedItem resp respStmt rs said sic soCalled sp speaker stage teiCorpus textLang time unclear"/>
<moduleRef key="core" except="abbr add addName binaryObject cb choice cit citedRange corr del distinct divGen emph expan foreign gap gb gloss headItem headLabel index l lb lg measure measureGrp media mentioned milestone num orig pb q quote reg relatedItem resp respStmt rs said sic soCalled sp speaker stage teiCorpus textLang time unclear"/>
<moduleRef key="tei" except=""/>
<moduleRef key="header"
except="authority biblFull cRefPattern calendar calendarDesc catDesc catRef category change classDecl correction
creation distributor editorialDecl extent funder geoDecl handNote hyphenation interpretation langUsage language listChange listPrefixDef
namespace normalization prefixDef principal projectDesc punctuation quotation refState refsDecl rendition revisionDesc samplingDecl
scriptNote segmentation sponsor stdVals styleDefDecl tagUsage tagsDecl taxonomy typeNote"/>
<moduleRef key="textstructure"
except="argument byline closer dateline div1 div2 div3 div4 div5 div6 div7
docAuthor docDate docEdition docImprint docTitle epigraph floatingText group imprimatur opener postscript
salute signed titlePage titlePart titlePart trailer"/>
<moduleRef key="namesdates"
except="age birth bloc climate death district education event faith floruit genName geo geogFeat
geogName langKnowledge langKnown listEvent listNym listOrg listPerson listPlace listRelation location nameLink nationality
nym occupation offset person personGrp place placeName population relation residence sex socecStatus terrain trait"/>
<moduleRef key="header" except="authority biblFull cRefPattern calendar calendarDesc catDesc catRef category classDecl correction creation distributor editorialDecl extent funder geoDecl handNote hyphenation interpretation langUsage language listChange listPrefixDef namespace normalization prefixDef principal projectDesc punctuation quotation refState refsDecl rendition samplingDecl scriptNote segmentation sponsor stdVals styleDefDecl tagUsage tagsDecl taxonomy typeNote"/>
<!-- textstructure module: every element named in @except is left out of the schema (each name needs to be listed only once) -->
<moduleRef key="textstructure" except="argument byline closer dateline div1 div2 div3 div4 div5 div6 div7 docAuthor docDate docEdition docImprint docTitle epigraph floatingText group imprimatur opener postscript salute signed titlePage titlePart trailer"/>
<moduleRef key="namesdates" except="age birth bloc climate death district education event faith floruit geo geogFeat geogName langKnowledge langKnown listEvent listNym listOrg listPerson listPlace listRelation location nameLink nationality nym occupation offset person personGrp place placeName population relation residence sex socecStatus terrain trait"/>
<moduleRef key="linking" except="ab alt altGrp join joinGrp linkGrp seg state timeline"/>
<moduleRef key="figures" except="notatedMusic"/>
<moduleRef key="transcr"
except="addSpan am damage damageSpan delSpan ex fw handNotes handShift line listTranspose metamark mod path redo restore retrace secl sourceDoc space subst substJoin supplied surfaceGrp surplus transpose undo zone"/>
<moduleRef key="transcr" except="addSpan am damage damageSpan delSpan ex fw handNotes handShift line listTranspose metamark mod path redo restore retrace secl sourceDoc space subst substJoin supplied surfaceGrp surplus transpose undo zone"/>
<!-- only s from the analysis module, in case we want the final result with sentence segmentation -->
<!-- used to be except="c cl interp interpGrp m pc phr span spanGrp w" -->
<moduleRef key="analysis" include="s"/>

<!-- Inclusion of the MathML schema -->
<moduleRef url="https://www.tei-c.org/release/xml/tei/Exemplars/mathml2-main.rng"/>


<!-- formula can contain raw text or graphics, and formula can appear under div, because it is often completely outside a paragraph -->
<elementSpec ident="formula" mode="change">
Expand Down Expand Up @@ -115,21 +101,17 @@
<elementSpec ident="imprint" mode="change" module="core">
<content>
<sequence>
<alternate minOccurs="0"
maxOccurs="unbounded">
<alternate minOccurs="0" maxOccurs="unbounded">
<elementRef key="classCode"/>
<elementRef key="catRef"/>
</alternate>
<sequence minOccurs="0"
maxOccurs="unbounded">
<sequence minOccurs="0" maxOccurs="unbounded">
<alternate minOccurs="0" maxOccurs="unbounded">
<classRef key="model.imprintPart"/>
<classRef key="model.dateLike"/>
</alternate>
<elementRef key="respStmt" minOccurs="0"
maxOccurs="unbounded"/>
<classRef key="model.global"
minOccurs="0" maxOccurs="unbounded"/>
<elementRef key="respStmt" minOccurs="0" maxOccurs="unbounded"/>
<classRef key="model.global" minOccurs="0" maxOccurs="unbounded"/>
</sequence>
</sequence>
</content>
Expand All @@ -139,8 +121,7 @@
<!-- structured abstract is an obvious need, see https://github.com/TEIC/TEI/issues/548 -->
<elementSpec ident="abstract" mode="change" module="header">
<content>
<alternate minOccurs="0"
maxOccurs="unbounded">
<alternate minOccurs="0" maxOccurs="unbounded">
<classRef key="model.pLike"/>
<classRef key="model.listLike"/>
<classRef key="model.divLike"/>
Expand All @@ -153,8 +134,7 @@
<content>
<alternate minOccurs="0">
<textNode/>
<elementRef key="term" minOccurs="1"
maxOccurs="unbounded"/>
<elementRef key="term" minOccurs="1" maxOccurs="unbounded"/>
<elementRef key="list"/>
</alternate>
</content>
Expand Down Expand Up @@ -264,7 +244,31 @@
</attDef>
</attList>
</classSpec>

<!-- Customisation of the resp attribute on availability: its datatype is declared as a plain string
     and its values are limited to a closed list, "authors" or "publisher", each naming the copyright
     owner of the document. -->
<elementSpec ident="availability" mode="change">
<attList>
<attDef ident="resp" mode="change">
<datatype>
<dataRef name="string"/>
</datatype>
<valList type="closed" mode="change">
<valItem mode="add" ident="authors">
<desc versionDate="2024-02-08" xml:lang="en">Copyrights owner of the document are the authors</desc>
</valItem>
<valItem mode="add" ident="publisher">
<desc versionDate="2024-02-08" xml:lang="en">Copyrights owner of the document is the publisher</desc>
</valItem>
</valList>
</attDef>
</attList>
</elementSpec>

<!-- Declare s as a member of the model.pLike class.
     NOTE(review): presumably so that sentence-segmented output can place s wherever paragraph-like
     elements are allowed; confirm against the generated schema. -->
<elementSpec ident="s" mode="change">
<classes mode="change">
<memberOf key="model.pLike"/>
</classes>
</elementSpec>
</schemaSpec>
</body>
</text>
</TEI>
</TEI>
Binary file modified grobid-home/schemas/rng/Grobid.rnc
Binary file not shown.
Loading

0 comments on commit 16b9abb

Please sign in to comment.