Skip to content

Commit

Permalink
Introduce uniform multi-value separator.
Browse files Browse the repository at this point in the history
  • Loading branch information
TomazErjavec committed Apr 9, 2024
1 parent fd0012a commit 524dae3
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 52 deletions.
36 changes: 5 additions & 31 deletions Build/Scripts/parlamint2cnt-overview.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
exclude-result-prefixes="fn et tei xs xi"
version="2.0">

<xsl:import href="parlamint-lib.xsl"/>

<xsl:output method="text" encoding="utf-8"/>

<!-- What to output the table as -->
Expand Down Expand Up @@ -76,7 +78,7 @@
<xsl:value-of select="$header-row"/>
</xsl:when>
<xsl:otherwise>
<xsl:message terminate="yes">Parameter 'mode' should be either TSV of TeX.</xsl:message>
<xsl:message terminate="yes">FATAL ERROR: parameter 'mode' should be either TSV of TeX.</xsl:message>
</xsl:otherwise>
</xsl:choose>

Expand Down Expand Up @@ -149,8 +151,6 @@
<xsl:variable name="terms" select="count(.//tei:titleStmt/tei:meeting[@corresp=$corpus]
[contains(@ana, 'parla.term')])"/>
<xsl:choose>
<!-- DK does not properly list terms, cf. https://github.com/clarin-eric/ParlaMint/issues/828 -->
<xsl:when test="$terms = 0 and $corpus = '#ParlaMint-DK'">4</xsl:when>
<xsl:when test="$terms = 0">-</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$terms"/>
Expand All @@ -161,8 +161,8 @@
<xsl:variable name="date">
<xsl:copy-of select=".//tei:settingDesc/tei:setting[@corresp=$corpus]/tei:date"/>
</xsl:variable>
<xsl:variable name="from" select="et:pad-date($date/tei:date/@from)"/>
<xsl:variable name="to" select="et:pad-date($date/tei:date/@to)"/>
<xsl:variable name="from" select="et:norm-date($date/tei:date/@from)"/>
<xsl:variable name="to" select="et:norm-date($date/tei:date/@to)"/>
<xsl:value-of select="replace($from, '-\d\d$', '')"/>
<xsl:value-of select="$col-sep"/>
<xsl:value-of select="replace($to, '-\d\d$', '')"/>
Expand All @@ -179,30 +179,4 @@
<xsl:value-of select="$line-sep"/>
</xsl:template>

<!-- Bring a date-like string to the full YYYY-MM-DD form.
     A value carrying a time part (e.g. "2013-10-26T14:00:00") is cut at the "T";
     a bare year ("2018") or year-month ("2018-03") is completed with "-01" for
     each missing component, e.g. 2018-01-01. A value that is already a full date
     is returned unchanged. Any other input terminates the transformation. -->
<xsl:function name="et:pad-date">
  <xsl:param name="date"/>
  <xsl:choose>
    <!-- Year only: add first month and first day -->
    <xsl:when test="matches($date, '^\d{4}$')">
      <xsl:value-of select="concat($date, '-01-01')"/>
    </xsl:when>
    <!-- Year and month: add first day -->
    <xsl:when test="matches($date, '^\d{4}-\d{2}$')">
      <xsl:value-of select="concat($date, '-01')"/>
    </xsl:when>
    <!-- Already a complete date: pass through -->
    <xsl:when test="matches($date, '^\d{4}-\d{2}-\d{2}$')">
      <xsl:value-of select="$date"/>
    </xsl:when>
    <!-- Date followed by a time part: keep only what precedes the "T" -->
    <xsl:when test="matches($date, '^\d{4}-\d{2}-\d{2}T.+$')">
      <xsl:value-of select="substring-before($date, 'T')"/>
    </xsl:when>
    <!-- Nothing matched: report the offending value and stop -->
    <xsl:otherwise>
      <xsl:message terminate="yes" select="concat('ERROR: bad date ', $date)"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>

</xsl:stylesheet>
36 changes: 15 additions & 21 deletions Scripts/parlamint-lib.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,9 @@
<!-- Filename of corpus root containing the corpus-wide metadata -->
<xsl:param name="meta"/>

<!-- Separator for multi-valued (parliamentary) "body" attribute; must have only one char -->
<xsl:param name="body-separator">|</xsl:param>

<!-- Separator for multi-valued (speech) "topic" attribute; must have only one char -->
<xsl:param name="topic-separator">|</xsl:param>

<!-- Separator for multi-valued semantic attributes; must have only one char -->
<xsl:param name="sem-separator">|</xsl:param>

<!-- Separator for multi-valued attributes in vertical and TSV files; must have only one char! -->
<xsl:param name="multi-separator">|</xsl:param>

<!-- Output label for MPs and non-MPs (in vertical and metadata output) -->
<xsl:param name="mp-label">MP</xsl:param>
<xsl:param name="nonmp-label">notMP</xsl:param>
Expand Down Expand Up @@ -85,7 +79,7 @@
<xsl:variable name="joined-subtitles">
<xsl:variable name="j-s">
<xsl:for-each select="$subtitles/self::tei:*">
<xsl:value-of select="concat(., $body-separator)"/>
<xsl:value-of select="concat(., $multi-separator)"/>
</xsl:for-each>
</xsl:variable>
<xsl:value-of select="replace($j-s, '.$', '')"/>
Expand Down Expand Up @@ -277,11 +271,11 @@
$body-en = 'Upper house' or
$body-en = 'Lower house' or
$body-en = 'Committee'">
<xsl:if test="contains($body, $body-separator)">
<xsl:message select="concat('ERROR: ', $body, ' should not contain ', $body-separator)"/>
<xsl:if test="contains($body, $multi-separator)">
<xsl:message select="concat('ERROR: ', $body, ' should not contain ', $multi-separator)"/>
</xsl:if>
<xsl:value-of select="$body"/>
<xsl:value-of select="$body-separator"/>
<xsl:value-of select="$multi-separator"/>
</xsl:if>
</xsl:if>
</xsl:for-each>
Expand Down Expand Up @@ -850,16 +844,16 @@
<xsl:value-of select="true()"/>
</xsl:when>
<xsl:when test="normalize-space($from) and normalize-space($to) and
xs:date(et:pad-date($date)) &gt;= xs:date(et:pad-date($from)) and
xs:date(et:pad-date($date)) &lt;= xs:date(et:pad-date($to))">
xs:date(et:norm-date($date)) &gt;= xs:date(et:norm-date($from)) and
xs:date(et:norm-date($date)) &lt;= xs:date(et:norm-date($to))">
<xsl:value-of select="true()"/>
</xsl:when>
<xsl:when test="not(normalize-space($from)) and normalize-space($to) and
xs:date(et:pad-date($date)) &lt;= xs:date(et:pad-date($to))" >
xs:date(et:norm-date($date)) &lt;= xs:date(et:norm-date($to))" >
<xsl:value-of select="true()"/>
</xsl:when>
<xsl:when test="normalize-space($from) and not(normalize-space($to)) and
xs:date(et:pad-date($date)) &gt;= xs:date(et:pad-date($from))" >
xs:date(et:norm-date($date)) &gt;= xs:date(et:norm-date($from))" >
<xsl:value-of select="true()"/>
</xsl:when>
<xsl:otherwise>
Expand All @@ -868,9 +862,9 @@
</xsl:choose>
</xsl:function>

<!-- Fix too long or too short dates
<!-- Normalize too long or too short dates
a la "2013-10-26T14:00:00" or "2018" to xs:date e.g. 2018-01-01 -->
<xsl:function name="et:pad-date">
<xsl:function name="et:norm-date">
<xsl:param name="date"/>
<xsl:choose>
<xsl:when test="matches($date, '^\d\d\d\d-\d\d-\d\dT.+$')">
Expand Down Expand Up @@ -1002,7 +996,7 @@
<xsl:for-each select="tokenize($element/@ana, ' ')">
<!-- Here we assume a) that the catDesc is only in English and b) that the extended pointer resolves to a local reference -->
<xsl:value-of select="key('id', substring-after(., ':'), $rootHeader)/tei:catDesc/tei:term"/>
<xsl:value-of select="$sem-separator"/>
<xsl:value-of select="$multi-separator"/>
</xsl:for-each>
</xsl:variable>
<xsl:value-of select="replace($terms, '.$', '')"/>
Expand All @@ -1013,7 +1007,7 @@
<xsl:for-each select="tokenize($element/@ana, ' ')">
<!-- Here we assume a) that the catDesc is only in English and b) that the extended pointer resolves to a local reference -->
<xsl:value-of select="key('id', substring-after(., ':'), $rootHeader)/normalize-space(tei:catDesc)"/>
<xsl:value-of select="$sem-separator"/>
<xsl:value-of select="$multi-separator"/>
</xsl:for-each>
</xsl:variable>
<xsl:value-of select="replace($glosses, '.$', '')"/>
Expand Down

0 comments on commit 524dae3

Please sign in to comment.