Skip to content

Commit

Permalink
Merge pull request #28 from dracor-org/20-speakers
Browse files Browse the repository at this point in the history
Improve transformation of `who` attributes
  • Loading branch information
cmil authored Sep 2, 2024
2 parents 9183657 + 7cef7ae commit 9d0ab6b
Show file tree
Hide file tree
Showing 162 changed files with 950 additions and 1,479 deletions.
65 changes: 14 additions & 51 deletions tc2dracor.xq
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ declare variable $id-map := doc('ids.xml');
(: XML document mapping original author info to normalized author names :)
declare variable $author-map := doc('authors.xml');

declare variable $who-tokenize-pattern := '/|,';
declare variable $who-tokenize-pattern := '\s*(/|,|_)\s*';

declare variable $comedy-genres := (
"comédie ballet",
Expand Down Expand Up @@ -170,18 +170,10 @@ declare function local:attribute-to-comment($node as attribute()+) {
};

declare function local:translate($string as xs:string) as xs:string {
let $work :=
translate(lower-case($string), "*[]’' áàâéèêíìîóòôúùû", '------aaaeeeiiiooouuu')
=> replace('\.', '')
=> replace('^\-', '')
=> replace('^(\d)', 'num') (: FIXME: this can effectively create the same ID for different characters :)
=> replace('^[|]$', '')
=> replace('�', '')
return
(: quality assurance :)
if($work => matches('^\s*?$'))
then 'empty-string'
else $work
translate(lower-case($string), "_*[]’' áàâéèêíìîóòôúùû", '-------aaaeeeiiiooouuu')
=> replace('\.', '')
=> replace('^\-', '')
=> replace('^(\d+(?:e|nde?|eme?|ere?)?)-(.+)$', '$2-$1')
};

declare function local:fix-type ($value) as xs:string {
Expand Down Expand Up @@ -228,29 +220,6 @@ declare function local:transform($nodes) {
return
typeswitch ( $node )

(: 👇 the incredible typo hack :)
case element(SPEAKER) return
element {QName('', 'speaker')} {
$node/node()} => local:transform()
case element(P) return
element {QName('', 'p')} {
$node/node()} => local:transform()
case element(acheverImprimer) return
element {QName('', 'acheveImprime')} {
$node/node()} => local:transform()
case element(achevedImprime) return
element {QName('', 'acheveImprime')} {
$node/node()} => local:transform()
case element(acheveImprimer) return
element {QName('', 'acheveImprime')} {
$node/node()} => local:transform()
case element(appobation) return
element {QName('', 'approbation')} {
$node/node()} => local:transform()
case element(pinter) return
element {QName('', 'printer')} {
$node/node()} => local:transform()

case text() return
(: replace occurrences of 'quart_d_heure' :)
(: see https://github.com/dracor-org/theatre-classique/commit/0e1f871dea95f4343895dad7e648de750c6dcf91 :)
Expand Down Expand Up @@ -284,24 +253,13 @@ declare function local:transform($nodes) {
}

case element(sp) return
let $exceptionsWho := (
$node/@who,
$node/@stwho,
$node/@givewho,
$node/@towardwho,
$node/@embarrassedwho,
$node/@breakwho,
$node/@ho,
$node/@w4ho
)
return
if( not(exists($node/* except $node/*:speaker)) )
then comment { 'ERROR: ', serialize($node)}
else
element {QName('http://www.tei-c.org/ns/1.0', 'sp')} {
$node/@* except ($node/@stage, $exceptionsWho, $node/@type),
$node/@* except ($node/@stage, $node/@who, $node/@type),
attribute who {
let $easy := tokenize(string-join($exceptionsWho, ' '), $who-tokenize-pattern)
let $easy := tokenize(string-join($node/@who, ' '), $who-tokenize-pattern)
! ('#' || local:translate(.))
return
if(string($easy[1]) != '')
Expand Down Expand Up @@ -717,8 +675,13 @@ declare function local:transform($nodes) {
};

declare function local:make-particDesc ($doc as element()) as node()* {
let $whos := ($doc//*:text//*:sp/tokenize(./@who || ./@ho || ./@w4ho, $who-tokenize-pattern) => distinct-values())
let $whos := if(string($whos[1]) != '') then $whos else (($doc//*:speaker/string(.)) => distinct-values())
let $whos := (for $sp in $doc//*:text//*:sp
return if ($sp/@who != '') then
tokenize(normalize-space($sp/@who), $who-tokenize-pattern)
else if ($sp/*:speaker[matches(., '\p{L}')]) then
replace($sp/*:speaker, "([-\p{L}' ]+).*", '$1')
else ()) => distinct-values()

return if (count($whos)) then
element {QName('http://www.tei-c.org/ns/1.0', 'particDesc')} {
element {QName('http://www.tei-c.org/ns/1.0', 'listPerson')} {
Expand Down
10 changes: 5 additions & 5 deletions tei/allainval-hiver.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@
<person xml:id="la-volupte" sex="FEMALE">
<persName>La Volupté</persName>
</person>
<person xml:id="jeux_ris_graces" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Jeux_ris_graces</persName>
<person xml:id="jeux-ris" sex="UNKNOWN">
<persName>Les Jeux et les Ris de la Suite de l'Hiver</persName>
</person>
</listPerson>
</particDesc>
Expand Down Expand Up @@ -138,7 +138,7 @@
<castItem>
<role corresp="#bacchus">BACCHUS</role>.</castItem>
<castItem>
<role corresp="#jeux">Les Jeux et les Ris de la Suite de l'Hiver</role>.</castItem>
<role corresp="#jeux-ris">Les Jeux et les Ris de la Suite de l'Hiver</role>.</castItem>
</castList>
<!--TODO: usage of set in correct place but with unknown attributes <set location="Paris" country="France" periode="(indéterminé)" gps="48.856614, 2.352222">La scène est à Paris.</set>-->
</front>
Expand Down Expand Up @@ -2144,7 +2144,7 @@
<head>DIVERTISSEMENT.</head>
<stage type="entrance">Le Bal amène les Jeux, les Ris et les Grâces.</stage>
<stage type="musique">MARCHE.</stage>
<sp who="#jeux_ris_graces">
<sp who="#jeux-ris">
<speaker>[JEUX, RIS et GRACES].</speaker>
<stage type="music">Air.</stage>
<l n="857">Venez plaisirs charmants et doux ;</l>
Expand Down Expand Up @@ -2176,7 +2176,7 @@
</div>
<div type="scene" n="15">
<head>VAUDEVILLE.</head>
<sp who="#jeux_ris_graces">
<sp who="#jeux-ris">
<speaker>[JEUX, RIS et GRACES].</speaker>
<l n="879">Quand un jeune amant vif et tendre,</l>
<l n="880">A trouvé l'art de nous surprendre,</l>
Expand Down
4 changes: 2 additions & 2 deletions tei/andrieux-le-reve-du-mari.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
<person xml:id="emilie" sex="FEMALE">
<persName>Emilie</persName>
</person>
<person xml:id="gillot" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<person xml:id="gillot" sex="MALE">
<persName>Gillot</persName>
</person>
<person xml:id="la-baronne" sex="FEMALE">
Expand Down Expand Up @@ -143,7 +143,7 @@
<castItem>
<role corresp="#la-baronne">LA BARONNE</role>, soeur de Mathilde. Mlle DEMERSON.</castItem>
<castItem>
<role corresp="#empty-string">GILLOT</role>, concierge. M. ARMAND-DAILLY.</castItem>
<role corresp="#gillot">GILLOT</role>, concierge. M. ARMAND-DAILLY.</castItem>
</castList>
<!--TODO: usage of set in correct place but with unknown attributes <set location="Paris" country="France" periode="XVIIIème" gps="46.580224, 0.340375">La scène est en Poitou, dans un château, à la campagne.</set>-->
</front>
Expand Down
6 changes: 3 additions & 3 deletions tei/anonyme-sauvages-civilises.xml
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@
<person xml:id="louis" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Louis</persName>
</person>
<person xml:id="guerriers-et-illinois" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Guerriers et Illinois</persName>
<person xml:id="guerriers" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Guerriers</persName>
</person>
<person xml:id="choeur-general" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Choeur General</persName>
Expand Down Expand Up @@ -1123,7 +1123,7 @@ statue d'Ariski, et préparent le bûcher.</stage>
<l n="432">Rappelleront ta simplicité</l>
<l n="433">Des vertus du premier âge.</l>
</sp>
<sp who="#guerriers-et-illinois">
<sp who="#guerriers #illinois">
<speaker>GUERRIERS et ILLINOIS.</speaker>
<l n="434">Nous renonçons, </l>
<l n="435">Aux sanglantes conquêtes ;</l>
Expand Down
3 changes: 0 additions & 3 deletions tei/barre-radet-candide.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,6 @@
<person xml:id="tous" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Tous</persName>
</person>
<person xml:id="madame--de-candide" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Madame de Candide</persName>
</person>
<person xml:id="osmin" sex="MALE">
<persName>Osmin</persName>
</person>
Expand Down
6 changes: 3 additions & 3 deletions tei/bary-du-beau-sein.xml
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@
<profileDesc>
<particDesc>
<listPerson>
<person xml:id="sinope" sex="FEMALE">
<persName>Sinope</persName>
</person>
<person xml:id="tyrisias" sex="MALE">
<persName>Tyrisias</persName>
</person>
<person xml:id="sinope" sex="FEMALE">
<persName>Sinope</persName>
</person>
</listPerson>
</particDesc>
<textClass>
Expand Down
10 changes: 2 additions & 8 deletions tei/biancollelli-foire-galante.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@
<person xml:id="une-des-graces" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Une des Graces</persName>
</person>
<person xml:id="arlequin-et-pierrot" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Arlequin et Pierrot</persName>
</person>
<person xml:id="le-docteur" sex="MALE">
<persName>Le Docteur</persName>
</person>
Expand All @@ -73,9 +70,6 @@
<person xml:id="marinette" sex="FEMALE">
<persName>Marinette</persName>
</person>
<person xml:id="le-choeur-et-le-docteur" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Le Choeur et le Docteur</persName>
</person>
<person xml:id="les-chanteuses" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Les Chanteuses</persName>
</person>
Expand Down Expand Up @@ -317,7 +311,7 @@
<l n="88">Et Tircis sans le cocuage,</l>
<l n="89">Serait-il ce qu'il est ?</l>
</sp>
<sp who="#arlequin-et-pierrot">
<sp who="#arlequin #pierrot">
<speaker>ARLEQUIN et PIERROT, sur le même ton, ensemble.</speaker>
<l n="90">Devenons par le cocuage</l>
<l n="91">Aussi riche qu'il est.</l>
Expand Down Expand Up @@ -574,7 +568,7 @@
<l n="226">Cette râpe de fer blanc,</l>
<l n="227">Cette andouille de Tabac.</l>
</sp>
<sp who="#le-choeur-et-le-docteur">
<sp who="#le-choeur #le-docteur">
<speaker>LE CHOEUR et LE DOCTEUR, à l'imitation de Aimez, aimez, belle bergère, ensemble</speaker>
<l n="228">Râpez, râpez, belle bergère,</l>
<l n="229">Si vous voulez charmer !</l>
Expand Down
16 changes: 5 additions & 11 deletions tei/boissy-babillard.xml
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,18 @@
<person xml:id="la-fleur" sex="MALE">
<persName>La Fleur</persName>
</person>
<person xml:id="doris_melite" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Doris_melite</persName>
</person>
<person xml:id="doris" sex="FEMALE">
<persName>Doris</persName>
</person>
<person xml:id="melite" sex="FEMALE">
<persName>Mélite</persName>
</person>
<person xml:id="ismene" sex="FEMALE">
<persName>Ismène</persName>
</person>
<person xml:id="cephise" sex="FEMALE">
<persName>Céphise</persName>
</person>
<person xml:id="melite" sex="FEMALE">
<persName>Mélite</persName>
</person>
<person xml:id="leandre_melite_doris" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Leandre_melite_doris</persName>
</person>
</listPerson>
</particDesc>
<textClass>
Expand Down Expand Up @@ -779,7 +773,7 @@
</div>
<div type="scene" n="9">
<head>SCÈNE IX. Léandre, Céphise, Ismène, Hortense, Daphné, Drois, Mélite.</head>
<sp who="#doris_melite">
<sp who="#doris #melite">
<speaker>DORIS et MÉLITE, entrant les premières. </speaker>
<l n="326">Nous nous rendons, madame, et ne disputons plus. </l>
</sp>
Expand Down Expand Up @@ -1234,7 +1228,7 @@
<speaker>DAPHNÉ, à Léandre.</speaker>
<l n="409" part="F">Tenez bon. </l>
</sp>
<sp who="#leandre_melite_doris">
<sp who="#leandre #melite #doris">
<speaker>LÉANDRE, MÉLITE, DORIS.</speaker>
<l n="410" part="I">Madame... </l>
</sp>
Expand Down
14 changes: 7 additions & 7 deletions tei/boursault-fete-de-la-seine.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,11 @@
<person xml:id="deux-nereides" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Deux Nereides</persName>
</person>
<person xml:id="une-jeune-fontaine-et-un-ruisseau" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Une Jeune Fontaine et un Ruisseau</persName>
<person xml:id="une-jeune-fontaine" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Une Jeune Fontaine</persName>
</person>
<person xml:id="un-ruisseau" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Un Ruisseau</persName>
</person>
<person xml:id="deux-jeunes-ruisseaux" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Deux Jeunes Ruisseaux</persName>
Expand All @@ -80,9 +83,6 @@
<person xml:id="ensemble" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Ensemble</persName>
</person>
<person xml:id="le-gange-et-le-nil" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Le Gange et le Nil</persName>
</person>
<person xml:id="tous-ensemble" sex="UNKNOWN"><!--WARNING: no castItem found for reference in @who-->
<persName>Tous Ensemble</persName>
</person>
Expand Down Expand Up @@ -252,7 +252,7 @@
<l n="69">Trouvent la fin de leurs peines,</l>
<l n="70">Sans jamais changer leurs cours.</l>
</sp>
<sp who="#une-jeune-fontaine-et-un-ruisseau">
<sp who="#une-jeune-fontaine #un-ruisseau">
<speaker>UNE JEUNE FONTAINE ET UN RUISSEAU.</speaker>
<l n="71">Le temps d'aimer est un temps admirable ;</l>
<l n="72">Mais il ne dure pas assez :</l>
Expand Down Expand Up @@ -379,7 +379,7 @@
<l n="160">Consolez-vous, vos souffrances</l>
<l n="161">Ne dureront pas longtemps.</l>
</sp>
<sp who="#le-gange-et-le-nil">
<sp who="#le-gange #le-nil">
<speaker>LE GANGE et LE NIL.</speaker>
<l n="162">Puisse le Ciel qui l'a fait naître</l>
<l n="163">Pour affranchir du joug tant de peuples divers,</l>
Expand Down
Loading

0 comments on commit 9d0ab6b

Please sign in to comment.