From ebb1b1653f6f6ec67c9d7dfc521ec0981c60761e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?=
Date: Tue, 7 May 2024 15:48:51 +0200
Subject: [PATCH] inline media, fix CZ url #868

---
 Scripts/parlamint2teitok.pl | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

Review notes (everything between "---" and "diff --git" is ignored by git-am):
- Hunk 2: the debug dump of $audio now sits inside the `if($audio)` guard.
  findnodes() returns an empty list when a pb/@corresp token does not match
  any media/@xml:id, which leaves $audio undefined; calling ->toString on it
  before the guard would die and the guard would never be reached.
- The two remaining `print STDERR` calls in hunk 2 are still unconditional.
  NOTE(review): the script gates a similar dump on $debug in hunk 3's
  context; consider doing the same here - not changed in this patch.
- NOTE(review): indentation and the position of blank context lines were
  reconstructed; if the hunks do not apply cleanly, retry with
  `git apply --ignore-whitespace` and verify against the target file.

diff --git a/Scripts/parlamint2teitok.pl b/Scripts/parlamint2teitok.pl
index 7c584dbb7..47b5246de 100644
--- a/Scripts/parlamint2teitok.pl
+++ b/Scripts/parlamint2teitok.pl
@@ -117,7 +117,7 @@
 
 foreach $media ( $xml->findnodes("//media") ) {
 	if ( $sname =~ /-CZ/ ) {
-		$mbase = "https://lindat.mff.cuni.cz/services/teitok/data/parczech/www.psp.cz/eknih/";
+		$mbase = "https://lindat.mff.cuni.cz/services/teitok/data/parczech/";
 	};
 	if ( $mbase ) {
 		$media->setAttribute("url", $mbase.$media->getAttribute("url"));
@@ -162,6 +162,21 @@
 	# Already pb'd
 	if ( $verbose ) { print "Already pb'd - adding atts later"; };
 	$dopb = 1;
+	# inline media: move each <media> referenced by a pb's @corresp to right after that pb
+	foreach $pb ( $xml->findnodes("//text//pb[\@corresp]") ) {
+		print STDERR $pb->toString;
+		foreach $corresp (map {s/^#//;$_} split(/\s+/, $pb->getAttribute("corresp"))){
+			print STDERR $corresp;
+			my ($audio) = $xml->findnodes("//media[\@xml:id = '$corresp']");
+			if($audio){
+				print STDERR $audio->toString;
+				$audio->unbindNode();
+				$pb->parentNode->insertAfter($audio,$pb);
+			}
+		};
+	};
+
+
 } else {
 	$dcnt = 0;
 	$pbcnt = 0; $scnt = 1;
@@ -398,8 +413,6 @@ ( $$$ )
 			$pb->setAttribute("ana", $utt->getAttribute("ana"));
 			if ( $debug ) { print $pb->toString; } ;
 		};
-
-	# inline media if pb contain corresp
 };
 
 `mkdir -p $outf`;