-
Notifications
You must be signed in to change notification settings - Fork 0
/
batch-latin-terms.xq
72 lines (58 loc) · 4.18 KB
/
batch-latin-terms.xq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
(:~
 : Returns the roman (non-italic) running text of an article as one string.
 :
 : Text is taken from elements below the body and back children of any
 : *:article element, with two exclusions:
 :   - text inside an italic element, at any depth, so italicised words are
 :     never also counted as roman;
 :   - text of elements that sit inside a sec whose @sec-type is
 :     'additional-information'.
 : Every name test uses the *: wildcard, so the function behaves the same for
 : documents with and without a default namespace.
 :
 : @param $root  the document to read
 : @return the selected text nodes concatenated without a separator
 :)
declare function local:get-text($root as document-node()) as xs:string{
  string-join(
    (: A path expression yields its nodes in document order, so the text is
       reassembled in reading order even for mixed content such as
       <p>in <bold>vivo</bold> and ...</p>. :)
    $root//*:article/*[local-name() = ('body', 'back')]
      //*[not(ancestor-or-self::*:italic)]
         [not(ancestor::*:sec[@sec-type = 'additional-information'])]
      /text(),
    ''
  )
};
(:~
 : Finds every match of a regular expression in a string and groups the
 : matches by matched value.
 :
 : @param $text   the text to search
 : @param $regex  regular expression passed to fn:analyze-string
 : @param $type   label written to the result; must be 'roman' or 'italic'
 : @return a result element whose @type is $type and whose @count is the total
 :         number of matches, containing one item element per distinct matched
 :         string with that string's own @count
 : @error local:error when $type is neither 'roman' nor 'italic'
 :)
declare function local:get-text-matches($text as xs:string, $regex as xs:string, $type as xs:string) as element(){
  if (not($type = ('roman', 'italic')))
  then
    (: Actually raise the error; a bare (QName, string) sequence would just be
       returned to the caller as data. :)
    error(
      xs:QName("local:error"),
      $type || ' is not allowed as value in third argument of local:get-text-matches'
    )
  else
    let $matches := analyze-string($text, $regex)//*:match
    let $items :=
      for $value in distinct-values($matches)
      return <item count="{count($matches[. = $value])}">{$value}</item>
    (: Each match belongs to exactly one distinct value, so the total is
       simply the number of matches. :)
    return <result type="{$type}" count="{count($matches)}">{$items}</result>
};
(:~
 : Finds every match of a regular expression in the string value of each
 : supplied element and groups the matches by matched value.
 :
 : @param $nodes  elements whose string values are searched
 : @param $regex  regular expression passed to fn:analyze-string
 : @return a result element with @type "italic" and a @count equal to the sum
 :         of the item counts, containing one item element per distinct
 :         matched string with that string's own @count
 :)
declare function local:get-node-matches($nodes as element()*, $regex as xs:string) as element(){
  let $hits := $nodes ! analyze-string(., $regex)//*:match
  let $items :=
    distinct-values($hits) ! (
      let $term := .
      return <item count="{count($hits[. = $term])}">{$term}</item>
    )
  let $total := sum($items ! number(@count))
  return <result type="italic" count="{$total}">{$items}</result>
};
(:~
 : Formats one result element as a single summary line.
 :
 : The line starts with 'Italicised terms: ' when @type is "italic" and with
 : 'Roman terms: ' otherwise, followed by the result's @count and a bracketed,
 : comma-separated list of "<count> instance(s) of <term>" for every item.
 :
 : @param $result  a result element carrying @type, @count and item children
 : @return the formatted summary line
 :)
declare function local:print-specifics($result as element()) as xs:string{
  let $label :=
    if ($result/@type = "italic") then 'Italicised terms: ' else 'Roman terms: '
  let $breakdown :=
    string-join($result//*:item ! (@count || ' instance(s) of ' || .), ', ')
  return $label || $result/@count || ' [' || $breakdown || ']'
};
(:~
 : Turns a roman result and an italic result into a human-readable verdict.
 :
 : The two arguments may be passed in either order; they are told apart by
 : their @type ("roman" / "italic"). When only one kind, or neither kind, has
 : items a one-line verdict is returned. When both kinds have items the
 : verdict reports which kind is more frequent (or that they are equal) and
 : appends the per-term summaries from local:print-specifics.
 :
 : @param $result1  a result element with @type and @count
 : @param $result2  the other result element
 : @return the verdict text; multi-line when both kinds of term occur
 :)
declare function local:print-result($result1 as element(),$result2 as element()) as xs:string{
  let $both := ($result1 union $result2)
  let $roman := $both[@type = "roman"]
  let $italic := $both[@type = "italic"]
  let $has-roman := exists($roman//*:item)
  let $has-italic := exists($italic//*:item)
  return
    if (not($has-roman or $has-italic)) then 'No latin terms found'
    else if (not($has-roman)) then 'Latin terms are consistently italicised'
    else if (not($has-italic)) then 'Latin terms are consistently not italicised'
    else
      (: Both kinds occur. The string literals below contain literal line
         breaks, so their continuation lines must stay unindented. :)
      let $roman-total := number($roman/@count)
      let $italic-total := number($italic/@count)
      let $roman-details := local:print-specifics($roman)
      let $italic-details := local:print-specifics($italic)
      return
        if ($roman-total = $italic-total) then (
'Latin terms are inconsistenly italicised/roman.
There are an equal number of roman and italicised Latin terms ' || '(' || $roman/@count || ')' || '.
' || $roman-details || '
' || $italic-details
        )
        else if ($roman-total gt $italic-total) then (
          let $no := $roman-total - $italic-total
          let $text-before-no := if ($no = 1) then 'is' else 'are'
          return
'Latin terms are inconsistenly italicised.
There ' || $text-before-no || ' ' || $no || ' more roman term(s)
' || $roman-details || '
' || $italic-details
        )
        else (
          let $no := $italic-total - $roman-total
          let $text-before-no := if ($no = 1) then 'is' else 'are'
          return
'Terms are inconsistenly italicised.
There ' || $text-before-no || ' ' || $no || ' more italic term(s)
' || $italic-details || '
' || $roman-details
        )
};
(:~
 : Produces the italicisation verdict for one document.
 :
 : The roman text is whatever local:get-text returns for $xml; the italic
 : text is the string value of every *:italic element anywhere in $xml,
 : joined with single spaces. Both texts are searched with the same regular
 : expression and the two results are handed to local:print-result.
 :
 : @param $xml    the document to analyse (passed on to local:get-text)
 : @param $regex  regular expression describing the terms to look for
 : @return the verdict produced by local:print-result
 :)
declare function local:get-result($xml,$regex as xs:string){
  local:print-result(
    local:get-text-matches(local:get-text($xml), $regex, 'roman'),
    local:get-text-matches(string-join($xml//*:italic, ' '), $regex, 'italic')
  )
};
(: Query body: for every .xml file in the folder except pre-report.xml, emit
   the file name followed by a space and its italicisation verdict. :)
let $folder := '/Users/fredatherden/Desktop/pre-edit/'
(: Alternation of the Latin terms to look for. :)
let $regex := '[Ii]n\s+[Vv]itro|[Ee]x\s+[Vv]itro|[Ii]n\s+[Vv]ivo|[Ee]x\s+[Vv]ivo|a\s+priori|a\s+posteriori|[Dd]e\s+[Nn]ovo|[Ii]n\s+[Uu]tero|[Ii]n\s+[Nn]atura|[Ii]n\s+[Ss]itu|[Ii]n\s+[Pp]lanta|[Rr]ete\s+[Mm]irabile|[Nn]omen\s+[Nn]ovum| [Ss]ensu |[Aa]d\s+[Ll]ibitum|[Ii]n\s+[Oo]vo'
for $file in file:list($folder)
where ends-with($file, '.xml') and $file != 'pre-report.xml'
return $file || ' ' || local:get-result(doc($folder || $file), $regex)