1+ import groovy.transform.Memoized
2+
3+ import java.util.concurrent.ConcurrentHashMap
4+
// NOTE(review): the string literals in this copy of the script start with a space
// (report names, relator URIs, map keys). They are kept exactly as found here;
// this looks like paste residue, so confirm against the repository version.

// One report per outcome; each pass below prints to the writer matching what it did.
PrintWriter matchedAndSpecified = getReportWriter(" matched.tsv")
PrintWriter unmatchedSpecifiedAnyway = getReportWriter(" mismatched.tsv")
PrintWriter matchedInOtherWork = getReportWriter(" matched-in-other-work.tsv")
PrintWriter notSpecifiedMovedToInstance = getReportWriter(" not-specified-moved-to-instance.txt")

// Selection for the first pass: non-deleted bib records whose record-level identifiedBy
// mentions Elib and that carry a summary on the instance or on its instanceOf.
def where = """
collection = 'bib'
AND data#>>'{@graph, 0, identifiedBy}' LIKE '%Elib%'
AND (data#>>'{@graph, 1, instanceOf, summary}' is not null OR data#>>'{@graph, 1, summary}' is not null)
AND deleted = false
"""

// Label prefix searched for in summary text (see parseRoles) -> relator URI it stands for.
// Assigned without `def` so the methods further down can see it.
ROLES = [
        ' Formgivare:'       : ' https://id.kb.se/relator/designer',
        ' Omslag:'           : ' https://id.kb.se/relator/coverDesigner',
        ' Omslagsformgivare:': ' https://id.kb.se/relator/coverDesigner',
]

// The role list a contribution has when its role is still unspecified.
OTHER = [[' @id': ' https://id.kb.se/relator/unspecifiedContributor']]

// The two relators this script assigns; both shared maps are seeded with them.
def seedRelators = [' https://id.kb.se/relator/designer', ' https://id.kb.se/relator/coverDesigner']

// Filled by the first pass (keyed by parsed name), read by the second pass.
Map<String, Set<String>> knownNames = new ConcurrentHashMap(
        seedRelators.collectEntries { [(it): ConcurrentHashMap.newKeySet()] })

// Filled by the first pass (keyed by agent), read by the second pass. The seeded relator
// keys are what the first pass intersects a contribution's roles against.
Map<String, Set<String>> knownAgents = new ConcurrentHashMap(
        seedRelators.collectEntries { [(it): ConcurrentHashMap.newKeySet()] })

// Short ids of records the first pass fully resolved; the second pass skips them.
Set<String> handled = ConcurrentHashMap.newKeySet()
30+
// First pass over the records selected by `where`.
//
// For each record it
//   1. parses "role label + name" pairs out of the summary labels and remembers, per name,
//      which roles were seen (knownNames),
//   2. replaces the unspecified-contributor role on work contributions whose agent name
//      matches a parsed name (reported to matchedAndSpecified),
//   3. if exactly one parsed name and exactly one unspecified contribution remain, pairs
//      them anyway (reported to unmatchedSpecifiedAnyway),
//   4. remembers, per agent, the roles of contributions that carry one of the relators
//      seeded in knownAgents.
//
// NOTE(review): map keys and URIs are written exactly as they appear in this copy of the
// script (with a leading space) — confirm against the repository version.
selectBySqlWhere(where) { bib ->
    def id = bib.doc.shortId
    def instance = bib.graph[1]
    def summary = asList(instance[' instanceOf'][' summary']) + asList(bib.graph[1][' summary'])

    def nameToRoles = summary
            .findResults { it[' label'] }
            .join(' ')
            .with { parseRoles(it) }
            .each { name, roles ->
                // addAll, not add: the second pass counts individual role URIs with
                // countBy { it } and writes the winner into '@id', so the accumulated
                // value has to be a flat list of URIs rather than a list of lists.
                // The list is synchronized because it lives in a concurrent map that
                // presumably is updated from several worker threads.
                knownNames.computeIfAbsent(name, f -> Collections.synchronizedList([])).addAll(roles)
            }

    List workContribution = instance[' instanceOf'][' contribution']
    if (!workContribution) {
        return
    }

    // Contributions without an agent are dropped; removeAll returns true if any were.
    def modified = workContribution.removeAll { !it.agent }

    Set existingRoles = workContribution.collect { asList(it.role)*.' @id' }.grep().flatten()

    if (existingRoles.contains(' https://id.kb.se/relator/unspecifiedContributor') && nameToRoles) {
        // Exact match: the agent's name equals a name parsed from the summary.
        workContribution.each { c ->
            if (asList(c.role) == OTHER) {
                def agentName = name(loadIfLink(c.agent))
                def roles = nameToRoles[agentName]
                if (roles) {
                    c[' role'] = roles.collect { [' @id': it] }
                    matchedAndSpecified.println([id, c.agent, roles].join(' \t '))
                    // Consumed, so it cannot be paired again by the fallback below.
                    nameToRoles.remove(agentName)
                    modified = true
                }
            }
        }

        def other = workContribution.findAll { asList(it.role) == OTHER }

        // Fallback: one leftover name and one leftover unspecified contribution are paired
        // even though the names did not match.
        if (nameToRoles.size() == 1 && other.size() == 1) {
            def c = other[0]
            def leftoverName = nameToRoles.keySet()[0]
            def roles = nameToRoles[leftoverName]
            c[' role'] = roles.collect { [' @id': it] }
            other.clear()
            unmatchedSpecifiedAnyway.println([id, c.agent, leftoverName, roles].join(' \t '))
            modified = true
        }

        // Nothing unspecified left: the second pass does not need to look at this record.
        if (other.isEmpty()) {
            handled.add(id)
        }
    }

    workContribution.each { c ->
        def roles = asList(c.role)*.' @id'
        // Only contributions carrying one of the relators seeded as keys of knownAgents
        // are remembered.
        if (knownAgents.keySet().intersect(roles)) {
            // Same flat, synchronized accumulation as for knownNames above.
            knownAgents.computeIfAbsent(c.agent, f -> Collections.synchronizedList([])).addAll(roles)
        }
    }

    if (modified) {
        bib.scheduleSave()
    }
}
95+
// Second pass over the Elib bib records, skipping those recorded in `handled`.
// Remaining unspecified contributions get a role when the roles collected for the same
// agent (or, failing that, the same name) consist of a single role seen more than twice.
// Whatever is still unspecified afterwards is moved from the work to the instance.
selectBySqlWhere(" collection = 'bib' AND data#>>'{@graph, 0, identifiedBy}' LIKE '%Elib%' AND deleted = false") { bib ->
    def id = bib.doc.shortId
    if (handled.contains(id)) {
        return
    }

    def instance = bib.graph[1]
    List workContribution = instance[' instanceOf'][' contribution']
    if (!workContribution) {
        return
    }

    // Contributions without an agent are dropped.
    workContribution.removeAll { !it.agent }

    for (c in workContribution) {
        if (asList(c.role) != OTHER) {
            continue
        }

        // Prefer what is known about the agent itself; fall back to its name.
        def roles = knownAgents[c.agent] ?: knownNames[name(loadIfLink(c.agent))]
        if (!roles) {
            continue
        }

        def countByRole = roles.countBy { it }.sort { -it.value }
        if (countByRole.size() != 1) {
            continue
        }

        def frequent = countByRole.find { it.value > 2 }
        if (frequent != null) {
            def role = frequent.key
            def count = frequent.value
            c[' role'] = [[' @id': role]]
            matchedInOtherWork.println([id, c.agent, role, count].join(' \t '))
            bib.scheduleSave()
        }
    }

    // Still unspecified: append to the instance's contribution and remove from the work.
    workContribution.removeAll { c ->
        boolean stillUnspecified = asList(c.role) == OTHER
        if (stillUnspecified) {
            instance[' contribution'] = asList(instance[' contribution']) + c
            notSpecifiedMovedToInstance.println(id)
            bib.scheduleSave()
        }
        return stillUnspecified
    }
}
137+
/**
 * Finds the names that follow the role labels of ROLES in a summary text.
 *
 * Every label in ROLES is searched for in the text; what follows the label, up to the
 * first character excluded by the pattern, is taken as a name and trimmed.
 *
 * Names are accumulated per relator across all labels. Two labels in ROLES share the
 * same relator URI, so building an intermediate map keyed by URI would let the later
 * label overwrite the names found for the earlier one. Blank names are skipped and a
 * role is listed at most once per name.
 *
 * NOTE(review): the pattern is reproduced exactly as it appears in this copy of the
 * script, including its spaces — confirm against the repository version.
 *
 * @param summary the text to search
 * @return map from name to the list of relator URIs found for that name; empty if
 *         no label occurs in the text
 */
private Map parseRoles(String summary) {
    def nameToRoles = [:]

    ROLES.each { s, id ->
        summary
                .findAll(/ $s[^\[ ,"]+/)
                .collect { it.substring(s.size()).trim() }
                .findAll { it } // a label followed by nothing usable is not a name
                .each { n ->
                    def roles = nameToRoles.computeIfAbsent(n, k -> [])
                    if (!roles.contains(id)) {
                        roles.add(id)
                    }
                }
    }

    return nameToRoles
}
155+
/**
 * Display name of an agent: its `name` when that is set, otherwise a string built
 * from `givenName` and `familyName`.
 *
 * @param agent the agent description
 * @return the name used to compare agents with names parsed from summaries
 */
private String name(Map agent) {
    def explicitName = agent.name
    if (explicitName) {
        return explicitName
    }
    return " ${ agent.givenName} ${ agent.familyName} "
}
159+
/**
 * Resolves a link to the thing it points at.
 *
 * @param m a map that is either a link (has an id entry) or a full description
 * @return the result of loadThing for the id when one is present, otherwise m itself
 */
private Map loadIfLink(Map m) {
    def link = m[' @id']
    if (link) {
        return loadThing(link)
    }
    return m
}
163+
/**
 * Loads the second graph item of the document with the given id.
 * Results are memoized per id.
 *
 * @param id the id handed to selectByIds
 * @return graph[1] of the selected document, or an empty map if the callback never ran
 */
@Memoized
private Map loadThing(def id) {
    def found = [:]
    selectByIds([id]) {
        found = it.graph[1]
    }
    return found
}
0 commit comments