1111
1212import click
1313from flask import current_app
14- from invenio_app_ils .eitems .api import EItemIdProvider
14+ from invenio_app_ils .eitems .api import (
15+ EItemIdProvider ,
16+ get_eitems_for_document_by_creator ,
17+ get_eitems_for_document_by_source ,
18+ )
1519from invenio_app_ils .errors import IlsValidationError
1620from invenio_app_ils .proxies import current_app_ils
1721from invenio_db import db
1822from invenio_pidstore .models import PersistentIdentifier
1923
20- from cds_ils .importer .eitems .api import get_eitems_for_document_by_provider
21-
2224
2325class EItemImporter (object ):
2426 """EItem importer class."""
@@ -47,6 +49,7 @@ def __init__(
4749 self .output_pid = None
4850 self .action = None
4951 self .eitem_record = None
52+ self .duplicate_list = []
5053 self .ambiguous_list = []
5154 self .deleted_list = []
5255
@@ -125,12 +128,13 @@ def _delete_existing_record(self, existing_eitem):
125128 eitem_indexer .delete (existing_eitem )
126129 return existing_eitem
127130
128- def _report_ambiguous_records (self , multiple_results ):
129- eitem_cls = current_app_ils .eitem_record_cls
131+ def _report_duplicate_records (self , multiple_results ):
132+ for hit in multiple_results :
133+ self .duplicate_list .append (hit ["pid" ])
130134
135+ def _report_ambiguous_records (self , multiple_results ):
131136 for hit in multiple_results :
132- existing_eitem = eitem_cls .get_record_by_pid (hit ["pid" ])
133- self .ambiguous_list .append (existing_eitem )
137+ self .ambiguous_list .append (hit ["pid" ])
134138
135139 def _get_other_eitems_of_document (self , matched_document ):
136140 eitem_search = current_app_ils .eitem_search_cls ()
@@ -237,10 +241,25 @@ def eitems_search(self, matched_document):
237241 if self .eitem_json :
238242 # eitem is not always there, sometimes we just create a doc
239243 eitem_type = self .eitem_json .get ("_type" , "E-BOOK" ).upper ()
240- search = get_eitems_for_document_by_provider (
244+ exact_eitem_search = get_eitems_for_document_by_creator (
241245 document_pid , self .metadata_provider
242246 ).filter ("term" , eitem_type = eitem_type )
243- return search
247+
248+ # Declare items that are matched by `source`, but not by `created_by`, as ambiguous
249+ # They might have been created manually and the source field was filled in
250+ exact_hits = exact_eitem_search .execute ()
251+ exact_hit_ids = [hit .meta .id for hit in exact_hits ]
252+ ambiguous_eitem_search = (
253+ get_eitems_for_document_by_source (
254+ document_pid , self .metadata_provider , case_insensitive = True
255+ )
256+ .exclude ("ids" , values = exact_hit_ids )
257+ .filter ("term" , eitem_type = eitem_type )
258+ )
259+
260+ self ._report_ambiguous_records (ambiguous_eitem_search .scan ())
261+
262+ return exact_eitem_search
244263
245264 def import_eitem_action (self , search ):
246265 """Determine import action."""
@@ -280,8 +299,8 @@ def update_eitems(self, matched_document):
280299 self .output_pid = existing_eitem ["pid" ]
281300 else :
282301 results = search .scan ()
283- self ._report_ambiguous_records (results )
284- # still creates an item, even ambiguous eitems found
302+ self ._report_duplicate_records (results )
303+ # still creates an item, even if duplicate eitems are found
285304 # checks if there are higher priority eitems
286305 if should_eitem_be_imported :
287306 self .eitem_record = self .create_eitem (matched_document )
@@ -291,13 +310,10 @@ def update_eitems(self, matched_document):
291310 def delete_eitems (self , matched_document ):
292311 """Deletes eitems for a given document."""
293312 eitem_cls = current_app_ils .eitem_record_cls
294- document_pid = matched_document ["pid" ]
295313 self .action = "delete"
296314
297315 # get eitems for current provider
298- search = get_eitems_for_document_by_provider (
299- document_pid , self .metadata_provider
300- )
316+ search = self .eitems_search (matched_document )
301317 results = search .scan ()
302318
303319 for record in results :
@@ -307,11 +323,8 @@ def delete_eitems(self, matched_document):
307323 def preview_delete (self , matched_document ):
308324 """Preview delete action on eitems for given document."""
309325 eitem_cls = current_app_ils .eitem_record_cls
310- document_pid = matched_document ["pid" ]
311326 self .action = "delete"
312- search = get_eitems_for_document_by_provider (
313- document_pid , self .metadata_provider
314- )
327+ search = self .eitems_search (matched_document )
315328 results = search .scan ()
316329 for record in results :
317330 existing_eitem = eitem_cls .get_record_by_pid (record ["pid" ])
@@ -350,7 +363,8 @@ def summary(self):
350363 "eitem" : self .eitem_record ,
351364 "json" : self .eitem_json ,
352365 "output_pid" : self .output_pid ,
353- "duplicates" : self .ambiguous_list ,
366+ "ambiguous" : self .ambiguous_list ,
367+ "duplicates" : self .duplicate_list ,
354368 "action" : self .action ,
355369 "deleted_eitems" : self .deleted_list ,
356370 }
@@ -374,8 +388,8 @@ def preview_import(self, matched_document):
374388 return self .summary ()
375389 else :
376390 results = search .scan ()
377- self ._report_ambiguous_records (results )
378- # still creates an item, even ambiguous eitems found
391+ self ._report_duplicate_records (results )
392+ # still creates an item, even if duplicate eitems are found
379393 # checks if there are higher priority eitems
380394 if should_eitem_be_imported :
381395 self .action = "create"
0 commit comments