Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ target/
# Vim swapfiles
.*.sw?

# vscode history
# vscode files and history
.vscode/
**/.history

.python-version
18 changes: 0 additions & 18 deletions cds_ils/importer/eitems/api.py

This file was deleted.

58 changes: 36 additions & 22 deletions cds_ils/importer/eitems/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@

import click
from flask import current_app
from invenio_app_ils.eitems.api import EItemIdProvider
from invenio_app_ils.eitems.api import (
EItemIdProvider,
get_eitems_for_document_by_creator,
get_eitems_for_document_by_source,
)
from invenio_app_ils.errors import IlsValidationError
from invenio_app_ils.proxies import current_app_ils
from invenio_db import db
from invenio_pidstore.models import PersistentIdentifier

from cds_ils.importer.eitems.api import get_eitems_for_document_by_provider


class EItemImporter(object):
"""EItem importer class."""
Expand Down Expand Up @@ -47,6 +49,7 @@ def __init__(
self.output_pid = None
self.action = None
self.eitem_record = None
self.duplicate_list = []
self.ambiguous_list = []
self.deleted_list = []

Expand Down Expand Up @@ -125,12 +128,13 @@ def _delete_existing_record(self, existing_eitem):
eitem_indexer.delete(existing_eitem)
return existing_eitem

def _report_ambiguous_records(self, multiple_results):
eitem_cls = current_app_ils.eitem_record_cls
def _report_duplicate_records(self, multiple_results):
for hit in multiple_results:
self.duplicate_list.append(hit["pid"])

def _report_ambiguous_records(self, multiple_results):
for hit in multiple_results:
existing_eitem = eitem_cls.get_record_by_pid(hit["pid"])
self.ambiguous_list.append(existing_eitem)
self.ambiguous_list.append(hit["pid"])

def _get_other_eitems_of_document(self, matched_document):
eitem_search = current_app_ils.eitem_search_cls()
Expand Down Expand Up @@ -237,10 +241,25 @@ def eitems_search(self, matched_document):
if self.eitem_json:
# eitem is not always there, sometimes we just create a doc
eitem_type = self.eitem_json.get("_type", "E-BOOK").upper()
search = get_eitems_for_document_by_provider(
exact_eitem_search = get_eitems_for_document_by_creator(
document_pid, self.metadata_provider
).filter("term", eitem_type=eitem_type)
return search

# Declare items that are matched by `source`, but not by `created_by`, as ambiguous.
# They might have been created manually and the source field was filled in.
exact_hits = exact_eitem_search.execute()
exact_hit_ids = [hit.meta.id for hit in exact_hits]
ambiguous_eitem_search = (
get_eitems_for_document_by_source(
document_pid, self.metadata_provider, case_insensitive=True
)
.exclude("ids", values=exact_hit_ids)
.filter("term", eitem_type=eitem_type)
)

self._report_ambiguous_records(ambiguous_eitem_search.scan())

return exact_eitem_search

def import_eitem_action(self, search):
"""Determine import action."""
Expand Down Expand Up @@ -280,8 +299,8 @@ def update_eitems(self, matched_document):
self.output_pid = existing_eitem["pid"]
else:
results = search.scan()
self._report_ambiguous_records(results)
# still creates an item, even ambiguous eitems found
self._report_duplicate_records(results)
# still creates an item, even if duplicate eitems are found
# checks if there are higher priority eitems
if should_eitem_be_imported:
self.eitem_record = self.create_eitem(matched_document)
Expand All @@ -291,13 +310,10 @@ def update_eitems(self, matched_document):
def delete_eitems(self, matched_document):
"""Deletes eitems for a given document."""
eitem_cls = current_app_ils.eitem_record_cls
document_pid = matched_document["pid"]
self.action = "delete"

# get eitems for current provider
search = get_eitems_for_document_by_provider(
document_pid, self.metadata_provider
)
search = self.eitems_search(matched_document)
results = search.scan()

for record in results:
Expand All @@ -307,11 +323,8 @@ def delete_eitems(self, matched_document):
def preview_delete(self, matched_document):
"""Preview delete action on eitems for given document."""
eitem_cls = current_app_ils.eitem_record_cls
document_pid = matched_document["pid"]
self.action = "delete"
search = get_eitems_for_document_by_provider(
document_pid, self.metadata_provider
)
search = self.eitems_search(matched_document)
results = search.scan()
for record in results:
existing_eitem = eitem_cls.get_record_by_pid(record["pid"])
Expand Down Expand Up @@ -350,7 +363,8 @@ def summary(self):
"eitem": self.eitem_record,
"json": self.eitem_json,
"output_pid": self.output_pid,
"duplicates": self.ambiguous_list,
"ambiguous": self.ambiguous_list,
"duplicates": self.duplicate_list,
"action": self.action,
"deleted_eitems": self.deleted_list,
}
Expand All @@ -374,8 +388,8 @@ def preview_import(self, matched_document):
return self.summary()
else:
results = search.scan()
self._report_ambiguous_records(results)
# still creates an item, even ambiguous eitems found
self._report_duplicate_records(results)
# still creates an item, even if duplicate eitems are found
# checks if there are higher priority eitems
if should_eitem_be_imported:
self.action = "create"
Expand Down
1 change: 1 addition & 0 deletions cds_ils/importer/serializers/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class ImportedEItemSchema(Schema):
action = fields.String(dump_only=True)
priority_provider = fields.Bool()
duplicates = fields.List(fields.String)
ambiguous = fields.List(fields.String)
deleted_eitems = fields.List(fields.Dict)
eitem = fields.Nested(EItemSchemaV1)
json = fields.Raw()
Expand Down
37 changes: 37 additions & 0 deletions tests/importer/data/ambiguous_document_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "script", "value": "test" },
"pid": "docid-13",
"title": "Metallic materials — Determination of forming-limit curves for sheet and strip (ed. 2nd - 2021)",
"authors": [{ "full_name": "International Organization for Standardization. Geneva" }],
"abstract": "This is an abstract",
"identifiers": [{ "scheme": "STANDARD_NUMBER", "value": "ISO-12004-2" }],
"keywords": [{ "source": "X", "value": "Patata" }],
"document_type": "BOOK",
"publication_year": "1950",
"agency_code": "DE-He213",
"_eitem": {
"pid": "eitemid-12",
"created_by": {"type": "import", "value": "springer"},
"document_pid": "docid-13",
"eitem_type": "E-BOOK",
"internal_notes": "Ambiguous eitem with source field",
"description": "Description of the electronic item",
"open_access": false,
"source": "springer",
"urls": [
{
"description": "Protected URL",
"value": "http://protected-cds-ils.ch/",
"login_required": true
},
{
"description": "Another open URL",
"value": "http://cds-ils.ch/",
"login_required": false
}
]
}
}
]
33 changes: 33 additions & 0 deletions tests/importer/data/create_duplicate_eitems_document.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"agency_code": "DE-He213",
"created_by": {"type": "import", "value": "springer"},
"pid": "docid-14",
"title": "Test Document with Multiple Springer EItems",
"document_type": "BOOK",
"authors": [{ "full_name": "Test Author" }],
"abstract": "This is a test document that should have multiple eitems from the same provider",
"edition": "1st",
"publication_year": "2024",
"identifiers": [{ "scheme": "ISBN", "value": "1234567890123" }],
"_eitem": {
"pid": "eitemid-duplicate-test",
"internal_notes": "This should create a duplicate when imported",
"description": "Test eitem that should trigger duplicate detection",
"open_access": false,
"urls": [
{
"description": "Protected URL",
"value": "http://protected-cds-ils.ch/",
"login_required": true
},
{
"description": "Another open URL",
"value": "http://cds-ils.ch/",
"login_required": false
}
]
}
}
]
13 changes: 13 additions & 0 deletions tests/importer/data/existing_documents.json
Original file line number Diff line number Diff line change
Expand Up @@ -222,5 +222,18 @@
"document_type": "BOOK",
"publication_year": "1950",
"note": "MATCH BY STANDARD NUMBER"
},
{
"$schema": "https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json",
"created_by": { "type": "import", "value": "springer" },
"pid": "docid-14",
"title": "Test Document with Multiple Springer EItems",
"document_type": "BOOK",
"authors": [{ "full_name": "Test Author" }],
"abstract": "This is a test document that should have multiple eitems from the same provider",
"edition": "1st",
"publication_year": "2024",
"identifiers": [{ "scheme": "ISBN", "value": "1234567890123" }],
"note": "DUPLICATE EITEMS TEST"
}
]
78 changes: 78 additions & 0 deletions tests/importer/data/existing_eitems.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,5 +217,83 @@
"login_required": false
}
]
},
{
"pid": "eitemid-12",
"created_by": {"type": "import", "value": "springer"},
"source": "springer",
"document_pid": "docid-13",
"eitem_type": "E-BOOK",
"internal_notes": "Unambiguous eitem with source field",
"description": "Description of the electronic item",
"open_access": false,
"urls": [
{
"description": "Protected URL",
"value": "http://protected-cds-ils.ch/",
"login_required": true
},
{
"description": "Another open URL",
"value": "http://cds-ils.ch/",
"login_required": false
}
]
},
{
"pid": "eitemid-13",
"created_by": {"type": "user_id", "value": "1"},
"source": "springer",
"document_pid": "docid-13",
"eitem_type": "E-BOOK",
"internal_notes": "Ambiguous eitem with source field",
"description": "Description of the electronic item",
"open_access": false,
"urls": [
{
"description": "Protected URL",
"value": "http://protected-cds-ils.ch/",
"login_required": true
},
{
"description": "Another open URL",
"value": "http://cds-ils.ch/",
"login_required": false
}
]
},
{
"pid": "eitemid-14-dup1",
"created_by": {"type": "import", "value": "springer"},
"source": "springer",
"document_pid": "docid-14",
"eitem_type": "E-BOOK",
"internal_notes": "First duplicate eitem from springer",
"description": "Description of the first duplicate electronic item",
"open_access": false,
"urls": [
{
"description": "Protected URL 1",
"value": "http://protected-cds-ils-1.ch/",
"login_required": true
}
]
},
{
"pid": "eitemid-14-dup2",
"created_by": {"type": "import", "value": "springer"},
"source": "springer",
"document_pid": "docid-14",
"eitem_type": "E-BOOK",
"internal_notes": "Second duplicate eitem from springer",
"description": "Description of the second duplicate electronic item",
"open_access": false,
"urls": [
{
"description": "Protected URL 2",
"value": "http://protected-cds-ils-2.ch/",
"login_required": true
}
]
}
]
Loading
Loading