diff --git a/.gitignore b/.gitignore index 5e73cfd20a..83cac4f766 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ local_store/main/* local_store/tmp/* !local_store/tmp/README.md *_options_*.json +portality/_playground/ # test detritous doajtest/unit/*.csv diff --git a/doajtest/example_files/incoming_application.json b/doajtest/example_files/incoming_application.json index 3a582ca4d7..dd36248002 100644 --- a/doajtest/example_files/incoming_application.json +++ b/doajtest/example_files/incoming_application.json @@ -35,10 +35,10 @@ "is_replaced_by": ["2222-2222"], "institution": { "name": "Society Institution", - "country": "US" + "country": "USA" }, "keywords": ["word", "key"], - "language": ["EN", "FR"], + "language": ["ENG", "FRE"], "license": [{ "type": "Publisher's own license", "BY": "True", @@ -70,7 +70,7 @@ "publication_time_weeks": 8, "publisher": { "name": "The Publisher", - "country": "US" + "country": "USA" }, "ref": { "oa_statement": "http://oa.statement", diff --git a/doajtest/fixtures/article.py b/doajtest/fixtures/article.py index 635fe61257..d84c2aedeb 100644 --- a/doajtest/fixtures/article.py +++ b/doajtest/fixtures/article.py @@ -154,8 +154,8 @@ def make_article_apido_struct(): "number": "99", "publisher": "The Publisher", "title": "The Title", - "language": ["EN", "FR"], - "country": "US" + "language": ["ENG", "FRE"], + "country": "USA" }, "year": "1991", "month": "January", diff --git a/doajtest/fixtures/bibjson.py b/doajtest/fixtures/bibjson.py index 2de80c557c..a881845f27 100644 --- a/doajtest/fixtures/bibjson.py +++ b/doajtest/fixtures/bibjson.py @@ -52,7 +52,7 @@ def article_bibjson(cls): "publisher" : "IEEE", "title" : "Journal of Things", "language" : ["eng"], - "country" : "GB", + "country" : "GBR", "issns" : ["1234-5678", "9876-5432"] }, "year" : "1987", diff --git a/doajtest/fixtures/v2/common.py b/doajtest/fixtures/v2/common.py index 201854187f..b9ebe16f78 100644 --- a/doajtest/fixtures/v2/common.py +++ 
b/doajtest/fixtures/v2/common.py @@ -57,10 +57,10 @@ "is_replaced_by": ["2222-2222"], "institution": { "name": "Society Institution", - "country": "US" + "country": "USA" }, "keywords": ["word", "key"], - "language": ["EN", "FR"], + "language": ["ENG", "FRE"], "license": [ { "type": "Publisher's own license", @@ -94,7 +94,7 @@ "publication_time_weeks": 8, "publisher": { "name": "The Publisher", - "country": "US" + "country": "USA" }, "ref": { "oa_statement": "http://oa.statement", @@ -131,7 +131,7 @@ "preservation_service_url" : "http://digital.archiving.policy", "copyright_author_retains" : "y", "copyright_url" : "http://copyright.com", - "publisher_country" : "US", + "publisher_country" : "USA", "publisher_name" : "The Publisher", "deposit_policy" : ["Sherpa/Romeo", "other"], "deposit_policy_other" : "Store it", @@ -141,9 +141,9 @@ "pissn": "1234-5678", "eissn": "9876-5432", "institution_name" : "Society Institution", - "institution_country" : "US", + "institution_country" : "USA", "keywords": ["word", "key"], - "language": ["EN", "FR"], + "language": ["ENG", "FRE"], "license_attributes" : ["BY", "NC"], "license_display" : "y", "license_display_example_url": "http://licence.embedded", diff --git a/doajtest/helpers.py b/doajtest/helpers.py index 2c405060cc..13c0eb84c5 100644 --- a/doajtest/helpers.py +++ b/doajtest/helpers.py @@ -12,7 +12,7 @@ from flask_login import login_user from portality import core, dao -from portality.core import app +from portality.app import app from portality.lib import paths from portality.tasks.redis_huey import main_queue, long_running diff --git a/doajtest/mocks/preservation.py b/doajtest/mocks/preservation.py index 3c6d610d5b..a171296984 100644 --- a/doajtest/mocks/preservation.py +++ b/doajtest/mocks/preservation.py @@ -64,7 +64,7 @@ class PreservationMock: "number": "1", "publisher": "BMC", "title": "Acta Neuropathologica Communications", - "country": "GB", + "country": "GBR", "license": [ { "title": "CC BY", @@ -74,7 +74,7 @@ 
class PreservationMock: } ], "language": [ - "EN" + "ENG" ], "issns": [ "2051-5960" diff --git a/doajtest/unit/api_tests/test_api_crud_article.py b/doajtest/unit/api_tests/test_api_crud_article.py index 08b0649d87..12c5399270 100644 --- a/doajtest/unit/api_tests/test_api_crud_article.py +++ b/doajtest/unit/api_tests/test_api_crud_article.py @@ -142,8 +142,8 @@ def test_02_create_article_success(self): "url" : "http://license.example.com" } ) - journal.bibjson().country = "US" - journal.bibjson().set_language(["EN", "FR"]) + journal.bibjson().country = "USA" + journal.bibjson().set_language(["ENG", "FRE"]) journal.save(blocking=True) # call create on the object (which will save it to the index) @@ -163,8 +163,8 @@ def test_02_create_article_success(self): # but none of these - these should all be the same as the original article in the index assert a.bibjson().publisher == 'The Publisher', a.bibjson().publisher assert a.bibjson().journal_title == 'The Title' - assert a.bibjson().journal_language == ["EN", "FR"] - assert a.bibjson().journal_country == "US" + assert a.bibjson().journal_language == ["ENG", "FRE"] + assert a.bibjson().journal_country == "USA" # time.sleep(1) @@ -288,8 +288,8 @@ def test_06_retrieve_article_success(self): assert a.bibjson.journal.license[0].url == "http://license.example.com" assert a.bibjson.journal.license[0].version == "1.0" assert a.bibjson.journal.license[0].open_access == True - assert a.bibjson.journal.language == ["EN", "FR"] - assert a.bibjson.journal.country == "US" + assert a.bibjson.journal.language == ["ENG", "FRE"] + assert a.bibjson.journal.country == "USA" @with_es(indices=[models.Article.__type__, models.Journal.__type__]) def test_07_retrieve_article_fail(self): @@ -344,8 +344,8 @@ def test_08_update_article_success(self): "url" : "http://license.example.com" } ) - journal.bibjson().country = "US" - journal.bibjson().set_language(["EN", "FR"]) + journal.bibjson().country = "USA" + journal.bibjson().set_language(["ENG", 
"FRE"]) journal.save(blocking=True) data = ArticleFixtureFactory.make_incoming_api_article() @@ -404,8 +404,8 @@ def test_08_update_article_success(self): # but none of these - these should all be the same as the original article in the index assert updated.bibjson().publisher == 'The Publisher', updated.bibjson().publisher assert updated.bibjson().journal_title == 'The Title' - assert updated.bibjson().journal_language == ["EN", "FR"] - assert updated.bibjson().journal_country == "US" + assert updated.bibjson().journal_language == ["ENG", "FRE"] + assert updated.bibjson().journal_country == "USA" @with_es(indices=[models.Article.__type__, models.Journal.__type__], warm_mappings=[models.Article.__type__]) diff --git a/doajtest/unit/api_tests/test_apiv3_crud_application.py b/doajtest/unit/api_tests/test_apiv3_crud_application.py index 983df3acfa..d05eb20652 100644 --- a/doajtest/unit/api_tests/test_apiv3_crud_application.py +++ b/doajtest/unit/api_tests/test_apiv3_crud_application.py @@ -327,12 +327,12 @@ def test_04_coerce(self): ia = IncomingApplication(data) ba = ia.bibjson() - assert ba.country == "BD" + assert ba.country == "BGD" assert ba.apc[0]["currency"] == "BDT" assert isinstance(ba.title, str) assert ba.publication_time_weeks == 15 - assert "FR" in ba.language - assert "EN" in ba.language + assert "FRE" in ba.language + assert "ENG" in ba.language assert len(ba.language) == 2 assert ba.pid_scheme[0] == "doi" assert ba.pid_scheme[1] == "HandleS" diff --git a/doajtest/unit/resources/create_application.json b/doajtest/unit/resources/create_application.json index a6e2d67cf3..260a554284 100644 --- a/doajtest/unit/resources/create_application.json +++ b/doajtest/unit/resources/create_application.json @@ -51,7 +51,7 @@ }, "institution": { "name": "Society Institution", - "country": "US" + "country": "USA" }, "other_charges": { "has_other_charges": true, @@ -85,7 +85,7 @@ }, "publisher": { "name": "The Publisher", - "country": "US" + "country": "USA" }, "ref": { 
"oa_statement": "http://oa.statement", diff --git a/doajtest/unit/resources/create_article.json b/doajtest/unit/resources/create_article.json index ffab42b971..a85c560af6 100644 --- a/doajtest/unit/resources/create_article.json +++ b/doajtest/unit/resources/create_article.json @@ -19,7 +19,7 @@ "publisher": "The Publisher", "title": "The Title", "language": ["EN", "FR"], - "country": "US" + "country": "USA" }, "year": "1991", "month": "January", diff --git a/doajtest/unit/test_crosswalks.py b/doajtest/unit/test_crosswalks.py index 92ee805b5d..28e67a7757 100644 --- a/doajtest/unit/test_crosswalks.py +++ b/doajtest/unit/test_crosswalks.py @@ -96,7 +96,7 @@ def test_05_doaj_article_xml_xwalk(self): article = models.Article(**art[0]) bibjson = article.bibjson() - assert bibjson.journal_language == ["fre"], "expected ['fre'], actual: {} ".format(bibjson.journal_language) + assert bibjson.journal_language == ["FRE"], "expected ['FRE'], actual: {} ".format(bibjson.journal_language) assert bibjson.publisher == "Codicille éditeur et CRILCQ", "expected 'Codicille éditeur et CRILCQ', actual: {} ".format(bibjson.publisher) assert bibjson.journal_title == "2 ISSNs Correct", "expected '2 ISSNs Correct', received: {}".format(bibjson.journal_title) assert bibjson.get_one_identifier(bibjson.P_ISSN) == "1234-5678", "expected '1234-5678', received: {}".format(bibjson.get_one_identifier(bibjson.P_ISSN)) diff --git a/doajtest/unit/test_datasets.py b/doajtest/unit/test_datasets.py index b0ac544f19..9bd9c7dec3 100644 --- a/doajtest/unit/test_datasets.py +++ b/doajtest/unit/test_datasets.py @@ -1,5 +1,6 @@ from portality import datasets from doajtest.helpers import DoajTestCase +from portality.lib.isolang import get_doaj_3char_lang_by_lang class TestDatasets(DoajTestCase): @@ -11,7 +12,7 @@ def tearDown(self): def test_01_countries(self): """ Use country information from our datasets """ - assert datasets.get_country_code('united kingdom') == 'GB' + assert
datasets.get_country_code('united kingdom') == 'GBR' assert datasets.get_country_name('GB') == 'United Kingdom' # If the country is unrecognised, we send it back unchanged. @@ -19,7 +20,7 @@ def test_01_countries(self): assert datasets.get_country_name('mordor') == 'mordor' # Unless fail_if_not_found is set in get_country_code() - assert datasets.get_country_code('united states') == 'US' + assert datasets.get_country_code('united states') == 'USA' assert datasets.get_country_code('the shire', fail_if_not_found=True) is None assert datasets.get_country_code('the shire', fail_if_not_found=False) == 'the shire' @@ -46,7 +47,7 @@ def test_03_languages(self): assert datasets.language_for('german').bibliographic == 'ger' # Specific languages we were asked to correct e.g. https://github.com/DOAJ/doajPM/issues/1262 - assert datasets.name_for_lang("ro") == "Romanian" # alpha_2 + assert datasets.name_for_lang("ro") == "Romanian" # alpha_2 assert datasets.name_for_lang("ron") == "Romanian" # alpha_3 assert datasets.name_for_lang("rum") == "Romanian" # bibliographic assert datasets.name_for_lang("hr") == "Croatian" @@ -86,4 +87,4 @@ def test_04_from_options(self): for (code, name) in languages_options: assert datasets.name_for_lang(code) == name - assert datasets.language_for(code).alpha_2.upper() == code + assert get_doaj_3char_lang_by_lang(datasets.language_for(code)).upper() == code diff --git a/doajtest/unit/test_models.py b/doajtest/unit/test_models.py index a015f9620b..13396cfc8d 100644 --- a/doajtest/unit/test_models.py +++ b/doajtest/unit/test_models.py @@ -634,7 +634,7 @@ def test_14_journal_like_bibjson(self): assert bj.title == "The Title" assert bj.is_replaced_by == ["2222-2222"] assert bj.keywords == ["word", "key"] - assert bj.language == ["EN", "FR"] + assert bj.language == ["ENG", "FRE"] assert len(bj.licences) == 1 assert bj.replaces == ["1111-1111"] assert len(bj.subject) == 2 @@ -656,7 +656,7 @@ def test_14_journal_like_bibjson(self): assert 
bj.editorial_review_url == "http://review.process" assert bj.editorial_board_url == "http://editorial.board" assert bj.institution == "Society Institution" - assert bj.institution_country == "US" + assert bj.institution_country == "USA" assert bj.has_other_charges is True assert bj.other_charges_url == "http://other.charges" assert bj.pid_scheme == ["DOI", "ARK", "PURL", "PIDMachine"] @@ -666,7 +666,7 @@ def test_14_journal_like_bibjson(self): assert bj.preservation_summary == ["LOCKSS", "CLOCKSS", "A safe place", ["A national library", "Trinity"], ["A national library", "Imperial"]] assert bj.preservation_url == "http://digital.archiving.policy" assert bj.publisher_name == "The Publisher" - assert bj.publisher_country == "US" + assert bj.publisher_country == "USA" assert bj.oa_statement_url == "http://oa.statement" assert bj.journal_url == "http://journal.url" assert bj.aims_scope_url == "http://aims.scope" @@ -684,7 +684,7 @@ def test_14_journal_like_bibjson(self): bj.title = "Another title" bj.keywords = ["new", "terms"] bj.is_replaced_by = ["4444-4444"] - bj.language = ["IT"] + bj.language = ["ITA"] bj.replaces = ["3333-3333"] bj.subject = [{"scheme": "TEST", "term": "first", "code": "one"}] bj.apc_url = "http://apc2.com" @@ -706,7 +706,7 @@ def test_14_journal_like_bibjson(self): bj.set_plagiarism_detection("http://test1", False) bj.set_preservation(["LOCKSS", ["a national library", "UCL"]], "http://preservation") bj.publisher_name = "Me" - bj.publisher_country = "GB" + bj.publisher_country = "GBR" bj.oa_statement_url = "http://oa2.statement" bj.journal_url = "http://journal2.url" bj.aims_scope_url = "http://aims2.url" @@ -724,7 +724,7 @@ def test_14_journal_like_bibjson(self): assert bj.title == "Another title" assert bj.is_replaced_by == ["4444-4444"] assert bj.keywords == ["new", "terms"] - assert bj.language == ["IT"] + assert bj.language == ["ITA"] assert len(bj.licences) == 1 assert bj.replaces == ["3333-3333"] assert len(bj.subject) == 1 @@ -742,7 
+742,7 @@ def test_14_journal_like_bibjson(self): assert bj.editorial_review_url == "http://whatever" assert bj.editorial_board_url == "http://board2.url" assert bj.institution == "UCL" - assert bj.institution_country == "FR" + assert bj.institution_country == "FRA" assert bj.has_other_charges is False assert bj.other_charges_url == "http://other2.url" assert bj.pid_scheme == ["Handle"] @@ -752,7 +752,7 @@ def test_14_journal_like_bibjson(self): assert bj.preservation_summary == ["LOCKSS", ["A national library", "UCL"]] assert bj.preservation_url == "http://preservation" assert bj.publisher_name == "Me" - assert bj.publisher_country == "GB" + assert bj.publisher_country == "GBR" assert bj.oa_statement_url == "http://oa2.statement" assert bj.journal_url == "http://journal2.url" assert bj.aims_scope_url == "http://aims2.url" @@ -766,7 +766,7 @@ def test_14_journal_like_bibjson(self): bj.add_is_replaced_by("4321-4321") bj.add_keyword("keyword") - bj.add_language("CZ") + bj.add_language("CES") bj.add_license("CC YOUR", "http://cc.your", True, True, True, False) bj.add_replaces("1234-1234") bj.add_subject("SCH", "TERM", "CDE") @@ -778,7 +778,7 @@ def test_14_journal_like_bibjson(self): assert bj.is_replaced_by == ["4444-4444", "4321-4321"] assert bj.keywords == ["new", "terms", "keyword"] - assert bj.language == ["IT", "CZ"] + assert bj.language == ["ITA", "CES"] assert len(bj.licences) == 2 assert bj.replaces == ["3333-3333", "1234-1234"] assert len(bj.subject) == 2 @@ -808,8 +808,8 @@ def test_14_journal_like_bibjson(self): bj.set_keywords(["one", "two"]) assert bj.keywords == ["one", "two"] - bj.set_language("DE") - assert bj.language == ["DE"] + bj.set_language("de") + assert bj.language == ["GER"] bj.persistent_identifier_scheme = ["ARK"] assert bj.persistent_identifier_scheme == ["ARK"] @@ -871,9 +871,9 @@ def test_14_journal_like_bibjson(self): assert bj.country == bj.publisher_country assert bj.open_access == bj.boai - bj.country = "RU" - assert bj.country == 
"RU" - assert bj.publisher_country == "RU" + bj.country = "RUS" + assert bj.country == "RUS" + assert bj.publisher_country == "RUS" bj.set_open_access(not bj.open_access) assert bj.open_access == bj.boai @@ -957,8 +957,8 @@ def test_16_article_bibjson(self): assert bj.volume == "No 10" assert bj.number == "Iss. 4" assert bj.journal_title == "Journal of Things" - assert bj.journal_language == ["eng"] - assert bj.journal_country == "GB" + assert bj.journal_language == ["ENG"] + assert bj.journal_country == "GBR" assert bj.journal_issns == ["1234-5678", "9876-5432"] assert bj.publisher == "IEEE" assert bj.author[0].get("name") == "Test" @@ -973,7 +973,7 @@ def test_16_article_bibjson(self): bj.volume = "Four" bj.number = "Q1" bj.journal_title = "Journal of Stuff" - bj.journal_language = "fra" + bj.journal_language = "fre" bj.journal_country = "FR" bj.journal_issns = ["1111-1111", "9999-9999"] bj.publisher = "Elsevier" @@ -989,7 +989,7 @@ def test_16_article_bibjson(self): assert bj.volume == "Four" assert bj.number == "Q1" assert bj.journal_title == "Journal of Stuff" - assert bj.journal_language == ["fra"] + assert bj.journal_language == ["FRE"] assert bj.journal_country == "FR" assert bj.journal_issns == ["1111-1111", "9999-9999"] assert bj.publisher == "Elsevier" diff --git a/doajtest/unit/test_task_journal_bulkedit.py b/doajtest/unit/test_task_journal_bulkedit.py index 12facfee39..40d97f012e 100644 --- a/doajtest/unit/test_task_journal_bulkedit.py +++ b/doajtest/unit/test_task_journal_bulkedit.py @@ -174,7 +174,7 @@ def test_05_edit_metadata(self): summary = journal_manage({"query": {"terms": {"_id": [j.id for j in self.journals]}}}, publisher_name="my replacement publisher", change_doaj_seal=True, - publisher_country="AF", + publisher_country="AFG", owner="test1", dry_run=True) assert summary.as_dict().get("affected", {}).get("journals") == TEST_JOURNAL_COUNT, summary.as_dict() @@ -182,7 +182,7 @@ def test_05_edit_metadata(self): summary = 
journal_manage({"query": {"terms": {"_id": [j.id for j in self.journals]}}}, publisher_name="my replacement publisher", change_doaj_seal=True, - publisher_country="AF", + publisher_country="AFG", owner="test1", dry_run=False) assert summary.as_dict().get("affected", {}).get("journals") == TEST_JOURNAL_COUNT, summary.as_dict() @@ -200,7 +200,7 @@ def test_05_edit_metadata(self): .format(ix, j.bibjson().publisher, json.dumps(job.audit, indent=2)) assert j.has_seal() - assert j.bibjson().country == "AF" + assert j.bibjson().country == "AFG" assert j.owner == "test1" diff --git a/docs/dictionary.md b/docs/dictionary.md index d1bdff336c..1655371377 100644 --- a/docs/dictionary.md +++ b/docs/dictionary.md @@ -1,5 +1,7 @@ | Short | Description | | ----- |----------------| | bgjob | background job | +| fn | function | | noti | notification | -| noqa | NO-QA (NO Quality Assurance) | \ No newline at end of file +| noqa | NO-QA (NO Quality Assurance) | +| bibjson | bibliographic JSON | \ No newline at end of file diff --git a/portality/crosswalks/application_form.py b/portality/crosswalks/application_form.py index c92e41ae18..aca15e78fc 100644 --- a/portality/crosswalks/application_form.py +++ b/portality/crosswalks/application_form.py @@ -77,7 +77,7 @@ def formField2objectFields(cls, field): return fields @classmethod - def form2obj(cls, form): + def form2obj(cls, form) -> models.Application: application = models.Application() bibjson = application.bibjson() @@ -96,7 +96,7 @@ def obj2formdata(cls, obj): return cls.forminfo2multidict(forminfo) @classmethod - def obj2form(cls, obj): + def obj2form(cls, obj) -> dict: forminfo = {} bibjson = obj.bibjson() diff --git a/portality/crosswalks/journal_form.py b/portality/crosswalks/journal_form.py index 829bc32f2e..ab13bdaa1d 100644 --- a/portality/crosswalks/journal_form.py +++ b/portality/crosswalks/journal_form.py @@ -445,7 +445,7 @@ def admin2form(cls, obj, forminfo): class JournalFormXWalk(JournalGenericXWalk): @classmethod - 
def form2obj(cls, form): + def form2obj(cls, form) -> models.Journal: journal = models.Journal() bibjson = journal.bibjson() @@ -458,7 +458,7 @@ def form2obj(cls, form): return journal @classmethod - def obj2form(cls, obj): + def obj2form(cls, obj) -> dict: forminfo = {} bibjson = obj.bibjson() diff --git a/portality/crosswalks/journal_questions.py b/portality/crosswalks/journal_questions.py index 7330249e4f..e13efbc901 100644 --- a/portality/crosswalks/journal_questions.py +++ b/portality/crosswalks/journal_questions.py @@ -146,10 +146,17 @@ def languages(vals): codes = [c.lower() for c, _ in datasets.language_options] names = [n.lower() for _, n in datasets.language_options] for v in vals: - if v.lower() in codes: + lv = v.lower() + if lv in codes: keep.append(datasets.name_for_lang(v)) - elif v.lower() in names: + elif lv in names: keep.append(v) + else: + # handle if input value is 2-letter language code + lang = datasets.language_for(lv) + if lang is not None: + keep.append(lang.name) + return ", ".join(keep) # start by converting the object to the forminfo version @@ -263,8 +270,8 @@ def _comma_to_list(x): def _lang_codes(x): """ Get the uppercase 2-char language string for each comma separated language name""" - langs = [datasets.language_for(_) for _ in _comma_to_list(x)] - return [l.alpha_2.upper() for l in langs if l is not None] + langs = (datasets.language_for(_) for _ in _comma_to_list(x)) + return [l.alpha_3.upper() for l in langs if l is not None] def _unfurl_apc(x): """ Allow an APC update by splitting the APC string from the spreadsheet """ diff --git a/portality/datasets.py b/portality/datasets.py index 885808d3e7..57b612c501 100644 --- a/portality/datasets.py +++ b/portality/datasets.py @@ -2,8 +2,10 @@ ~~DataSets:Data~~ """ -import pycountry from collections import OrderedDict + +import pycountry + from portality.lib import isolang @@ -11,13 +13,13 @@ def _generate_country_options(): """ ~~->Countries:Data~~ ~~!Countries:Data->PyCountry:Technology~~
- Gather the countries with 2-character codes + Gather the countries with 3-character codes """ country_options_ = [('', '')] for co in sorted(pycountry.countries, key=lambda x: x.name): try: - country_options_.append((co.alpha_2.upper(), co.name)) + country_options_.append((co.alpha_3.upper(), co.name)) except AttributeError: continue return country_options_ @@ -49,7 +51,7 @@ def _generate_language_options(): language_options_ = [('', '')] for l in sorted(pycountry.languages, key=lambda x: x.name): try: - language_options_.append((l.alpha_2.upper(), l.name)) + language_options_.append((isolang.get_doaj_3char_lang_by_lang(l).upper(), l.name)) except AttributeError: continue @@ -64,14 +66,22 @@ def _generate_license_options(): licenses_ = { # The titles and types are made to match the current values of journals in the DOAJ. # DOAJ currently assumes type and title are the same. - "CC BY": {'BY': True, 'NC': False, 'ND': False, 'SA': False, 'form_label': 'CC BY', "url" : "https://creativecommons.org/licenses/by/4.0/"}, - "CC BY-SA": {'BY': True, 'NC': False, 'ND': False, 'SA': True, 'form_label': 'CC BY-SA', "url" : "https://creativecommons.org/licenses/by-sa/4.0/"}, - "CC BY-ND": {'BY': True, 'NC': False, 'ND': True, 'SA': False, 'form_label': 'CC BY-ND', "url" : "https://creativecommons.org/licenses/by-nd/4.0/"}, - "CC BY-NC": {'BY': True, 'NC': True, 'ND': False, 'SA': False, 'form_label': 'CC BY-NC', "url" : "https://creativecommons.org/licenses/by-nc/4.0/"}, - "CC BY-NC-SA": {'BY': True, 'NC': True, 'ND': False, 'SA': True, 'form_label': 'CC BY-NC-SA', "url" : "https://creativecommons.org/licenses/by-nc-sa/4.0/"}, - "CC BY-NC-ND": {'BY': True, 'NC': True, 'ND': True, 'SA': False, 'form_label': 'CC BY-NC-ND', "url" : "https://creativecommons.org/licenses/by-nc-nd/4.0/"}, - "CC0" : {'BY': False, 'NC': False, 'ND': False, 'SA': False, 'form_label': 'CC0', "url" : "https://creativecommons.org/publicdomain/zero/1.0/"}, - "Public domain" : {'BY': False, 'NC': False, 
'ND': False, 'SA': False, 'form_label': 'CC BY', "url" : "https://creativecommons.org/publicdomain/mark/1.0/"}, + "CC BY": {'BY': True, 'NC': False, 'ND': False, 'SA': False, 'form_label': 'CC BY', + "url": "https://creativecommons.org/licenses/by/4.0/"}, + "CC BY-SA": {'BY': True, 'NC': False, 'ND': False, 'SA': True, 'form_label': 'CC BY-SA', + "url": "https://creativecommons.org/licenses/by-sa/4.0/"}, + "CC BY-ND": {'BY': True, 'NC': False, 'ND': True, 'SA': False, 'form_label': 'CC BY-ND', + "url": "https://creativecommons.org/licenses/by-nd/4.0/"}, + "CC BY-NC": {'BY': True, 'NC': True, 'ND': False, 'SA': False, 'form_label': 'CC BY-NC', + "url": "https://creativecommons.org/licenses/by-nc/4.0/"}, + "CC BY-NC-SA": {'BY': True, 'NC': True, 'ND': False, 'SA': True, 'form_label': 'CC BY-NC-SA', + "url": "https://creativecommons.org/licenses/by-nc-sa/4.0/"}, + "CC BY-NC-ND": {'BY': True, 'NC': True, 'ND': True, 'SA': False, 'form_label': 'CC BY-NC-ND', + "url": "https://creativecommons.org/licenses/by-nc-nd/4.0/"}, + "CC0": {'BY': False, 'NC': False, 'ND': False, 'SA': False, 'form_label': 'CC0', + "url": "https://creativecommons.org/publicdomain/zero/1.0/"}, + "Public domain": {'BY': False, 'NC': False, 'ND': False, 'SA': False, 'form_label': 'CC BY', + "url": "https://creativecommons.org/publicdomain/mark/1.0/"}, } # The top-level keys in the licenses dict should always be == to the "type" of each license object @@ -122,9 +132,9 @@ def name_for_lang(rep): def get_country_code(current_country, fail_if_not_found=False): - """ Get the two-character country code for a given country name """ + """ Get the three-character country code for a given country name """ try: - return pycountry.countries.lookup(current_country).alpha_2 + return pycountry.countries.lookup(current_country).alpha_3 except LookupError: return None if fail_if_not_found else current_country diff --git a/portality/lib/coerce.py b/portality/lib/coerce.py index 9215df0a02..9fb6c508ee 100644 --- 
a/portality/lib/coerce.py +++ b/portality/lib/coerce.py @@ -1,5 +1,5 @@ # ~~Coerce:Library~~ -from portality.lib import dates +from portality.lib import dates, val_convert from datetime import date, datetime from portality.lib import seamless from portality.datasets import get_country_code, get_currency_code @@ -23,42 +23,6 @@ def datify(val): return datify -def to_isolang(output_format=None): - """ - :param output_format: format from input source to putput. Must be one of: - * alpha3 - * alt3 - * alpha2 - * name - * fr - Can be a list in order of preference, too - ~~-> Languages:Data~~ - :return: - """ - # delayed import, since we may not always want to load the whole dataset for a dataobj - from portality.lib import isolang as dataset - - # sort out the output format list - if output_format is None: - output_format = ["alpha3"] - if not isinstance(output_format, list): - output_format = [output_format] - - def isolang(val): - if val is None: - return None - l = dataset.find(val) - if l is None: - raise ValueError("Unable to find iso code for language {x}".format(x=val)) - for f in output_format: - v = l.get(f) - if v is None or v == "": - continue - return v.upper() - - return isolang - - def to_currency_code(val): """ ~~-> Currencies:Data~~ @@ -74,20 +38,6 @@ def to_currency_code(val): return uc(nv) -def to_country_code(val): - """ - ~~-> Countries:Data~~ - :param val: - :return: - """ - if val is None: - return None - nv = get_country_code(val, fail_if_not_found=True) - if nv is None: - raise ValueError("Unable to convert {x} to a valid country code".format(x=val)) - uc = seamless.to_utf8_unicode - return uc(nv) - def to_issn(issn): if len(issn) > 9 or issn == '': @@ -124,9 +74,9 @@ def to_issn(issn): "utcdatetime" : date_str(), "utcdatetimemicros" : date_str(out_format="%Y-%m-%dT%H:%M:%S.%fZ"), "bigenddate" : date_str(out_format="%Y-%m-%d"), - "isolang": to_isolang(), - "isolang_2letter": to_isolang(output_format="alpha2"), - "country_code": to_country_code, + 
"isolang": val_convert.create_fn_to_isolang( is_upper=True), + "isolang_2letter": val_convert.create_fn_to_isolang(output_format="alpha2", is_upper=True), + "country_code": val_convert.to_country_code_3, "currency_code": to_currency_code, "issn" : to_issn } \ No newline at end of file diff --git a/portality/lib/dataobj.py b/portality/lib/dataobj.py index 3333a65277..dca475c2ef 100644 --- a/portality/lib/dataobj.py +++ b/portality/lib/dataobj.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- -from portality.lib import dates -from portality.datasets import get_country_code, get_currency_code +from portality.lib import dates, val_convert +from portality.datasets import get_currency_code from copy import deepcopy import locale, json, warnings from urllib.parse import urlparse @@ -19,15 +19,6 @@ def to_currency_code(val): uc = to_unicode() return uc(nv) -def to_country_code(val): - if val is None: - return None - nv = get_country_code(val, fail_if_not_found=True) - if nv is None: - raise ValueError("Unable to convert {x} to a valid country code".format(x=val)) - uc = to_unicode() - return uc(nv) - def to_unicode(): def to_utf8_unicode(val): if isinstance(val, str): @@ -123,40 +114,6 @@ def stampify(val): return stampify -def to_isolang(output_format=None): - """ - :param output_format: format from input source to putput. 
Must be one of: - * alpha3 - * alt3 - * alpha2 - * name - * fr - Can be a list in order of preference, too - fixme: we could make these pycountry's keys, removing the need for so many transformations and intermediate steps - :return: - """ - # delayed import, since we may not always want to load the whole dataset for a dataobj - from portality.lib import isolang as dataset - - # sort out the output format list - if output_format is None: - output_format = ["alpha3"] - if not isinstance(output_format, list): - output_format = [output_format] - - def isolang(val): - if val is None: - return None - l = dataset.find(val) - if l is None: - raise ValueError("Unable to find iso code for language {x}".format(x=val)) - for f in output_format: - v = l.get(f) - if v is None or v == "": - continue - return v - - return isolang def to_url(val): if not isinstance(val, str): @@ -260,11 +217,13 @@ class DataObj(object): "bigenddate" : date_str(out_format="%Y-%m-%d"), "integer": to_int(), "float": to_float(), - "isolang": to_isolang(), + "isolang": val_convert.create_fn_to_isolang(is_upper=False), + "isolang_up": val_convert.create_fn_to_isolang(is_upper=True), "url": to_url, "bool": to_bool, - "isolang_2letter": to_isolang(output_format="alpha2"), - "country_code": to_country_code, + "isolang_2letter": val_convert.create_fn_to_isolang(output_format="alpha2", + is_upper=False), + "country_code": val_convert.to_country_code_3, "currency_code": to_currency_code, "license": string_canonicalise(["CC BY", "CC BY-NC", "CC BY-NC-ND", "CC BY-NC-SA", "CC BY-ND", "CC BY-SA", "Not CC-like"], allow_fail=True), "persistent_identifier_scheme": string_canonicalise(["None", "DOI", "Handles", "ARK"], allow_fail=True), diff --git a/portality/lib/formulaic.py b/portality/lib/formulaic.py index 3e370f3ffc..d66fdec533 100644 --- a/portality/lib/formulaic.py +++ b/portality/lib/formulaic.py @@ -100,6 +100,8 @@ """ import csv from copy import deepcopy +from typing import Callable, Iterable, Optional + 
from wtforms import Form from wtforms.fields.core import UnboundField, FieldList, FormField @@ -108,6 +110,13 @@ import json +from typing import TypeVar +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from portality.forms.application_processors import ApplicationProcessor + ApplicationProcessorLike = TypeVar('ApplicationProcessorLike', bound=ApplicationProcessor) + + UI_CONFIG_FIELDS = [ "label", "input", @@ -392,7 +401,7 @@ def render_template(self, **kwargs): template = self._definition.get("templates", {}).get("form") return render_template(template, formulaic_context=self, **kwargs) - def processor(self, formdata=None, source=None): + def processor(self, formdata=None, source=None) -> "ApplicationProcessorLike": # ~~^-> FormProcessor:Feature~~ klazz = self._definition.get("processor") if isinstance(klazz, str): @@ -719,7 +728,7 @@ def render_form_control(self, custom_args=None, wtfinst=None): return wtf(**kwargs) @classmethod - def make_wtforms_field(cls, formulaic_context, field) -> UnboundField: + def make_wtforms_field(cls, formulaic_context: FormulaicContext, field: dict) -> UnboundField: builder = cls._get_wtforms_builder(field, formulaic_context.wtforms_builders) if builder is None: raise FormulaicException("No WTForms mapping for field '{x}'".format(x=field.get("name"))) @@ -752,7 +761,10 @@ def make_wtforms_field(cls, formulaic_context, field) -> UnboundField: return builder(formulaic_context, field, wtargs) @classmethod - def _get_wtforms_builder(self, field, wtforms_builders): + def _get_wtforms_builder( + self, field: dict, + wtforms_builders: Iterable['WTFormsBuilder'] + ) -> Optional[Callable[[FormulaicContext, dict, dict], 'Field']]: for builder in wtforms_builders: if builder.match(field): return builder.wtform diff --git a/portality/lib/isolang.py b/portality/lib/isolang.py index 6777add03f..076d8bc628 100644 --- a/portality/lib/isolang.py +++ b/portality/lib/isolang.py @@ -1,3 +1,5 @@ +from typing import Optional + import pycountry @@ 
-46,3 +48,9 @@ def _as_dict(language_object: pycountry.Languages): "name": language_dict.get('name', ''), "fr": '' } + + +def get_doaj_3char_lang_by_lang(lang: 'pycountry.db.Language') -> Optional[str]: + return lang and getattr(lang, + 'bibliographic', + getattr(lang, 'alpha_3', None)) diff --git a/portality/lib/iter_utils.py b/portality/lib/iter_utils.py new file mode 100644 index 0000000000..9ba8931456 --- /dev/null +++ b/portality/lib/iter_utils.py @@ -0,0 +1,20 @@ +from itertools import islice +from typing import Iterable, Any + + +def batched(iterable: Any, n: int) -> Iterable[Iterable[Any]]: + """ + Batch data into tuples of length n. The last batch may be shorter. + batched('ABCDEFG', 3) --> ABC DEF G + copied from more-itertools + """ + # + if n < 1: + raise ValueError('n must be at least one') + + it = iter(iterable) + while True: + batch = tuple(islice(it, n)) + if not batch: + break + yield batch diff --git a/portality/lib/seamless.py b/portality/lib/seamless.py index c850ee9e34..34464d27c7 100644 --- a/portality/lib/seamless.py +++ b/portality/lib/seamless.py @@ -2,22 +2,14 @@ from urllib.parse import urlparse from copy import deepcopy from datetime import datetime +from portality.lib import val_convert ############################################### ## Common coerce functions ############################################### -def to_utf8_unicode(val): - if isinstance(val, str): - return val - elif isinstance(val, str): - try: - return val.decode("utf8", "strict") - except UnicodeDecodeError: - raise ValueError("Could not decode string") - else: - return str(val) - +# proxy alias; the implementation was extracted to val_convert +to_utf8_unicode = val_convert.to_utf8_unicode def to_unicode_upper(val): val = to_utf8_unicode(val) diff --git a/portality/lib/val_convert.py b/portality/lib/val_convert.py new file mode 100644 index 0000000000..e0ae954a1b --- /dev/null +++ b/portality/lib/val_convert.py @@ -0,0 +1,72 @@ +""" +contain functions or factory which provide value
conversion(coerce) in +SeamlessMixin or DataObj layer +""" + +from typing import Callable, Any + +from portality.datasets import get_country_code + + +def create_fn_to_isolang(output_format=None, is_upper=False) -> Callable[[Any], str]: + """ + :param is_upper: return upper code if True + :param output_format: format from input source to output. Must be one of: + * alpha3 + * alt3 + * alpha2 + * name + * fr + Can be a list in order of preference, too + ~~-> Languages:Data~~ + :return: + """ + # delayed import, since we may not always want to load the whole dataset for a dataobj + from portality.lib import isolang as dataset + + # sort out the output format list + if output_format is None: + output_format = ["alpha3"] + if not isinstance(output_format, list): + output_format = [output_format] + + def isolang(val): + if val is None: + return None + l = dataset.find(val) + if l is None: + raise ValueError("Unable to find iso code for language {x}".format(x=val)) + for f in output_format: + v = l.get(f) + if v is None or v == "": + continue + return v.upper() if is_upper else v + + return isolang + + +def to_utf8_unicode(val) -> str: + if isinstance(val, str): + return val + elif isinstance(val, str): # NOTE(review): duplicate isinstance(val, str) check — this branch is unreachable; confirm whether bytes handling was intended
+ try: + return val.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("Could not decode string") + else: + return str(val) + + +def to_country_code_3(val): + """ + ~~-> Countries:Data~~ + :param val: + :return: + """ + if val is None: + return None + nv = get_country_code(val, fail_if_not_found=True) + if nv is None: + raise ValueError("Unable to convert {x} to a valid country code".format(x=val)) + uc = to_utf8_unicode + return uc(nv) diff --git a/portality/migrate/3374_three_letter_language_code/__init__.py b/portality/migrate/3374_three_letter_language_code/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/portality/migrate/3374_three_letter_language_code/migrate.json b/portality/migrate/3374_three_letter_language_code/migrate.json new file mode 100644 index 0000000000..4408253c59 --- /dev/null +++ b/portality/migrate/3374_three_letter_language_code/migrate.json @@ -0,0 +1,17 @@ +{ + "batch": 10000, + "types": [ + { + "type": "application", + "init_with_model": true, + "scroll_size": 1000, + "keepalive": "20m" + }, + { + "type": "journal", + "init_with_model": true, + "scroll_size": 1000, + "keepalive": "20m" + } + ] +} diff --git a/portality/models/article.py b/portality/models/article.py index d42fca377f..7c1337ec2a 100644 --- a/portality/models/article.py +++ b/portality/models/article.py @@ -838,10 +838,10 @@ def lcc_codes_full_list(self): "number" : {"coerce" : "unicode"}, "publisher" : {"coerce" : "unicode"}, "title" : {"coerce" : "unicode"}, - "country" : {"coerce" : "unicode"} + "country" : {"coerce" : "country_code", "set__allow_coerce_failure" : True}, }, "lists" : { - "language" : {"contains" : "field", "coerce" : "unicode"}, + "language" : {"contains" : "field", "coerce" : "isolang_up", "set__allow_coerce_failure" : True}, "issns" : {"contains" : "field", "coerce" : "unicode"} } } diff --git a/portality/models/v2/shared_structs.py b/portality/models/v2/shared_structs.py index 0fdcbbb854..14caf7dd13 
100644 --- a/portality/models/v2/shared_structs.py +++ b/portality/models/v2/shared_structs.py @@ -17,7 +17,7 @@ "lists" : { "is_replaced_by" : {"coerce" : "issn", "contains" : "field", "set__allow_coerce_failure" : True}, "keywords" : {"contains" : "field", "coerce" : "unicode_lower"}, - "language" : {"contains" : "field", "coerce" : "isolang_2letter"}, + "language" : {"contains" : "field", "coerce" : "isolang"}, "license" : {"contains" : "object"}, "replaces" : {"contains" : "field", "coerce" : "issn", "set__allow_coerce_failure" : True}, "subject" : {"contains" : "object"} diff --git a/portality/scripts/generate_iso639b_language_code_schema.py b/portality/scripts/generate_iso639b_language_code_schema.py index 6b2b20f779..afe4ce5010 100644 --- a/portality/scripts/generate_iso639b_language_code_schema.py +++ b/portality/scripts/generate_iso639b_language_code_schema.py @@ -57,10 +57,6 @@ def write_lang_schema(out_file, schema_version): # Gather names and 3-char codes (bibliographic preferred) for only the languages with 2-character codes (ISO639-1) for l in pycountry.languages: - try: - _ = l.alpha_2 - except AttributeError: - continue # Skip languages without 2-char codes try: code = l.bibliographic diff --git a/portality/scripts/init_es_index.py b/portality/scripts/init_es_index.py new file mode 100644 index 0000000000..5b9bb8e0f1 --- /dev/null +++ b/portality/scripts/init_es_index.py @@ -0,0 +1,8 @@ +def main(): + from portality import core + from portality.core import app, initialise_index + initialise_index(app, core.es_connection) + + +if __name__ == '__main__': + main() diff --git a/portality/upgrade.py b/portality/upgrade.py index f9cfb75bf5..89ccdea990 100644 --- a/portality/upgrade.py +++ b/portality/upgrade.py @@ -3,6 +3,7 @@ # FIXME: this script requires more work if it's to be used for specified source and target clusters """ import json, os, dictdiffer +import logging from datetime import datetime, timedelta from copy import deepcopy from collections 
import OrderedDict @@ -22,6 +23,8 @@ "background_job": models.BackgroundJob #~~->BackgroundJob:Model~~ } +log = logging.getLogger(__name__) + class UpgradeTask(object): @@ -30,6 +33,16 @@ def upgrade_article(self, article): def do_upgrade(definition, verbose, save_batches=None): + """ + :param definition: + * init_with_model: record index will be re-generated on save if true + * keepalive: keepalive time of ES connection (e.g. 1m, 20m) + * batch: size of save or model_class.bulk + * scroll_size: size of ES query + :param verbose: + :param save_batches: + :return: + """ # get the source and target es definitions # ~~->Elasticsearch:Technology~~ @@ -68,6 +81,8 @@ def do_upgrade(definition, verbose, save_batches=None): for function_path in tdef.get("functions", []): fn = plugin.load_function(function_path) result = fn(result) + if result is None: + log.warning('WARNING! return of [functions] should not be None') data = result _id = result.get("id", "id not specified")